# FSS Snapshot Files (AVRO)

In [2]:
import os
import requests as req
import pandas as pd
import fastavro

from dotenv import load_dotenv
load_dotenv()

True

## Helper Functions

In [3]:
def read_avro_file(filepath) -> pd.DataFrame:
    """Load all records of a single AVRO file into a DataFrame.

    Args:
        filepath: Location of the AVRO file; it is opened in binary mode.

    Returns:
        A DataFrame built from the records yielded by ``fastavro.reader``.
    """
    with open(filepath, "rb") as avro_handle:
        # The reader is consumed here, while the file handle is still open.
        records = fastavro.reader(avro_handle)
        return pd.DataFrame.from_records(records)

def read_avro_folder(folderpath) -> pd.DataFrame:
    """Read every ``.avro`` file directly inside a folder into one DataFrame.

    Files are matched by a case-insensitive ``.avro`` suffix and read in sorted
    filename order, so the row order is reproducible across runs and platforms.
    Sub-folders are not traversed.

    Args:
        folderpath: Folder containing the AVRO files (``str`` or path-like).

    Returns:
        The concatenation of all per-file DataFrames. Each file keeps its own
        row index, so index labels repeat when more than one file is read.
        An empty DataFrame is returned when the folder has no ``.avro`` files.
    """
    avro_names = sorted(
        name for name in os.listdir(folderpath) if name.lower().endswith(".avro")
    )

    # Collect the frames first and concatenate once: calling pd.concat inside
    # the loop re-copies the accumulated rows for every file.
    frames = [
        read_avro_file(os.path.join(folderpath, name)) for name in avro_names
    ]

    # pd.concat raises on an empty list, so keep the "no files" result explicit.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)


## Read all files within a folder

In [4]:
# Available snapshot folders under ./data, keyed by short id:
#   '0e2f212bf5' -> articles = True          (109 files)
#   'a236f418d8' -> articles = True (Small)  (1 file)
#   'eb26b0186e' -> articles = False         (1 file)
short_id = 'a236f418d8'

snapshot_folder = f"./data/{short_id}"
fss_df = read_avro_folder(snapshot_folder)

## Check data volume and basic stats

In [5]:
# (rows, columns) of the combined snapshot frame.
fss_df.shape

(423344, 13)

In [6]:
# Column names available in the snapshot.
fss_df.columns

Index(['_company_code', 'score_date', 'score', 'daily_percentage_change',
       'weekly_percentage_change', 'confidence', 'signal',
       'total_article_count', 'negative_article_count',
       'positive_article_count', 'company_name', '_an', 'theme'],
      dtype='object')

In [7]:
# Summary statistics for the numeric columns.
# NOTE(review): rows look like one per article reference (`_an`), with the
# company-level values repeated on each row — if so, these statistics are
# weighted by article count rather than per company/date; confirm before
# interpreting them.
fss_df.describe()

Unnamed: 0,score,daily_percentage_change,weekly_percentage_change,confidence,total_article_count,negative_article_count,positive_article_count
count,423344.0,423344.0,423344.0,423344.0,423344.0,423344.0,423344.0
mean,0.233167,0.399763,2.563248,1.0,19204.425715,8676.362939,10528.062776
std,0.069947,0.428316,1.479529,0.0,642.238601,1389.919437,816.227763
min,0.1546,-0.1,0.1,1.0,18126.0,7135.0,9670.0
25%,0.1599,0.1,1.3,1.0,18677.0,7250.0,9776.0
50%,0.2976,0.2,2.5,1.0,19642.0,9972.0,9882.0
75%,0.299,0.8,3.5,1.0,19777.0,10001.0,11329.0
max,0.3017,1.5,5.1,1.0,19887.0,10018.0,11780.0


## Using the Data

In [8]:
# Preview the first rows to see what a single record looks like.
fss_df.head()

Unnamed: 0,_company_code,score_date,score,daily_percentage_change,weekly_percentage_change,confidence,signal,total_article_count,negative_article_count,positive_article_count,company_name,_an,theme
0,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,SAEXC00020241023ekan00igf,not assigned
1,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,FMETMA0020240710ek7a0000w,not assigned
2,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,ATYOST0020240904ek940002t,not assigned
3,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,BLCRNW0020241022ekam0002z,not assigned
4,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,INVDAI0020240726ek7q0005v,not assigned


In [10]:
# Keep only the rows tagged with the financial-performance theme.
theme_mask = fss_df["theme"] == 'Financial performance e.g. declines, growth'
fss_df.loc[theme_mask]

Unnamed: 0,_company_code,score_date,score,daily_percentage_change,weekly_percentage_change,confidence,signal,total_article_count,negative_article_count,positive_article_count,company_name,_an,theme
146736,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,DJDN000020241120ekbk00421,"Financial performance e.g. declines, growth"
146737,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,BON0000020240118ek1i00105,"Financial performance e.g. declines, growth"
146738,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,NMBWK00020241108ekb80002u,"Financial performance e.g. declines, growth"
146739,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,INVDAI0020241113ekbd001jl,"Financial performance e.g. declines, growth"
146740,NVDCRP,2024-12-07,0.1627,0.9,5.1,1,Very Low,18177,7137,11040,NVIDIA Corporation,SPACOL0020241009eka9000m9,"Financial performance e.g. declines, growth"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
149035,MCROST,2024-12-06,0.3006,0.5,1.1,1,Moderate,19706,9985,9721,Microsoft Corporation,DJDN000020200113eg1d003cb,"Financial performance e.g. declines, growth"
149036,MCROST,2024-12-06,0.3006,0.5,1.1,1,Moderate,19706,9985,9721,Microsoft Corporation,DJDN000020240730ek7u000uw,"Financial performance e.g. declines, growth"
149037,MCROST,2024-12-06,0.3006,0.5,1.1,1,Moderate,19706,9985,9721,Microsoft Corporation,WSJO000020230121ej1l001b9,"Financial performance e.g. declines, growth"
149038,MCROST,2024-12-06,0.3006,0.5,1.1,1,Moderate,19706,9985,9721,Microsoft Corporation,LBA0000020241118ekbi00lj4,"Financial performance e.g. declines, growth"
