In [5]:
import os, sys, re
import numpy as np
import pandas
import matplotlib.pyplot as plt

## Remember to run `python download_fpi_2.py` before running the cells below

In [6]:
# Directory (relative to the working directory) that get_files() scans for CSV files.
DATA_DIR = 'data/fpi-data'

In [7]:
def get_files():
    """Return the paths of the CSV files found directly inside DATA_DIR.

    Only directory entries whose name ends in ``.csv`` are kept. Each kept
    name is joined onto DATA_DIR, and the resulting paths are returned in
    reverse (descending) lexicographic order.
    """
    csv_paths = []
    for entry in os.listdir(DATA_DIR):
        if re.search(r'\.csv$', entry):
            csv_paths.append(os.path.join(DATA_DIR, entry))
    return sorted(csv_paths, reverse=True)



In [8]:
def get_df(csv_file_path):
    """Load a CSV file into a DataFrame, using its first column as the index."""
    return pandas.read_csv(csv_file_path, index_col=0)

def get_date_by_filename(csv_file_path):
    """Extract the first ``YYYY-MM-DD`` substring from a path's base name.

    Only the final path component is searched, so date-like directory names
    are ignored.

    Args:
        csv_file_path: Path to a file whose name embeds a date,
            e.g. ``data/fpi-data/fpi-2022-05-31.csv``.

    Returns:
        The matched date as a string, e.g. ``'2022-05-31'``.

    Raises:
        ValueError: If the base name contains no ``YYYY-MM-DD`` substring.
    """
    filename = os.path.basename(csv_file_path)
    match = re.search(r'(\d{4}-\d{2}-\d{2})', filename)
    if match is None:
        # Fail with an explicit message rather than an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f'No YYYY-MM-DD date found in file name: {filename!r}')
    return match.group(1)


In [9]:

def get_cleaned_df(csv_file_to_read, value_column='Equity.6'):
    """Reduce one CSV file to a single date-named column indexed by sector.

    The file is loaded with ``get_df``; the column ``value_column`` is copied
    into a new column named after the date embedded in the file name; two
    rows are dropped; and every column except ``'Sectors'`` and the new date
    column is discarded. ``'Sectors'`` then becomes the index.

    Args:
        csv_file_to_read: Path of the CSV file. Its base name must contain a
            ``YYYY-MM-DD`` date (see ``get_date_by_filename``).
        value_column: Name of the column whose values are kept. Defaults to
            ``'Equity.6'``, the column this notebook has always used.

    Returns:
        A DataFrame indexed by ``'Sectors'`` with exactly one column, named
        after the file's date.

    Raises:
        KeyError: If ``value_column`` or ``'Sectors'`` is not a column.
        IndexError: If the file has fewer than three data rows.
    """
    df = get_df(csv_file_to_read)
    file_date = get_date_by_filename(csv_file_to_read)

    df[file_date] = df[value_column]

    # Drop the rows carrying the index labels found at positions 1 and 2.
    # NOTE(review): presumably these are leftover sub-header rows of the
    # downloaded sheet -- confirm against the raw CSV.
    df = df.drop([df.index[1], df.index[2]])

    # Select the two wanted columns directly instead of dropping every other
    # column one by one in place.
    return df[['Sectors', file_date]].set_index('Sectors')


In [10]:
class FPI_Analysis:
    """One CSV file's cleaned data plus per-sector groupings of it.

    Attributes (all set in ``__init__``):
        file: The CSV path the instance was built from.
        df: Result of ``get_cleaned_df(file)``.
        date: Date string taken from the file name.
        df_by_sector: ``df`` grouped by its index (level 0).
        df_by_sector_mean: ``mean()`` of that grouping.
        df_by_sector_std: ``std()`` of that grouping.
    """

    def __init__(self, csv_file_to_read):
        """Load and clean ``csv_file_to_read`` and compute the groupings."""
        self.file = csv_file_to_read
        self.df = get_cleaned_df(csv_file_to_read)
        self.date = get_date_by_filename(csv_file_to_read)

        # NOTE(review): mean()/std() are computed eagerly; they presumably
        # need the value column to be numeric -- confirm the dtype that
        # get_cleaned_df produces.
        grouped = self.df.groupby(level=0)
        self.df_by_sector = grouped
        self.df_by_sector_mean = grouped.mean()
        self.df_by_sector_std = grouped.std()

    def __str__(self):
        """Return a summary containing the file path, date and cleaned frame."""
        summary = f'FPI: File: {self.file}; date={self.date}; df = \n{self.df}'
        return summary


# df = get_cleaned_df(files[0])
# print(f'df = \n\n{df}')

In [11]:

def merge_all_dataframes(fpis):
    """Outer-join the ``df`` attribute of every item in ``fpis`` on the index.

    Starts from an empty DataFrame and joins each item's ``df`` onto the
    running result in iteration order, so columns appear in the order of
    ``fpis``. Index labels missing from a given frame produce NaN in that
    frame's columns. An empty ``fpis`` yields an empty DataFrame.
    """
    merged = pandas.DataFrame()
    for fpi in fpis:
        merged = merged.join(fpi.df, how='outer')
    return merged



In [12]:
def save_df_to_csv(df, merged_csv_path_name):
    """Write ``df`` as CSV to ``merged_csv_path_name`` using pandas' defaults."""
    df.to_csv(path_or_buf=merged_csv_path_name)

In [15]:
# Merge every dated CSV in DATA_DIR into one table and save it.
# The merged file is written into DATA_DIR itself, so its path and the filter
# that keeps it out of the inputs are both derived from the same two names
# instead of repeating a hard-coded 'data/fpi-data/...' literal.
MERGED_FILE_STEM = 'merged_fpi_data'
CSV_FILE_TO_SAVE = os.path.join(DATA_DIR, f'{MERGED_FILE_STEM}.csv')

unfiltered_files = get_files()
# Skip the output of a previous run: it is a .csv in DATA_DIR too, but it is
# not one of the per-date input files.
files = [f for f in unfiltered_files if MERGED_FILE_STEM not in f]
print(f'files = {files}')
fpis = [FPI_Analysis(f) for f in files]

merged_df = merge_all_dataframes(fpis)
print(f'merged_df = \n\n{merged_df.to_string()}')
save_df_to_csv(merged_df, CSV_FILE_TO_SAVE)
print(f'File saved: {CSV_FILE_TO_SAVE}')



files = ['data/fpi-data/fpi-2022-05-31.csv', 'data/fpi-data/fpi-2022-05-15.csv', 'data/fpi-data/fpi-2022-04-30.csv', 'data/fpi-data/fpi-2022-04-15.csv', 'data/fpi-data/fpi-2022-03-31.csv', 'data/fpi-data/fpi-2022-03-15.csv', 'data/fpi-data/fpi-2022-02-28.csv', 'data/fpi-data/fpi-2022-02-15.csv', 'data/fpi-data/fpi-2022-01-31.csv', 'data/fpi-data/fpi-2022-01-15.csv']
merged_df = 

                                  2022-05-31 2022-05-15 2022-04-30 2022-04-15 2022-03-31 2022-03-15 2022-02-28 2022-02-15 2022-01-31 2022-01-15
Sectors                                                                                                                                        
Airlines                                 NaN        NaN        NaN      12961      13772      12398      13095      15455      13143      15516
Airport Services                         NaN        NaN        NaN       6500       6164       6204       6189       6715       6966       7484
Automobile and Auto Components        228