In [5]:
import pandas as pd
import configparser
import os
from datetime import datetime
from pandas.tseries.offsets import BMonthEnd
from dateutil.relativedelta import relativedelta, FR, TH

### Price

In [154]:
# Turn raw bid/ask close quotes into a single mid ("spot") price series per file.
# Directory layout used below: data/raw/prices/<freq>/<file>.csv
#                           -> data/processed/prices/<freq>/<file>.csv
base_path = 'data/raw/prices/'
out_base_path = 'data/processed/prices/'

# Only real, non-hidden sub-directories are frequencies. A stray file such as
# .DS_Store next to them would otherwise make os.listdir() raise below.
freqs = sorted(
    freq for freq in os.listdir(base_path)
    if not freq.startswith('.') and os.path.isdir(os.path.join(base_path, freq))
)

for freq in freqs:
    full_path = os.path.join(base_path, freq)
    out_path = os.path.join(out_base_path, freq)
    # to_csv() does not create missing folders, so make sure the target exists
    os.makedirs(out_path, exist_ok=True)

    # Skip anything that is not a CSV (the other sections of this notebook do the same)
    files = sorted(file for file in os.listdir(full_path) if file.lower().endswith('.csv'))
    for file in files:
        file_path = os.path.join(full_path, file)
        data = pd.read_csv(file_path, index_col=0)
        midprice = (data['bidclose'] + data['askclose']) / 2
        # to_frame('spot') names the column directly instead of renaming column 0
        new_data = midprice.to_frame('spot')
        new_data.to_csv(os.path.join(out_path, file))

### COT

In [155]:
# Build one date-indexed CSV per configured instrument from the raw COT report files.
file_path = 'data/raw/cot/'
config_path = 'configs/cot.cfg'
out_path = 'data/processed/cot/'

config = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list it did parse;
# fail here with a clear message instead of a NoSectionError further down.
if not config.read(config_path):
    raise FileNotFoundError(f'Could not read COT config file: {config_path}')

# INSTRUMENTS maps an output name to the row label used in the raw files.
# configparser lower-cases option names, which is why the output name is upper-cased below.
ins_dict = dict(config.items('INSTRUMENTS'))
parse_dict = dict(config.items('PARSING'))
# 'cols' is a comma-separated (optionally single-quoted) list; strip the whitespace
# around each entry so "'a', 'b'" and "'a','b'" yield the same column names.
cols = [col.strip() for col in parse_dict['cols'].replace('\'', '').split(',') if col.strip()]

all_file_paths = sorted(file_path + file for file in os.listdir(file_path) if file.endswith('.csv'))

# Read every raw file once up front instead of once per instrument.
raw_reports = [pd.read_csv(path, index_col=0) for path in all_file_paths]

os.makedirs(out_path, exist_ok=True)

for name, index_name in ins_dict.items():
    # .loc[[label]] always returns a DataFrame; .loc[label] would return a Series
    # for a file holding exactly one matching row and break the concat below.
    data_list = [report.loc[[index_name]] for report in raw_reports]
    data_agg = pd.concat(data_list).reset_index(drop=True).set_index('Report_Date_as_MM_DD_YYYY')
    data_agg.index = pd.to_datetime(data_agg.index).rename('Date')
    data_agg = data_agg[cols].sort_index()
    data_agg.to_csv(f'{out_path}{name.upper()}.csv')

### Bonds

In [19]:
# For each maturity folder, keep only the 'Price' column of every raw bond file,
# rename it to Bond<maturity> and write it out sorted by date.
file_path = 'data/raw/bonds/'
out_base_path = 'data/processed/bonds/'
maturities = ['1Y', '2Y', '3Y', '5Y', '10Y', '20Y', '6M', '3M']
maturitues = maturities  # original (misspelled) name, kept as an alias in case other cells use it

for maturity in maturities:
    in_dir = os.path.join(file_path, maturity)
    out_dir = os.path.join(out_base_path, maturity)
    os.makedirs(out_dir, exist_ok=True)

    # One filtered listing drives both the input path and the output name.
    # Zipping an unfiltered listing with a filtered one pairs names with the wrong
    # paths as soon as the folder holds a single non-CSV entry.
    all_files = sorted(file for file in os.listdir(in_dir) if file.endswith('.csv'))

    for name in all_files:
        data = pd.read_csv(os.path.join(in_dir, name), index_col=0)
        data.index = pd.to_datetime(data.index)
        data = data.sort_index()
        data = data[['Price']].rename(columns={'Price': f'Bond{maturity}'})
        data.to_csv(os.path.join(out_dir, name))

### Inflation

In [6]:
def get_next_friday(date):
    """Move `date` one calendar month forward, then snap it to a Thursday.

    The snap uses ``relativedelta(weekday=TH(-1))``: the result is the most
    recent Thursday on or before the shifted date (unchanged if the shifted
    date already is a Thursday).

    NOTE(review): the function name says "Friday" but the code targets
    Thursday and looks backwards, not forwards - confirm which is intended
    before relying on the name.
    """
    one_month_later = date + relativedelta(months=1)
    return one_month_later + relativedelta(weekday=TH(-1))

In [7]:
# Re-date every raw inflation series with get_next_friday and keep only its
# 'Value' column, renamed to 'Inflation'.
file_path = 'data/raw/inflation/'
out_path = 'data/processed/inflation/'
os.makedirs(out_path, exist_ok=True)

# One filtered listing supplies both the input path and the output name.
# Zipping an unfiltered listing with a filtered one would pair names with the
# wrong paths whenever the folder contains a non-CSV entry.
all_files = sorted(file for file in os.listdir(file_path) if file.endswith('.csv'))
all_file_paths = [file_path + file for file in all_files]

for name, path in zip(all_files, all_file_paths):
    data = pd.read_csv(path)
    data = data.rename({'DateTime': 'Date'}, axis=1).set_index('Date')
    data.index = pd.to_datetime(data.index)
    # Shift each observation date using the helper defined earlier in the notebook
    data.index = data.index.map(get_next_friday)
    data = data[['Value']].rename(columns={'Value': 'Inflation'})
    data.to_csv(out_path + name)

### Trade

In [8]:
# Re-date every raw trade series with get_next_friday and keep only its
# 'Value' column, renamed to 'Trade'.
file_path = 'data/raw/trade/'
out_path = 'data/processed/trade/'
os.makedirs(out_path, exist_ok=True)

# One filtered listing supplies both the input path and the output name.
# Zipping an unfiltered listing with a filtered one would pair names with the
# wrong paths whenever the folder contains a non-CSV entry.
all_files = sorted(file for file in os.listdir(file_path) if file.endswith('.csv'))
all_file_paths = [file_path + file for file in all_files]

for name, path in zip(all_files, all_file_paths):
    data = pd.read_csv(path)
    data = data.rename({'DateTime': 'Date'}, axis=1).set_index('Date')
    data.index = pd.to_datetime(data.index)
    # Shift each observation date using the helper defined earlier in the notebook
    data.index = data.index.map(get_next_friday)
    data = data[['Value']].rename(columns={'Value': 'Trade'})
    data.to_csv(out_path + name)

### Volatility

In [17]:
# Reduce the raw VIX file to its 'Price' column, indexed by parsed dates.
vix_source = 'data/raw/vol/VIX.csv'
vix_target = 'data/processed/vol/VIX.csv'

vix = pd.read_csv(vix_source, index_col=0)
vix.index = pd.to_datetime(vix.index)
vix = vix['Price'].to_frame()
vix.to_csv(vix_target)