In [1]:
"""We begin by first importing the necessary libraries"""
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [4]:
"""First get our csv files and import as DataFrames"""
# Number of leading S&P 500 rows to discard because ASX has no data for them.
# NOTE(review): the original note said "the first year", but 502 daily rows
# looks closer to two trading years -- confirm against the ASX start date.
SP500_ROWS_TO_SKIP = 502

# Every file is read with its 'Date' column parsed and used as the index.
# Missing values are then forward-filled with .ffill(), which returns a new
# frame. This replaces fillna(method='ffill', inplace=True): the `method`
# keyword is deprecated since pandas 2.1, and filling a slice in place
# triggers SettingWithCopy warnings.
# SP500 is trimmed *before* filling, so no value from the discarded rows is
# carried forward into the rows that are kept.
SP500 = (
    pd.read_csv('../Data/SP500.csv', index_col='Date', parse_dates=True)
    .iloc[SP500_ROWS_TO_SKIP:]
    .ffill()
)
Nasdaq = pd.read_csv('../Data/NASDAQ.csv', index_col='Date', parse_dates=True).ffill()
DJI = pd.read_csv('../Data/DJI.csv', index_col='Date', parse_dates=True).ffill()
DAX = pd.read_csv('../Data/DAX.csv', index_col='Date', parse_dates=True).ffill()
Paris = pd.read_csv('../Data/CAC40.csv', index_col='Date', parse_dates=True).ffill()
Tokyo = pd.read_csv('../Data/N225.csv', index_col='Date', parse_dates=True).ffill()
HongKong = pd.read_csv('../Data/HSI.csv', index_col='Date', parse_dates=True).ffill()
Aus = pd.read_csv('../Data/ASX.csv', index_col='Date', parse_dates=True).ffill()

In [5]:
"""We define a function that joins a child to a mother DataFrame"""
def left_join(mother, child):
    """Align ``child`` onto the index of ``mother`` and fill the resulting gaps.

    Parameters
    ----------
    mother : pandas.DataFrame
        Frame whose index defines the rows of the result. Only its index is
        used; its columns are ignored.
    child : pandas.DataFrame
        Frame whose columns are left-joined onto ``mother``'s index.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed like ``mother`` and holding ``child``'s columns.
        The literal string 'null' is treated as missing. Gaps are filled
        forward first; anything still missing at the start is then filled
        backward from the first later observation. Neither input is modified.
    """
    # An empty frame carrying only mother's index makes the join a pure
    # "look up child on these rows" operation.
    aligned = pd.DataFrame(index=mother.index).join(child)
    aligned = aligned.replace('null', np.nan)
    # NOTE(review): a column that contained 'null' was presumably read as text
    # and is not converted to a numeric dtype here -- confirm whether
    # downstream code needs pd.to_numeric.
    # ffill()/bfill() replace fillna(method=...), whose `method` keyword is
    # deprecated since pandas 2.1; chaining also avoids inplace mutation.
    return aligned.ffill().bfill()

In [6]:
"""Join the DataFrames to SP500 so that we have the trading data of all markets on the days SP500 was traded"""
# Re-index every other market onto the S&P 500 trading days in one pass;
# the order of targets on the left matches the order of sources on the right.
Nasdaq_new, DJI_new, DAX_new, Paris_new, Tokyo_new, HongKong_new, Aus_new = (
    left_join(SP500, market)
    for market in (Nasdaq, DJI, DAX, Paris, Tokyo, HongKong, Aus)
)

In [7]:
"""We define a function that restores the date as a feature"""
def reset_index(df):
    """Return a new frame with the 'Date' index moved into a 'Date' column.

    The previous version first copied the index into a 'Date' column and then
    called ``reset_index(level=['Date'])``. That call tries to insert a second
    'Date' column and raises ``ValueError: cannot insert Date, already
    exists``. It also added the column to the caller's frame as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a level named 'Date', or a frame that already has a
        'Date' column (for example when the calling cell is run a second time).

    Returns
    -------
    pandas.DataFrame
        A new frame with a default integer index and a 'Date' column.
        ``df`` itself is left unchanged.
    """
    if 'Date' in df.columns:
        # The dates are already a column, so inserting the index again would
        # raise. Only the index is discarded, which keeps the call idempotent.
        return df.reset_index(drop=True)
    return df.reset_index(level=['Date'])

In [8]:
"""Restore Date as a feature"""
# Turn the date index of every frame back into an ordinary column.
# The market names are rebound here from the raw frames to the joined
# (*_new) frames, so this cell must run after the join cell.
SP500, Nasdaq, DJI, DAX, Paris, Tokyo, HongKong, Aus = map(
    reset_index,
    (SP500, Nasdaq_new, DJI_new, DAX_new, Paris_new, Tokyo_new, HongKong_new, Aus_new),
)

In [None]:
"""Output our DataFrames as csv files to the designated folder"""
# Each processed frame paired with the file it is written to.
_exports = (
    (SP500, '../Data/SP500_new.csv'),
    (Nasdaq, '../Data/Nasdaq_new.csv'),
    (DJI, '../Data/DJI_new.csv'),
    (DAX, '../Data/DAX_new.csv'),
    (Paris, '../Data/Paris_new.csv'),
    (Tokyo, '../Data/Tokyo_new.csv'),
    (HongKong, '../Data/HongKong_new.csv'),
    (Aus, '../Data/Aus_new.csv'),
)
# index=False: the dates are already a regular column, so the integer index
# is not written to the file.
for _frame, _path in _exports:
    _frame.to_csv(_path, index=False)