# Data gathering and preprocessing
Data was collected from [Yahoo Finance](https://uk.finance.yahoo.com/watchlists) and comprises weekly closing prices and trading volumes for the 10-year period 01 Jun 2009 - 01 Jun 2019.

This code shows how the data was merged, organized, and cleaned to create a single .csv file containing a complete cross-sectional time series dataset of the chosen assets.

## 1. Import libraries

In [6]:
#import pandas as pd
import datetime
import quandl as q
import pandas as pd
import pandas_datareader.data as web

## 2. Import data 

#### Get and visualize data 

In [18]:
def get_data(asset_name):
    """Load the adjusted closing prices of one asset.

    Reads ``Asset_Dataset/<asset_name>_10y.csv``, keeps only the ``Date`` and
    ``Adj Close`` columns, and returns a float32 DataFrame indexed by the
    parsed ``Date`` column, with rows that contain missing values removed.
    """
    csv_path = ''.join(('Asset_Dataset/', asset_name, '_10y.csv'))
    prices = pd.read_csv(
        csv_path,
        usecols=['Date', 'Adj Close'],
        index_col='Date',
        parse_dates=True,
    )
    prices = prices.astype('float32')
    return prices.dropna()

def get_vol(asset_name):
    """Load the traded volume of one asset.

    Reads ``Asset_Dataset/<asset_name>_10y.csv``, keeps only the ``Date`` and
    ``Volume`` columns, and returns a float32 DataFrame indexed by the parsed
    ``Date`` column, with rows that contain missing values removed.
    """
    csv_path = ''.join(('Asset_Dataset/', asset_name, '_10y.csv'))
    volumes = pd.read_csv(
        csv_path,
        usecols=['Date', 'Volume'],
        index_col='Date',
        parse_dates=True,
    )
    volumes = volumes.astype('float32')
    return volumes.dropna()



In [19]:
# get asset data
data_IVV = get_data('IVV')
data_SHY = get_data('SHY')
data_VNQ = get_data('VNQ')
data_GLD = get_data('GLD')
data_VIX = get_data('VIX')

# get volume data
vol_IVV = get_vol('IVV')
vol_SHY = get_vol('SHY')
vol_VNQ = get_vol('VNQ')
vol_GLD = get_vol('GLD')

%store data_IVV
%store data_SHY
%store data_VNQ
%store data_GLD
%store data_VIX
%store vol_IVV
%store vol_SHY
%store vol_VNQ 
%store vol_GLD 


Stored 'data_IVV' (DataFrame)
Stored 'data_SHY' (DataFrame)
Stored 'data_VNQ' (DataFrame)
Stored 'data_GLD' (DataFrame)
Stored 'data_VIX' (DataFrame)
Stored 'vol_IVV' (DataFrame)
Stored 'vol_SHY' (DataFrame)
Stored 'vol_VNQ' (DataFrame)
Stored 'vol_GLD' (DataFrame)


In [20]:
data_gdp = q.get("FRED/GDPC1", start_date="2009-06-01", end_date="2019-06-01",collapse='daily', 
                 authtoken="oWpRXksxc4gyrtAwXe18")
%store data_gdp

data_infl = q.get("FRED/CPIAUCSL", start_date="2009-06-01", end_date="2019-06-01",collapse='daily', 
                  authtoken="oWpRXksxc4gyrtAwXe18")
%store data_infl

data_int = q.get("FRED/DFF", start_date="2009-06-01", end_date="2019-06-01",collapse='daily', 
                 authtoken="oWpRXksxc4gyrtAwXe18")
%store data_int

data_unemp = q.get("FRED/UNRATE", start_date="2009-06-01", end_date="2019-06-01",collapse='daily', 
                   authtoken="oWpRXksxc4gyrtAwXe18")
%store data_unemp

data_savings = q.get("FRED/PSAVERT", start_date="2009-06-01", end_date="2019-06-01",collapse='daily', 
                     authtoken="oWpRXksxc4gyrtAwXe18")
%store data_savings


Stored 'data_gdp' (DataFrame)
Stored 'data_infl' (DataFrame)
Stored 'data_int' (DataFrame)
Stored 'data_unemp' (DataFrame)
Stored 'data_savings' (DataFrame)


In [None]:
from pandas_datareader import data, wb

# Symbols to download and the date window of the request.
tickers = ['AAPL', 'MSFT', 'NFLX', 'AMZN', 'GOOG']
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2018, 12, 31)

# Fetch each ticker from the 'yahoo' source and keep only its 'Adj Close' series.
adj_close_series = []
for ticker in tickers:
    quotes = data.DataReader(ticker, 'yahoo', start, end)
    adj_close_series.append(quotes['Adj Close'])

# Each series becomes one row, so transpose to get one column per ticker,
# then label the columns with the ticker symbols.
df = pd.DataFrame(adj_close_series).T
df.columns = tickers

