# Research Book

#### Documenting the steps taken:

1. Transform the data

2. Prepare for modelling

3. Analysis

4. Notes

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
% matplotlib inline

In [39]:
# Load the multi-factor data set and discard the saved index column ('Unnamed: 0').
# 'Date' is intentionally left as a string: the merge cells further down slice its
# first four characters to obtain the year.
# `parse_dates=True` only targets the index (a default RangeIndex here, so it did
# nothing) and `infer_datetime_format` is deprecated since pandas 2.0; both are omitted.
data = pd.read_csv('data_source/multifactorLMdata-xtnd.csv').drop(columns='Unnamed: 0')

In [40]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Date,company,current_price,momentum,moving_average,moving_volatility,trading_range,target_return,exp_market_change,rates,...,Gross_profit,Operating_profit,Net_Profit,Issue_of_shares,Share_repurchase,Non_current_assets,Current_assets,Non_current_liabilities,Current_liabilities,net_cash_op_act
0,2010/02/17,Anglogold Ashanti Ltd,29500.0,-0.103343,30598.216667,1854.530002,850.55,0.090136,-0.016852,7.08,...,329.5,431.7,317.9,3.9,3.9,732.8,237.2,220.7,454.3,959.6
1,2010/05/06,Anglogold Ashanti Ltd,31150.0,0.139982,28743.15,1079.933674,697.066667,-0.053612,0.044321,6.58,...,239.3,141.3,115.0,0.3,0.0,741.4,188.0,221.0,396.3,816.6
2,2010/08/11,Anglogold Ashanti Ltd,31420.0,-0.026943,31682.016667,1388.464601,864.983333,0.050286,0.008204,6.42,...,381.9,-8.9,-136.0,2.6,0.0,719.6,203.3,306.8,328.0,1003.0
3,2010/11/10,Anglogold Ashanti Ltd,34987.0,0.096771,32142.716667,784.923382,705.033333,-0.060365,0.136024,5.65,...,400.9,138.2,44.3,559.6,0.0,753.0,248.5,412.7,197.5,1056.6
4,2011/02/16,Anglogold Ashanti Ltd,33230.0,-0.007586,32411.016667,1134.366223,629.016667,-0.061601,0.076571,5.53,...,407.9,197.8,40.4,3.1,3.1,779.3,172.3,444.2,97.4,1095.5


In [41]:
# Load the debt table. Its 'Date' column holds the year and is converted to a
# string by the merge cell that follows, so no date parsing is wanted here.
# `parse_dates=True` only targets the index (a no-op for this file) and
# `infer_datetime_format` is deprecated since pandas 2.0; both are omitted.
debt = pd.read_csv('data_source/SA_Debt.csv')

In [48]:
# Preview the first five rows of the debt table.
debt.head(n=5)

Unnamed: 0,Date,Debt(M),Debt(%GDP),Debt_Per_Capita
0,2017,154899,50.1,
1,2016,152509,51.7,2742.0
2,2015,156650,49.33,2861.0
3,2014,164828,46.96,3057.0
4,2013,161760,44.1,3046.0


In [45]:
# Merge with debt and drop unnecessary columns.
# The join uses an explicit, named year key rather than Series objects passed to
# left_on/right_on: array-like keys make current pandas add a stray 'key_0'
# column to the result. The key is the first four characters of data['Date']
# matched against debt['Date'] rendered as a string. debt's own 'Date' column is
# removed before the merge, and the helper key afterwards, so the result holds
# the columns of `data` followed by the remaining debt columns.
df_2 = (
    data.assign(_year=data['Date'].str[:4])
        .merge(debt.assign(_year=debt['Date'].astype(str)).drop(columns='Date'),
               how='left', on='_year', suffixes=('', '_y'))
        .drop(columns='_year')
)

In [47]:
# data + debt: show the first merged row as a quick check.
df_2.head(n=1)

Unnamed: 0,Date,company,current_price,momentum,moving_average,moving_volatility,trading_range,target_return,exp_market_change,rates,...,Issue_of_shares,Share_repurchase,Non_current_assets,Current_assets,Non_current_liabilities,Current_liabilities,net_cash_op_act,Debt(M),Debt(%GDP),Debt_Per_Capita
0,2010/02/17,Anglogold Ashanti Ltd,29500.0,-0.103343,30598.216667,1854.530002,850.55,0.090136,-0.016852,7.08,...,3.9,3.9,732.8,237.2,220.7,454.3,959.6,130020,34.68,2557.0


In [49]:
# Load the CPI table. Its 'year' column is converted to a string by the merge
# cell that follows, so no date parsing is wanted here.
# `parse_dates=True` only targets the index (a no-op for this file) and
# `infer_datetime_format` is deprecated since pandas 2.0; both are omitted.
cpi = pd.read_csv('data_source/SA_CPI.csv')

In [51]:
# Preview the first five rows of the CPI table.
cpi.head(n=5)

Unnamed: 0,year,CPI(%)
0,2018,3.835
1,2017,6.492
2,2016,6.943
3,2015,4.079
4,2014,5.88


In [59]:
# Merge with cpi and drop unnecessary columns.
# The join uses an explicit, named year key rather than Series objects passed to
# left_on/right_on: array-like keys make current pandas add a stray 'key_0'
# column to the result. The key is the first four characters of df_2['Date']
# matched against cpi['year'] rendered as a string. cpi's 'year' column is
# removed before the merge, and the helper key afterwards, so the result holds
# the columns of `df_2` followed by the remaining cpi columns.
df_3 = (
    df_2.assign(_year=df_2['Date'].str[:4])
        .merge(cpi.assign(_year=cpi['year'].astype(str)).drop(columns='year'),
               how='left', on='_year')
        .drop(columns='_year')
)

In [61]:
# data + debt + cpi: show the first merged row as a quick check.
df_3.head(n=1)

Unnamed: 0,Date,company,current_price,momentum,moving_average,moving_volatility,trading_range,target_return,exp_market_change,rates,...,Share_repurchase,Non_current_assets,Current_assets,Non_current_liabilities,Current_liabilities,net_cash_op_act,Debt(M),Debt(%GDP),Debt_Per_Capita,CPI(%)
0,2010/02/17,Anglogold Ashanti Ltd,29500.0,-0.103343,30598.216667,1854.530002,850.55,0.090136,-0.016852,7.08,...,3.9,732.8,237.2,220.7,454.3,959.6,130020,34.68,2557.0,5.468


In [62]:
# Load the GDP table. Its 'Date' column holds the year and is converted to a
# string by the merge cell that follows, so no date parsing is wanted here.
# `parse_dates=True` only targets the index (a no-op for this file) and
# `infer_datetime_format` is deprecated since pandas 2.0; both are omitted.
gdp = pd.read_csv('data_source/SA_GDP.csv')

In [63]:
# Preview the first five rows of the GDP table.
gdp.head(n=5)

Unnamed: 0,Date,GDP_Mill$,GDP_Growth(%)
0,2017,274.183,0.8
1,2016,294.9,0.3
2,2015,317.568,1.3
3,2014,351.116,1.7
4,2013,366.802,2.5


In [68]:
# Merge with gdp and drop unnecessary columns.
# The join uses an explicit, named year key rather than Series objects passed to
# left_on/right_on: array-like keys make current pandas add a stray 'key_0'
# column to the result. The key is the first four characters of df_3['Date']
# matched against gdp['Date'] rendered as a string. gdp's own 'Date' column is
# removed before the merge, and the helper key afterwards, so the result holds
# the columns of `df_3` followed by the remaining gdp columns.
df_4 = (
    df_3.assign(_year=df_3['Date'].str[:4])
        .merge(gdp.assign(_year=gdp['Date'].astype(str)).drop(columns='Date'),
               how='left', on='_year', suffixes=('', '_y'))
        .drop(columns='_year')
)

In [70]:
# data + debt + cpi + gdp: show the first merged row as a quick check.
df_4.head(n=1)

Unnamed: 0,Date,company,current_price,momentum,moving_average,moving_volatility,trading_range,target_return,exp_market_change,rates,...,Current_assets,Non_current_liabilities,Current_liabilities,net_cash_op_act,Debt(M),Debt(%GDP),Debt_Per_Capita,CPI(%),GDP_Mill$,GDP_Growth(%)
0,2010/02/17,Anglogold Ashanti Ltd,29500.0,-0.103343,30598.216667,1854.530002,850.55,0.090136,-0.016852,7.08,...,237.2,220.7,454.3,959.6,130020,34.68,2557.0,5.468,375.304,3.0
