In [22]:
#library imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import talib
import warnings
from matplotlib.pyplot import figure
warnings.filterwarnings('ignore')

In [23]:
#data imports
volumes = pd.read_csv('../Data/stock_volumes.csv', index_col = 'date', parse_dates = True)
prices = pd.read_csv('../Data/stock_prices.csv', index_col = 'date', parse_dates = True)
info = pd.read_csv('../Data/stock_info.csv', index_col = 'Instrument')
sp_listings = pd.read_csv('../Data/sp500_listings.csv', index_col = 'date', parse_dates = True)

In [24]:
#drop stocks (columns) with all missing values
drop_columns = []

for col in prices.columns:
    if prices[col].isnull().all() == True:
        drop_columns.append(col)

prices = prices.drop(columns = drop_columns)

In [25]:
#calculate returns
returns = prices.copy(deep = True)
returns = returns.pct_change()*100

In [26]:
for col in returns.columns:
    #select the stock
    stock = returns[[col]]
    
    #if stock is not currently listed as active
    if info.loc[col,:]['ESTAT'] != 'ACT.':
        #find delist date
        delist_date = sp_listings.where(sp_listings['stock'] == col).last_valid_index() + pd.DateOffset(1)
        #set all values after delist date to NaN
        stock.loc[delist_date:][col] = np.nan
        #replace in original dataframe
        returns[col] = stock[col]

In [27]:
returns

Unnamed: 0_level_0,905270,921795,904261,905261,916328,923024,936365,902355,912215,929813,...,9660J1,69568X,543755,77463M,29235J,131745,69487D,68157P,9110RA,292703
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990-01-02,,,,,,,,,,,...,,,,,,,,,,
1990-01-03,-0.835165,-0.475059,5.000000,-0.810537,0.361421,0.000000,-3.030303,0.000000,-0.431034,-1.315789,...,,,,,,,,,,
1990-01-04,-0.797872,-1.272872,2.380952,-1.634321,-0.180060,3.454545,-1.562500,0.530612,0.225108,0.000000,...,,,,,,,,,,
1990-01-05,-2.546917,1.531023,-0.372093,-1.661475,-1.079730,1.054482,-3.174603,0.487211,-0.656531,-1.333333,...,,,,,,,,,,
1990-01-08,2.017423,-0.873016,0.000000,2.534319,0.000000,0.000000,0.000000,1.010101,0.434783,3.405405,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-24,,,,,0.136600,,0.349877,,,,...,0.376702,4.400221,1.590198,-0.991004,1.682900,3.364703,2.271817,0.634962,4.859794,1.204016
2021-05-25,,,,,0.946372,,0.542355,,,,...,0.230947,-0.288569,0.449063,-1.661106,0.439588,-0.548765,0.185928,0.727530,1.884279,-0.498203
2021-05-26,,,,,-1.393581,,0.616491,,,,...,1.296083,2.388001,-0.166049,3.255613,2.317760,4.702784,0.673960,2.336242,-0.429666,-1.520393
2021-05-27,,,,,-0.907923,,0.102119,,,,...,0.056867,1.893011,-0.063971,-0.510347,1.044185,-0.275482,1.979237,1.876893,1.247655,2.369727


In [28]:
#save
returns.to_pickle('../Data/returns.pkl')

### Risk Adjusted Returns (Sharpe Ratio)

In [29]:
returns = returns/100
returns.head()

Unnamed: 0_level_0,905270,921795,904261,905261,916328,923024,936365,902355,912215,929813,...,9660J1,69568X,543755,77463M,29235J,131745,69487D,68157P,9110RA,292703
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990-01-02,,,,,,,,,,,...,,,,,,,,,,
1990-01-03,-0.008352,-0.004751,0.05,-0.008105,0.003614,0.0,-0.030303,0.0,-0.00431,-0.013158,...,,,,,,,,,,
1990-01-04,-0.007979,-0.012729,0.02381,-0.016343,-0.001801,0.034545,-0.015625,0.005306,0.002251,0.0,...,,,,,,,,,,
1990-01-05,-0.025469,0.01531,-0.003721,-0.016615,-0.010797,0.010545,-0.031746,0.004872,-0.006565,-0.013333,...,,,,,,,,,,
1990-01-08,0.020174,-0.00873,0.0,0.025343,0.0,0.0,0.0,0.010101,0.004348,0.034054,...,,,,,,,,,,


In [30]:
mean = returns.mean()
mean

905270    0.000538
921795   -0.000034
904261    0.000104
905261    0.000118
916328    0.000558
            ...   
131745    0.001465
69487D    0.001376
68157P    0.001447
9110RA    0.002684
292703    0.000748
Length: 1238, dtype: float64

In [31]:
std = returns.std()
std

905270    0.019287
921795    0.028428
904261    0.021595
905261    0.020969
916328    0.015987
            ...   
131745    0.033796
69487D    0.027570
68157P    0.024874
9110RA    0.039236
292703    0.021331
Length: 1238, dtype: float64

In [32]:
std = std*np.sqrt(252)
rfr = 0.02
std

905270    0.306168
921795    0.451281
904261    0.342813
905261    0.332869
916328    0.253786
            ...   
131745    0.536502
69487D    0.437665
68157P    0.394867
9110RA    0.622856
292703    0.338621
Length: 1238, dtype: float64

In [33]:
for col in prices.columns:
    cumu_return = (prices[col][-1]/prices[col][0])-1

In [34]:
# risk-free rate is a minimum return that you can expect from an investment
# rfr = Risk Free Rate = 2% per annum
sharpe = (returns - rfr)/std 
sharpe

Unnamed: 0_level_0,905270,921795,904261,905261,916328,923024,936365,902355,912215,929813,...,9660J1,69568X,543755,77463M,29235J,131745,69487D,68157P,9110RA,292703
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990-01-02,,,,,,,,,,,...,,,,,,,,,,
1990-01-03,-0.092602,-0.054845,0.087511,-0.084434,-0.064565,-0.041038,-0.081069,-0.053221,-0.074800,-0.100036,...,,,,,,,,,,
1990-01-04,-0.091384,-0.072524,0.011113,-0.109182,-0.085902,0.029846,-0.057414,-0.039101,-0.054611,-0.060339,...,,,,,,,,,,
1990-01-05,-0.148511,-0.010392,-0.069195,-0.109997,-0.121352,-0.019401,-0.083394,-0.040256,-0.081738,-0.100565,...,,,,,,,,,,
1990-01-08,0.000569,-0.063664,-0.058341,0.016052,-0.078807,-0.041038,-0.032232,-0.026342,-0.048160,0.042401,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-24,,,,,-0.073424,,-0.026594,,,,...,-0.042002,0.042352,-0.007893,-0.034946,-0.006981,0.025437,0.006211,-0.034570,0.045914,-0.023507
2021-05-25,,,,,-0.041516,,-0.023492,,,,...,-0.045774,-0.040381,-0.029872,-0.042776,-0.034353,-0.047507,-0.041449,-0.032225,-0.001858,-0.073776
2021-05-26,,,,,-0.133718,,-0.022297,,,,...,-0.018214,0.006846,-0.041720,0.014670,0.006996,0.050378,-0.030298,0.008515,-0.039008,-0.103963
2021-05-27,,,,,-0.114582,,-0.030586,,,,...,-0.050278,-0.001888,-0.039754,-0.029330,-0.021043,-0.042413,-0.000474,-0.003118,-0.012079,0.010919


In [35]:
sharpe.to_pickle('../Data/risk_adj_returns.pkl')