<h1>Numpy and Pandas Home Assignment</h1>
In this assignment, we will use pandas and numpy to do some fairly basic analysis on equities.

<h3>Imports</h3>

In [1]:
import datetime
import numpy as np
import pandas as pd
%matplotlib inline
import pandas_datareader.data as web
#import fix_yahoo_finance as yf
import yfinance as yf

<h3>Get data on the following four stocks from yahoo finance</h3>
<li>MS, GS, JPM, C
<li>Use the date range 2013/1/1 to 2018/9/24

In [2]:
# Download daily quotes for the four bank tickers over the assignment window
# and keep only the closing prices (one column per ticker).
# NOTE(review): this relies on pandas_datareader's 'yahoo' source being
# reachable; yfinance is imported above but not used here -- confirm which
# data source is intended before re-running.
start = datetime.datetime(year=2013, month=1, day=1)
end = datetime.datetime(year=2018, month=9, day=24)
tickers = ['MS', 'GS', 'JPM', 'C']
quotes = web.DataReader(tickers, 'yahoo', start, end)
stocks_df = quotes['Close']
print(stocks_df)

Symbols            MS          GS         JPM          C
Date                                                    
2013-01-02  19.620001  131.660004   44.660000  41.250000
2013-01-03  19.580000  130.940002   44.570000  41.389999
2013-01-04  20.190001  134.509995   45.360001  42.430000
2013-01-07  19.799999  134.259995   45.410000  42.470001
2013-01-08  19.650000  133.050003   45.500000  42.459999
...               ...         ...         ...        ...
2018-09-18  47.849998  228.889999  114.300003  71.360001
2018-09-19  49.099998  235.580002  117.620003  73.720001
2018-09-20  49.880001  237.399994  118.629997  74.790001
2018-09-21  49.410000  235.339996  117.849998  74.150002
2018-09-24  48.560001  232.899994  116.720001  73.839996

[1443 rows x 4 columns]


<h3>Compute the cross correlation coefficients on the stocks</h3>
<li>Create a df 'rets' with the one day percentage changes
<li>Calculate cross correlations on the rets df

In [3]:
# One-day percentage change of every closing-price column. The first row is
# NaN because there is no earlier close to compare against.
rets = stocks_df.pct_change(periods=1)

# Pairwise Pearson correlation between the four return series.
stocks_corr = rets.corr(method='pearson')
print(stocks_corr)

Symbols        MS        GS       JPM         C
Symbols                                        
MS       1.000000  0.844340  0.801232  0.809949
GS       0.844340  1.000000  0.813289  0.792073
JPM      0.801232  0.813289  1.000000  0.849607
C        0.809949  0.792073  0.849607  1.000000


<h3>Select a pair of stocks to trade</h3>
<li>Choose the pair with the highest correlation
<li>You can just eyeball the result and pick the highest correlation pairs
<li>Create a new df 'pairs' with the returns columns of the highest correlation pairs

In [4]:
# Reading the correlation matrix by eye, JPM and C form the most correlated
# pair (coefficient of roughly 0.85), so their return columns are selected.
pair_tickers = ['JPM', 'C']
pairs = rets[pair_tickers]
print(pairs)

Symbols          JPM         C
Date                          
2013-01-02       NaN       NaN
2013-01-03 -0.002015  0.003394
2013-01-04  0.017725  0.025127
2013-01-07  0.001102  0.000943
2013-01-08  0.001982 -0.000236
...              ...       ...
2018-09-18  0.004041  0.006772
2018-09-19  0.029046  0.033072
2018-09-20  0.008587  0.014514
2018-09-21 -0.006575 -0.008557
2018-09-24 -0.009588 -0.004181

[1443 rows x 2 columns]


<h3>Calculate the mean and std dev of the columns (returns of the two stocks)</h3>
<li>Note that the first row contains NaN values (there is no prior day to compute a return from). You'll have to get rid of this row
<li>df.describe() returns summary stats for all cols in a dataframe. You can use this to extract means and standard deviations

In [5]:
# Returns of the chosen pair with the leading NaN row dropped (the first
# trading day has no prior close to compute a return from).
# .copy() makes new_df an independent frame, so adding columns to it in later
# cells works on its own data rather than on a slice (this avoids pandas'
# SettingWithCopyWarning).
new_df = rets[['C','JPM']].iloc[1:].copy()

# Build the summary table once and read all four statistics from it instead
# of recomputing describe() for every value.
pair_stats = new_df.describe()
c_std = pair_stats.loc['std', 'C']
j_std = pair_stats.loc['std', 'JPM']
c_mean = pair_stats.loc['mean', 'C']
j_mean = pair_stats.loc['mean', 'JPM']

new_df

Symbols,C,JPM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-03,0.003394,-0.002015
2013-01-04,0.025127,0.017725
2013-01-07,0.000943,0.001102
2013-01-08,-0.000236,0.001982
2013-01-09,-0.009892,-0.000659
...,...,...
2018-09-18,0.006772,0.004041
2018-09-19,0.033072,0.029046
2018-09-20,0.014514,0.008587
2018-09-21,-0.008557,-0.006575


<h3>Create a new column "Long_JPM_Short_C"</h3>
<li>If the return of C is greater than its mean + 0.25* std
<li> AND
<li>If the return of JPM is less than its mean - 0.25*std
<li>The value in this col should be True or False

In [6]:
# Signal thresholds, a quarter of a standard deviation away from each mean.
# NOTE: despite the "lb"/"ub" suffixes, c_lb is the level the C return must
# rise above and jpm_ub is the level the JPM return must fall below.
c_lb = c_mean + 0.25 * c_std
jpm_ub = j_mean - 0.25 * j_std

# True on days when C's return was above its threshold while JPM's return was
# below its threshold. The comparison already yields a boolean column, so no
# np.where(..., True, False) wrapper is needed.
# The former `.shift(periods=1)` statement was removed: its result was never
# assigned, so it had no effect on new_df.
new_df['Long_JPM_Short_C'] = (new_df['C'] > c_lb) & (new_df['JPM'] < jpm_ub)
print(new_df)

Symbols            C       JPM  Long_JPM_Short_C
Date                                            
2013-01-03  0.003394 -0.002015             False
2013-01-04  0.025127  0.017725             False
2013-01-07  0.000943  0.001102             False
2013-01-08 -0.000236  0.001982             False
2013-01-09 -0.009892 -0.000659             False
...              ...       ...               ...
2018-09-18  0.006772  0.004041             False
2018-09-19  0.033072  0.029046             False
2018-09-20  0.014514  0.008587             False
2018-09-21 -0.008557 -0.006575             False
2018-09-24 -0.004181 -0.009588             False

[1442 rows x 3 columns]


<h3>Create a new column "Long_C_Short_JPM"</h3>
<li>If the return of JPM is greater than its mean + 0.25* std
<li> AND
<li>If the return of C is less than its mean - 0.25*std
<li>The value in this col should be True or False

In [7]:
# Signal thresholds, a quarter of a standard deviation away from each mean.
# NOTE: despite the "lb"/"ub" suffixes, jpm_lb is the level the JPM return
# must rise above and c_ub is the level the C return must fall below.
jpm_lb = j_mean + 0.25 * j_std
c_ub = c_mean - 0.25 * c_std

# True on days when JPM's return was above its threshold while C's return was
# below its threshold. The comparison already yields a boolean column, so no
# np.where(..., True, False) wrapper is needed.
# The former `.shift(periods=1)` statement was removed: its result was never
# assigned, so it had no effect on new_df.
new_df['Long_C_Short_JPM'] = (new_df['JPM'] > jpm_lb) & (new_df['C'] < c_ub)
print(new_df)

Symbols            C       JPM  Long_JPM_Short_C  Long_C_Short_JPM
Date                                                              
2013-01-03  0.003394 -0.002015             False             False
2013-01-04  0.025127  0.017725             False             False
2013-01-07  0.000943  0.001102             False             False
2013-01-08 -0.000236  0.001982             False             False
2013-01-09 -0.009892 -0.000659             False             False
...              ...       ...               ...               ...
2018-09-18  0.006772  0.004041             False             False
2018-09-19  0.033072  0.029046             False             False
2018-09-20  0.014514  0.008587             False             False
2018-09-21 -0.008557 -0.006575             False             False
2018-09-24 -0.004181 -0.009588             False             False

[1442 rows x 4 columns]


<h3>Create a new column "Trade_return"</h3>
<li>If Long_JPM_Short_C is True and Long_C_Short_JPM is False, the value of this column should be the return on JPM minus the return on C
<li>If Long_JPM_Short_C is False and Long_C_Short_JPM is True, the value of this column should be the return on C minus the return on JPM
<li>Otherwise the value should be zero

In [8]:
# Disabled alternative implementation of 'Trade_return', kept for reference:
# start from a column of zeros, then overwrite the rows matching each signal
# combination through boolean-mask .loc assignment. The active cell that
# follows builds the same column with np.where instead.

# new_df['Trade_return']= np.zeros_like(new_df['JPM'])

# new_df.loc[((new_df['Long_JPM_Short_C'] == True) 
#             & (new_df['Long_C_Short_JPM'] == False)),'Trade_return'] = new_df['JPM'] - new_df['C']

# new_df.loc[((new_df['Long_JPM_Short_C'] == False) 
#             & (new_df['Long_C_Short_JPM'] == True)),'Trade_return'] = new_df['C'] - new_df['JPM']

In [9]:
# Per-day return of the pair trade:
#   only the Long_JPM_Short_C signal is on -> JPM return minus C return
#   only the Long_C_Short_JPM signal is on -> C return minus JPM return
#   otherwise                              -> 0
jpm_ret, c_ret = new_df['JPM'], new_df['C']
long_jpm_only = new_df['Long_JPM_Short_C'] & ~new_df['Long_C_Short_JPM']
long_c_only = new_df['Long_C_Short_JPM'] & ~new_df['Long_JPM_Short_C']

new_df['Trade_return'] = np.where(
    long_jpm_only,
    jpm_ret - c_ret,
    np.where(long_c_only, c_ret - jpm_ret, 0.0),
)
new_df

Symbols,C,JPM,Long_JPM_Short_C,Long_C_Short_JPM,Trade_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-03,0.003394,-0.002015,False,False,0.0
2013-01-04,0.025127,0.017725,False,False,0.0
2013-01-07,0.000943,0.001102,False,False,0.0
2013-01-08,-0.000236,0.001982,False,False,0.0
2013-01-09,-0.009892,-0.000659,False,False,0.0
...,...,...,...,...,...
2018-09-18,0.006772,0.004041,False,False,0.0
2018-09-19,0.033072,0.029046,False,False,0.0
2018-09-20,0.014514,0.008587,False,False,0.0
2018-09-21,-0.008557,-0.006575,False,False,0.0


<h3>Calculate the sum of this new column</h3>
<li>Are you going to get rich?

In [10]:
# Total return of the strategy over the whole sample. Series.sum() is the
# vectorised pandas reduction; the builtin sum() walks the column one element
# at a time in Python. The result may differ from the builtin's only in the
# last floating-point digits.
payoff = new_df['Trade_return'].sum()
payoff

-0.6793516982785628

In [11]:
## No, I will not get rich, since the total return is negative. ##

### Because the overall trade return is negative, we would NOT get rich if we were to invest in both JPM and C between January 1, 2013 and September 24, 2018 using the strategies Long_JPM_Short_C and Long_C_Short_JPM.

In [12]:
# Disabled row-by-row version of the 'Trade_return' calculation, kept for
# reference. It starts from a column of zeros, loops over the index, and
# fills one value at a time according to the two signal columns.
# NOTE(review): the assignments use chained indexing (new_df[col][i] = ...),
# which pandas may not apply to new_df itself -- confirm before re-enabling.

# new_df['Trade_return']= np.zeros_like(new_df['JPM'])

# for i in new_df.index:
#     if (new_df['Long_JPM_Short_C'][i]==True) and (new_df['Long_C_Short_JPM'][i]==False):
#         new_df['Trade_return'][i] = new_df['JPM'][i]-new_df['C'][i]
#     elif new_df['Long_JPM_Short_C'][i]==False and new_df['Long_C_Short_JPM'][i]==True:
#         new_df['Trade_return'][i] = new_df['C'][i]-new_df['JPM'][i]
#     else:
#         new_df['Trade_return'][i] = 0