In [2]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web

In [3]:
# Basic summary statistics
# Small 4x2 demo frame with a few missing values, laid out row by row.
raw_values = np.array([[1.2, 3.4],
                       [1.6, -3],
                       [np.nan, np.nan],
                       [.9, np.nan]])
df = pd.DataFrame(raw_values, index=list('abcd'), columns=['one', 'two'])

df

Unnamed: 0,one,two
a,1.2,3.4
b,1.6,-3.0
c,,
d,0.9,


In [4]:
# Column totals, then row totals; missing values are skipped in both.
print(df.sum(axis=0), df.sum(axis=1), sep='\n')
# With skipna=False, any row containing a NaN sums to NaN.
df.sum(axis=1, skipna=False)

one    3.7
two    0.4
dtype: float64
a    4.6
b   -1.4
c    0.0
d    0.9
dtype: float64


a    4.6
b   -1.4
c    NaN
d    NaN
dtype: float64

In [5]:
# Index label of the largest, then the smallest, value in each column.
# Only a cell's final expression is echoed, so the idxmax() result is
# printed explicitly; otherwise it is computed and thrown away.
print(df.idxmax())
df.idxmin()

one    d
two    b
dtype: object

In [6]:
# Running totals down each column. Printed explicitly because only the
# cell's final expression is echoed.
print(df.cumsum())

# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,1.233333,0.2
std,0.351188,4.525483
min,0.9,-3.0
25%,1.05,-1.4
50%,1.2,0.2
75%,1.4,1.8
max,1.6,3.4


In [75]:
# Correlation and covariance
# Download one price-history frame per ticker, keyed by ticker symbol.
# NOTE(review): the Robinhood reader appears to have been deprecated/removed
# in later pandas-datareader releases -- confirm against the installed version.
all_data = dict(
    (symbol, web.get_data_robinhood(symbol))
    for symbol in ('AAPL', 'IBM', 'MSFT', 'GOOG')
)

In [73]:
# Inspect one downloaded frame: index type, dtypes, then the column labels.
goog_frame = all_data['GOOG']
goog_frame.info()
goog_frame.columns

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 252 entries, (GOOG, 2017-08-21 00:00:00) to (GOOG, 2018-08-20 00:00:00)
Data columns (total 7 columns):
close_price     252 non-null object
high_price      252 non-null object
interpolated    252 non-null bool
low_price       252 non-null object
open_price      252 non-null object
session         252 non-null object
volume          252 non-null int64
dtypes: bool(1), int64(1), object(5)
memory usage: 14.9+ KB


Index(['close_price', 'high_price', 'interpolated', 'low_price', 'open_price',
       'session', 'volume'],
      dtype='object')

In [74]:
# Flatten GOOG's (symbol, date) MultiIndex to a plain date index.
# The isinstance guard keeps the cell idempotent: droplevel(0) on an index
# that has already been flattened would raise.
goog = all_data['GOOG']
if isinstance(goog.index, pd.MultiIndex):
    goog.index = goog.index.droplevel(0)

goog.head()

Unnamed: 0_level_0,close_price,high_price,interpolated,low_price,open_price,session,volume
begins_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-21,906.66,913.0,False,903.4,910.0,reg,943441
2017-08-22,924.69,925.86,False,911.4751,912.72,reg,1166737
2017-08-23,927.0,929.93,False,919.36,921.93,reg,1090248
2017-08-24,921.28,930.84,False,915.5,928.66,reg,1270306
2017-08-25,915.89,925.555,False,915.5,923.49,reg,1053376


In [77]:
# List the ticker symbols currently held in all_data.
all_data.keys()

dict_keys(['AAPL', 'IBM', 'MSFT', 'GOOG'])

In [78]:
# Flatten every ticker's (symbol, date) MultiIndex to a plain date index.
# Frames whose index is already single-level are skipped: GOOG is flattened
# in an earlier cell, and this cell may itself be re-run. Calling
# droplevel(0) on a single-level index raises.
for ticker, frame in all_data.items():
    if isinstance(frame.index, pd.MultiIndex):
        frame.index = frame.index.droplevel(0)

# Bingo!

In [90]:
# Closing prices, one column per ticker. close_price is stored with object
# dtype in the downloaded frames, so cast to float before any arithmetic.
price = pd.DataFrame({ticker: data['close_price']
                      for ticker, data in all_data.items()}).astype('float')

# Preview the converted frame as the cell's final (displayed) expression.
price.head()

In [93]:
# Traded volume, one column per ticker, cast from int64 to float.
volume = pd.DataFrame({ticker: data['volume']
                       for ticker, data in all_data.items()}).astype('float')

# Preview the converted frame as the cell's final (displayed) expression.
volume.head()

In [92]:
# Period-over-period percentage change of each ticker's closing price.
# The first row has no predecessor, so it is NaN.
returns = price.pct_change(periods=1)

returns.head(n=5)

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
begins_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-08-21,,,,
2017-08-22,0.016347,0.019886,0.004845,0.013998
2017-08-23,0.001252,0.002498,0.008014,-0.006014
2017-08-24,-0.004438,-0.00617,0.005628,-0.000413
2017-08-25,0.003704,-0.005851,0.005597,0.001788


In [94]:
# Pairwise Pearson correlation matrix of the tickers' returns.
returns.corr(method='pearson')

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.0,0.582264,0.317535,0.591995
GOOG,0.582264,1.0,0.41671,0.778145
IBM,0.317535,0.41671,1.0,0.40651
MSFT,0.591995,0.778145,0.40651,1.0


In [100]:
# Correlation between two individual return series.
returns['AAPL'].corr(returns['IBM'])

0.31753472919024905

In [101]:
# Correlate every ticker's returns against GOOG's returns.
returns.corrwith(returns['GOOG'])

AAPL    0.582264
GOOG    1.000000
IBM     0.416710
MSFT    0.778145
dtype: float64

In [102]:
# Correlate each ticker's returns with the same-named column of volume.
returns.corrwith(other=volume)

AAPL    0.035335
GOOG   -0.117285
IBM     0.102746
MSFT    0.019398
dtype: float64