# Homework: Financial Ratio Quantile Strategy
Robert Hatem

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import quandl
import functools
import seaborn as sns
import time
%matplotlib inline

In [2]:
# API key
quandl.ApiConfig.api_key = "wFcUAbwJv3FbxzDwExsY"

In [3]:
# for quandl data
@functools.lru_cache(maxsize=16)
def fetch_quandl(my_securities, start_date="2011-01-01", end_date="2018-01-01"):
    #print("Fetching from Quandl")
    if len(my_securities) != 1:
        qdata = quandl.get(list(my_securities), start_date="2010-12-01", end_date="2018-01-01", returns="pandas")
    elif len(my_securities) == 1:
        my_securities = my_securities[0]
        qdata = quandl.get(list(my_securities), start_date="2010-12-01", end_date="2018-01-01", returns="pandas")
    return qdata

In [7]:
# clean data
def clean_quandl_columns(dataframe):
    replacement_columns = {}
    for c in dataframe.columns:
        series_name, variable = c.split(' - ')
        source_name, asset = series_name.split('/')
        replacement_columns[c] = asset+":"+variable
    renamed_data = dataframe.rename(columns=replacement_columns)
    return renamed_data

#data = clean_quandl_columns(raw_data)

In [5]:
#df = pd.read_csv('ticker_list.csv')  # after filtering out inactive ones

In [308]:
df1 = pd.read_csv('EOD tickers.csv')  # only has tickers on NYSE and NASDAQ

def query_data(ticker='AAPL'):
    cols = ('ZFB/'+ticker+'_TOT_DEBT_TOT_EQUITY_Q', # debt to market cap
            'ZFB/'+ticker+'_BOOK_VAL_PER_SHARE_Q', # price to book
            'ZFB/'+ticker+'_DILUTED_NET_EPS_Q')    # price to earnings
    raw_data = fetch_quandl(cols)
    if len([1 for col in raw_data.columns if 'Not Found' in col]) > 0:  # if one of the columns is missing
        return pd.DataFrame(), False 
    if raw_data.shape[0]==0:  # if the column is found but it contains no data
        return pd.DataFrame(), False 
    raw_data.iloc[:,0].fillna(0, inplace=True)  # fill NaN for debt to mkt cap with zeros
    
    cols1 = ['TICKER'] + raw_data.columns.tolist()  # for swtitching order of columns
    raw_data1 = raw_data.assign(TICKER=ticker)
    raw_data2 = raw_data1.loc[:,cols1].copy()  # switch order of columns
    
    raw_data3 = raw_data2.reset_index().merge(df1.loc[:, ['ticker','is_active','Sector Code']], left_on='TICKER',right_on='ticker', how='left').set_index('PER_END_DATE').drop(columns=['ticker'])    
    
    return raw_data3, True

df, found_cols = query_data(ticker='AAWW')

In [309]:
df.head()

Unnamed: 0_level_0,TICKER,ZFB/AAWW_TOT_DEBT_TOT_EQUITY_Q - TOT_DEBT_TOT_EQUITY,ZFB/AAWW_BOOK_VAL_PER_SHARE_Q - BOOK_VAL_PER_SHARE,ZFB/AAWW_DILUTED_NET_EPS_Q - DILUTED_NET_EPS,is_active,Sector Code
PER_END_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-12-31,AAWW,0.464,40.4862,1.58,Y,15
2011-03-31,AAWW,0.4479,40.4501,0.4,Y,15
2011-06-30,AAWW,0.3821,41.4504,0.9,Y,15
2011-09-30,AAWW,0.4735,42.073,1.07,Y,15
2011-12-31,AAWW,0.6571,43.39,1.27,Y,15


In [375]:
def test_data(df, found_cols):
    if found_cols==False:  # if one of the ratios is not available
        return pd.DataFrame(), False
    if df['is_active'].isna().all() or df['Sector Code'].isna().all():  # if the stock isn't in the universe of stocks with sector codes (from Suraj)
        #print('')
        return pd.DataFrame(), False
    if len([1 for col in df.columns if 'Not Found' in col]) > 0:  # if one of the ratio columns is not found
        return pd.DataFrame(), False
    else:
        new_cols = ['TICKER', 'TOT_DEBT_TOT_EQUITY_Q','BOOK_VAL_PER_SHARE_Q','DILUTED_NET_EPS_Q', 'is_active','Sector Code']  # clean up the column names
        df.columns = new_cols
    
    ticker = df.TICKER[0]
    data1 = quandl.get(('EOD/'+ticker+'.11'), start_date="2010-12-31", end_date="2018-01-01", returns="pandas")  # adjusted close price
    
    if data1.shape[0]!=1762:   # if full history of closing prices not available
        return pd.DataFrame(), False
    elif sum(df.iloc[:,1] > 0.1) < 1:  # if not enough debt
        return pd.DataFrame(), False
    elif df['Sector Code'].iat[0] in [5, 13]:  # if in auto/banking/insurance
        return pd.DataFrame(), False
    else:
        data2 = data1.merge(df, left_index=True, right_index=True, how='left').fillna(method='ffill')  # join closing prices with quarterly numbers
        
        cols1 = data2.columns.tolist()
        new_cols1 = [cols1[1], cols1[0]] + cols1[2:5]  # for swtitching order of columns
        data3 = data2.loc[:, new_cols1].copy()
        data3['TICKER'] = data3['TICKER'].fillna(method='bfill').values
        
        debt_to_mktcap = data3['TOT_DEBT_TOT_EQUITY_Q'].values    # rename columns
        price_to_book = data3['Adj_Close']/data3['BOOK_VAL_PER_SHARE_Q']   # compute new columns
        price_to_earnings = data3['Adj_Close']/data3['DILUTED_NET_EPS_Q']
        data3 = data3.assign(debt_to_mktcap=debt_to_mktcap).assign(price_to_book=price_to_book).assign(price_to_earnings=price_to_earnings)
        return data3, True
           
d, is_valid = test_data(df, found_cols)

In [376]:
d.head()

In [313]:
tickers = pd.read_csv('ticker_list.csv')

In [4]:
final_tickers = {}
'''
total=tickers['Ticker'].values.shape[0]
counter_dots = []

for i, ticker in enumerate(tickers['Ticker'].values):
    if ticker!=ticker.replace('.',''):
        counter_dots.append(ticker)
    else:
        ticker1 = ticker
        df, found_cols = query_data(ticker=ticker1)
        df_output, is_valid = test_data(df, found_cols)
        if is_valid==True:
            print('{} of {} | Add {}'.format(i, total, ticker1))
            final_tickers[ticker1] = df_output
        else:
            print('{} of {} | SKIP {}'.format(i, total, ticker1))
'''

"\ntotal=tickers['Ticker'].values.shape[0]\ncounter_dots = []\n\nfor i, ticker in enumerate(tickers['Ticker'].values):\n    if ticker!=ticker.replace('.',''):\n        counter_dots.append(ticker)\n    else:\n        ticker1 = ticker\n        df, found_cols = query_data(ticker=ticker1)\n        df_output, is_valid = test_data(df, found_cols)\n        if is_valid==True:\n            print('{} of {} | Add {}'.format(i, total, ticker1))\n            final_tickers[ticker1] = df_output\n        else:\n            print('{} of {} | SKIP {}'.format(i, total, ticker1))\n"

In [380]:
list(final_tickers.keys())[0:3]

['A', 'AAL', 'AAN']

In [1]:
final_tickers['SPB'].head()

NameError: name 'final_tickers' is not defined

In [377]:
len(list(final_tickers))

1835

In [403]:
eod_etf = quandl.get(("EOD/SPY"), start_date="2010-12-01", end_date="2018-01-01", returns="pandas")
eod_etf1 = eod_etf.loc[:,['Adj_Close']]
eod_etf1['etf_returns'] = eod_etf1['Adj_Close'].pct_change()

In [405]:
eod_etf1.head(3)

Unnamed: 0_level_0,Adj_Close,etf_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-12-01,101.928029,
2010-12-02,103.233608,0.012809
2010-12-03,103.511571,0.002693


In [413]:
data = final_tickers['AAPL'].copy()
data['stock_returns'] = data['Adj_Close'].pct_change()

In [414]:
data.head()

Unnamed: 0_level_0,TICKER,Adj_Close,TOT_DEBT_TOT_EQUITY_Q,BOOK_VAL_PER_SHARE_Q,DILUTED_NET_EPS_Q,debt_to_mktcap,price_to_book,price_to_earnings,stock_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-12-31,AAPL,40.362872,0.0,8.479,0.9186,0.0,4.760334,43.939552,
2011-01-03,AAPL,41.240054,0.0,8.479,0.9186,0.0,4.863787,44.894463,0.021732
2011-01-04,AAPL,41.455283,0.0,8.479,0.9186,0.0,4.889171,45.128764,0.005219
2011-01-05,AAPL,41.794393,0.0,8.479,0.9186,0.0,4.929165,45.497924,0.00818
2011-01-06,AAPL,41.760607,0.0,8.479,0.9186,0.0,4.925181,45.461144,-0.000808
