In [1]:
# Importing necessary modules
import warnings
warnings.filterwarnings('ignore')

import smtplib
import pandas as pd
import numpy as np
import datetime as dt
import pandas.stats.moments as st
import time
%matplotlib inline
from bs4 import BeautifulSoup as bs
import requests
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sqlalchemy import *
from sqlalchemy import create_engine
import calendar
import sqlite3 as sql
pd.options.display.float_format = '{:,.4f}'.format

init_notebook_mode(connected=True)

import os
# Remember the directory the notebook was started from so it can be restored
# after the chdir-based imports below.
main_dir = os.getcwd()

# NOTE(review): dbs_dir is assigned the same value again further down; the
# value is only used by the os.chdir(dbs_dir) call near the end of this cell.
dbs_dir = 'C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Data\\DBs'

# Each os.chdir below switches into a project folder right before importing
# from it (presumably relying on the current directory being importable).
# NOTE(review): absolute, machine-specific paths -- the notebook will not run
# elsewhere without editing them.
os.chdir('C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Trading\\Modules\\DataCollection')

from alphavantage import *
from yahoo_query import *
from option_slam_earnings import *
from reuters_query import reuters_query, reuters_insiders
from alphaquery import alphaquery

os.chdir('C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Trading\\Modules\\Options')
from optionsFunctions import *

os.chdir('C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Trading\\Modules\\Earnings')
from yahoo_earnings import *
from lookup_earnings import lookup_earnings

os.chdir('C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Trading\\Modules\\Processing')
from hv_calc import *

# Back to the starting directory once the local modules are imported.
os.chdir(main_dir)

# The price database is addressed with a relative SQLite URL, so the working
# directory is switched to prices_dir before the engine is created.
prices_dir = 'D:\\Price Data'
os.chdir(prices_dir)
prices_engine = create_engine('sqlite:///histprices.db', echo=False)


dbs_dir = 'C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Data\\DBs'

# The working directory stays at dbs_dir for the rest of the cell; the three
# engines below also use relative SQLite URLs.
os.chdir(dbs_dir)

earn_engine = create_engine('sqlite:///earningsHistory.db', echo=False)
yahoo_engine = create_engine('sqlite:///yahoo.db', echo = False)
reuters_engine = create_engine('sqlite:///reuters.db', echo=False)
# sec_engine = create_engine('sqlite:///SEC_txt.db', echo=False)



## Initial Tables

In [92]:
# Build the ticker -> (industry, sector) lookup from the Yahoo database.
# Every table whose name contains 'profiles' is read in turn; `profiles`
# ends up holding the result for the last such table.
for yahoo_table in yahoo_engine.table_names():
    # Guard clause: skip every table that is not a profiles table.
    if 'profiles' not in yahoo_table:
        continue

    query = 'SELECT * FROM {}'
    curr_table = pd.read_sql_query(query.format(yahoo_table),
                                   con=yahoo_engine,
                                   index_col='Underlying')
    curr_table = curr_table[['industry', 'sector']]
    # When a ticker appears more than once, keep only its last row.
    curr_table = curr_table.loc[~curr_table.index.duplicated(keep='last')]
    profiles = curr_table

## Financial Ratios
The current ratio measures a company’s ability to pay off short-term liabilities with current assets:
    - Current ratio = 'totalCurrentAssets'/'totalCurrentLiabilities'
The acid-test ratio measures a company’s ability to pay off short-term liabilities with quick assets:
    - Acid-test ratio = ('totalCurrentAssets' – 'inventory')/'totalCurrentLiabilities'
The cash ratio measures a company’s ability to pay off short-term liabilities with cash and cash equivalents:
    - Cash ratio = 'cash'/'totalCurrentLiabilities'
The operating cash flow ratio is a measure of the number of times a company can pay off current liabilities with the cash generated in a given period:
    - Operating cash flow ratio = 'totalCashFromOperatingActivities'/'totalCurrentLiabilities'
The debt ratio measures the amount of a company’s assets that are provided from debt:
    - Debt ratio = 'totalLiab'/'totalAssets'
The debt to equity ratio calculates the weight of total debt and financial liabilities against shareholders equity:
    - Debt to equity ratio = 'totalLiab'/'totalStockholderEquity'
The interest coverage ratio determines how easily a company can pay its interest expenses:
    - Interest coverage ratio = 'operatingIncome'/'interestExpense'
The asset turnover ratio measures a company’s ability to generate sales from assets:
    - Asset turnover ratio = 'totalRevenue'/'totalAssets'
The gross margin ratio compares the gross profit of a company to its net sales to show how much profit a company makes after paying off its cost of goods sold:
    - Gross margin ratio = 'grossProfit'/'totalRevenue'
The operating margin ratio compares the operating income of a company to its net sales to determine operating efficiency:
    - Operating margin ratio = 'operatingIncome'/'totalRevenue'
The profit margin ratio compares the net income of a company to its net sales:
    - Profit Margin ratio = 'netIncome'/'totalRevenue'
The return on assets ratio measures how efficiently a company is using its assets to generate profit:
    - Return on assets ratio = 'operatingIncome'/'totalAssets'
The return on equity ratio measures how efficiently a company is using its equity to generate profit:
    - Return on equity ratio = 'operatingIncome'/'totalStockholderEquity'
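The price-earnings ratio compares a company’s share price to the earnings per share (no price-earnings calculation appears in the functions below).
Additional derived quantities (the code adds the signed cash-flow fields, which matches these formulas when outflows such as 'capitalExpenditures' and 'repurchaseOfStock' are stored as negative numbers):
    - Free Cash Flow = 'totalCashFromOperatingActivities' - 'capitalExpenditures'
    - Net Issuance/Repurchase of Stock ('netStockSales') = -('issuanceOfStock' + 'repurchaseOfStock')
    - Repurchases to Revenue = 'netStockSales'/'totalRevenue'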

In [3]:
# All raw statement fields referenced by the ratio formulas.
# NOTE(review): 'totalCashFromOperatingActivities' appears twice; the list is
# reproduced unchanged. Confirm whether the duplicate is intentional.
raw_statement_fields = [
    'totalCurrentAssets',
    'totalCurrentLiabilities',
    'inventory',
    'cash',
    'totalCashFromOperatingActivities',
    'totalLiab',
    'totalAssets',
    'totalStockholderEquity',
    'operatingIncome',
    'interestExpense',
    'totalRevenue',
    'grossProfit',
    'totalCashFromOperatingActivities',
    'capitalExpenditures',
    'netIncome',
]

# Columns requested from the balance-sheet tables.
balanceSheet_fields = [
    'cash',
    'inventory',
    'totalAssets',
    'totalCurrentAssets',
    'totalCurrentLiabilities',
    'totalLiab',
    'totalStockholderEquity',
]

# Columns requested from the income-statement tables.
incomeStatement_fields = [
    'grossProfit',
    'interestExpense',
    'netIncome',
    'operatingIncome',
    'totalRevenue',
]

# Columns requested from the cash-flow tables.
cashFlow_fields = [
    'capitalExpenditures',
    'totalCashFromOperatingActivities',
    'repurchaseOfStock',
    'issuanceOfStock',
    'netBorrowings',
    'dividendsPaid',
]


In [129]:
def pull_statement(ticker, statement_name, fields, period):
    """Read the most recent rows of one statement table for a single ticker.

    Parameters
    ----------
    ticker : str
        Value matched against the ``Underlying`` column.
    statement_name : str
        Name of the table to read through ``yahoo_engine``.
    fields : list of str
        Columns to select in addition to ``period`` and ``Underlying``.
    period : str
        Name of the date column; it is parsed to datetimes and used for sorting.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Underlying``, de-duplicated, sorted ascending by
        ``period`` and cut down to the last four rows.  Every column except
        datetime columns and ``'maxAge'`` is passed through ``pd.to_numeric``.
        The frame is empty when the ticker has no rows in the table.
    """
    columns = ', '.join([period, 'Underlying'] + list(fields))

    # Table and column names cannot be bound as SQL parameters, so they are
    # still formatted into the statement.  The ticker is bound instead of being
    # wrapped in double quotes: SQLite treats a double-quoted token as a column
    # identifier when one matches (case-insensitively), so a ticker such as
    # CASH would otherwise be compared against the `cash` column.
    query = 'SELECT {0} FROM {1} WHERE Underlying = ?'.format(columns, statement_name)

    statement = pd.read_sql_query(query, con=yahoo_engine, params=(ticker,),
                                  index_col='Underlying').drop_duplicates()

    statement[period] = pd.to_datetime(statement[period])

    for col in statement.columns:
        # Decide by dtype rather than by inspecting the first row, so an empty
        # result does not raise.
        if col == 'maxAge' or pd.api.types.is_datetime64_any_dtype(statement[col]):
            continue
        statement[col] = pd.to_numeric(statement[col])

    return statement.sort_values(period).tail(4)

def fin_factors(ticker, period, statement_period):
    """Combine the three statements of a ticker and append the derived ratios.

    The balance sheet, income statement and cash-flow tables named
    ``<statement_period>BalanceSheet`` / ``IncomeStatement`` / ``CashFlow`` are
    loaded with ``pull_statement``, merged on ``period`` and indexed by it.

    Parameters
    ----------
    ticker : str
        Ticker forwarded to ``pull_statement``.
    period : str
        Name of the date column used as merge key and as the resulting index.
    statement_period : str
        Table-name prefix (the callers in this notebook pass 'annual' or
        'quarterly').

    Returns
    -------
    pandas.DataFrame
        The raw statement columns followed by 'freeCashFlow', 'netStockSales'
        and the ``*_ratio`` columns, one row per period.
    """
    balance_sheet = pull_statement(ticker, '{}BalanceSheet'.format(statement_period),
                                   balanceSheet_fields, period)
    income_statement = pull_statement(ticker, '{}IncomeStatement'.format(statement_period),
                                      incomeStatement_fields, period)
    cash_flow = pull_statement(ticker, '{}CashFlow'.format(statement_period),
                               cashFlow_fields, period)

    # Missing issuance/repurchase figures are treated as zero.
    for stock_col in ('issuanceOfStock', 'repurchaseOfStock'):
        cash_flow[stock_col] = cash_flow[stock_col].fillna(0)

    fins = balance_sheet.merge(income_statement, on=period)
    fins = fins.merge(cash_flow, on=period)
    fins = fins.set_index(period)

    # The two signed cash-flow fields are summed as stored.
    fins['freeCashFlow'] = fins['totalCashFromOperatingActivities'] + fins['capitalExpenditures']
    fins['netStockSales'] = (fins['issuanceOfStock'] + fins['repurchaseOfStock']) * -1

    quick_assets = fins['totalCurrentAssets'] - fins['inventory']

    # (output column, numerator series, denominator column) -- the order of
    # this table fixes the order of the ratio columns in the result.
    ratio_table = [
        ('current_ratio', fins['totalCurrentAssets'], 'totalCurrentLiabilities'),
        ('acidTest_ratio', quick_assets, 'totalCurrentLiabilities'),
        ('cash_ratio', fins['cash'], 'totalCurrentLiabilities'),
        ('operatingCashFlow_ratio', fins['totalCashFromOperatingActivities'], 'totalCurrentLiabilities'),
        ('debt_ratio', fins['totalLiab'], 'totalAssets'),
        ('debtToEquity_ratio', fins['totalLiab'], 'totalStockholderEquity'),
        ('interestCoverage_ratio', fins['operatingIncome'], 'interestExpense'),
        ('assetTurnover_ratio', fins['totalRevenue'], 'totalAssets'),
        ('grossMargin_ratio', fins['grossProfit'], 'totalRevenue'),
        ('operatingMargin_ratio', fins['operatingIncome'], 'totalRevenue'),
        ('profitMargin_ratio', fins['netIncome'], 'totalRevenue'),
        ('returnOnAssets_ratio', fins['operatingIncome'], 'totalAssets'),
        ('returnOnEquity_ratio', fins['operatingIncome'], 'totalStockholderEquity'),
        ('repurchasesToRevenue_ratio', fins['netStockSales'], 'totalRevenue'),
    ]
    for ratio_name, numerator, denominator in ratio_table:
        fins[ratio_name] = numerator / fins[denominator]

    return fins

def annual_factors(ticker):
    """Return annual ratios together with their one-year and cumulative changes.

    The annual financials come from ``fin_factors(ticker, 'year', 'annual')``.
    The result starts with the ``*ratio*`` columns, then the row-over-row
    percentage change of every financial column, then the compounded change
    since the first row.

    Column naming follows ``DataFrame.join``: only names that collide with a
    column already present receive the suffix ('change1yr' / 'changeallyrs').
    NOTE(review): as a consequence the one-year change of a non-ratio column
    keeps the raw column name (e.g. 'cash') -- confirm that downstream users
    expect this.
    """
    fins = fin_factors(ticker, 'year', 'annual')

    yearly_change = fins.pct_change()
    compounded_change = (yearly_change + 1).cumprod() - 1

    ratio_columns = [name for name in fins.columns.tolist() if 'ratio' in name]

    factors = fins[ratio_columns]
    factors = factors.join(yearly_change, rsuffix='change1yr')
    factors = factors.join(compounded_change, rsuffix='changeallyrs')
    return factors


def quarter_factors(ticker):
    """Build quarterly ratio factors and the matching earnings information.

    Steps:
      1. Load the quarterly financials via ``fin_factors``.
      2. Read the ticker's rows from ``postEarningsReturns`` (``earn_engine``)
         and ``quarterlyEarnings`` (``yahoo_engine``).
      3. Match every reported quarter to the nearest earnings date within
         90 days (``pd.merge_asof``), giving ``earnings_info``.
      4. Attach 'epsActual' and 'surprisePercent' to the financials, again by
         nearest earnings date within 90 days.
      5. Compute one-quarter and cumulative percentage changes.

    Parameters
    ----------
    ticker : str
        Value matched against the ``Underlying`` column of both tables.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        * the factor frame indexed by quarter: ratio columns, one-quarter
          changes (suffix 'change1qtr' on colliding names) and cumulative
          changes (suffix 'changeallqtrs'; 'surprisePercent' is excluded);
        * a copy of the earnings information indexed by quarter with the
          columns 'earningsDate', 'closeToOpenReturn', 'industryBeta',
          'marketBeta', 'stock52WeekReturn', 'market52WeekReturn' and
          'industry52WeekReturn'.
    """
    period = 'quarter'

    fins = fin_factors(ticker, period, 'quarterly').sort_index()

    # The ticker is bound as a parameter rather than formatted in between
    # double quotes: SQLite resolves a double-quoted token to a column of the
    # same (case-insensitive) name before falling back to a string literal.
    earnings_hist = pd.read_sql_query('SELECT * FROM postEarningsReturns WHERE Underlying = ?',
                                      con=earn_engine, params=(ticker,),
                                      index_col='Underlying').drop_duplicates().fillna(np.nan)
    earnings_hist['earningsDate'] = pd.to_datetime(earnings_hist['earningsDate'])
    earnings_hist = earnings_hist.sort_values('earningsDate')

    quarterly_earnings = pd.read_sql_query('SELECT * FROM quarterlyEarnings WHERE Underlying = ?',
                                           con=yahoo_engine, params=(ticker,),
                                           index_col='Underlying').drop_duplicates()
    quarterly_earnings['quarter'] = pd.to_datetime(quarterly_earnings['quarter'])
    quarterly_earnings = quarterly_earnings.sort_values('quarter')

    tol = pd.Timedelta('90 day')

    earnings_info = pd.merge_asof(left=quarterly_earnings, right=earnings_hist,
                                  left_on=['quarter'], right_on=['earningsDate'],
                                  direction='nearest', tolerance=tol).set_index('quarter')

    # NOTE(review): quarters without an earnings date inside the tolerance
    # carry a missing 'earningsDate' here, and this second merge uses that
    # column as its right-hand key -- confirm how such tickers should be
    # handled.
    fins = pd.merge_asof(left=fins.reset_index(),
                         right=earnings_info[['earningsDate', 'epsActual', 'surprisePercent']],
                         left_on=['quarter'], right_on=['earningsDate'],
                         direction='nearest', tolerance=tol).set_index('quarter')

    del fins['earningsDate']

    one_quarter_changes = fins.pct_change()

    cumulative_cols = [col for col in fins.columns.tolist() if col != 'surprisePercent']
    cumulative_changes = (fins[cumulative_cols].pct_change() + 1).cumprod() - 1

    ratio_cols = [col for col in fins.columns.tolist() if 'ratio' in col]
    factors = fins[ratio_cols].join(one_quarter_changes, rsuffix='change1qtr')
    factors = factors.join(cumulative_changes, rsuffix='changeallqtrs')

    info_cols = ['earningsDate', 'closeToOpenReturn', 'industryBeta', 'marketBeta',
                 'stock52WeekReturn', 'market52WeekReturn', 'industry52WeekReturn']
    # Hand back a copy so callers can add columns without touching a slice of
    # earnings_info.
    return factors, earnings_info[info_cols].copy()

def create_data_row(ticker):
    """Assemble the single feature row for the ticker's latest quarter.

    Each quarter in the earnings information is paired with the most recent
    annual statement dated more than 90 days before that quarter, joined with
    the previous quarter's factors (``quarterFactors.shift(1)``) and finally
    with the ticker's row in ``profiles``.  Columns present in both the annual
    and the quarterly factors receive the suffixes '_y' and '_q'.

    Parameters
    ----------
    ticker : str
        Ticker passed to ``annual_factors`` and ``quarter_factors`` and used
        as the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        The last row of the combined frame, indexed by ``ticker``.

    Raises
    ------
    ValueError
        If there is no earnings information for the ticker, or if a quarter
        has no annual statement dated more than 90 days before it.
    """
    annualFactors = annual_factors(ticker)

    quarterFactors, earnings_info = quarter_factors(ticker)

    if earnings_info.empty:
        raise ValueError('No earnings information available for {}'.format(ticker))

    # Work on a copy so the frame returned by quarter_factors is not modified.
    earnings_info = earnings_info.copy()

    lag = dt.timedelta(days = 90)
    annual_dates = annualFactors.index.tolist()

    matched_years = []
    for quarter_end in earnings_info.index:
        eligible = [year_end for year_end in annual_dates if year_end < quarter_end - lag]
        if not eligible:
            # Same exception type as the bare max([]) raised before, with a
            # message that says what is missing.
            raise ValueError('No annual statement for {} dated more than 90 days before '
                             'quarter {}'.format(ticker, quarter_end))
        matched_years.append(max(eligible))
    earnings_info['year'] = matched_years

    factor_df = earnings_info.reset_index().merge(annualFactors.reset_index(), on = 'year').set_index('quarter')

    # shift(1): every quarter is paired with the factors of the preceding row.
    factor_df = factor_df.join(quarterFactors.shift(1), lsuffix = '_y', rsuffix = '_q')

    del factor_df['year']

    test_row = factor_df.tail(1)
    test_row.index = [ticker]
    test_row = test_row.join(profiles)

    return test_row


## Yahoo Database Pull

In [135]:
start_time = time.time()

df = []
# ticker -> repr of the exception that caused it to be skipped
failed_tickers = {}

for ticker in profiles.index:
    try:
        curr_point = create_data_row(ticker)
    except Exception as exc:
        # Best effort: a ticker that cannot be processed is skipped, but the
        # reason is kept.  Catching Exception (not a bare except) lets
        # KeyboardInterrupt stop this long-running loop.
        failed_tickers[ticker] = repr(exc)
        continue
    df.append(curr_point)

print("--- %s seconds ---" % (time.time() - start_time))
print("--- %s tickers skipped (see failed_tickers) ---" % len(failed_tickers))

--- 1351.1860916614532 seconds ---


In [142]:
# `df` is the list of one-row frames built by the collection loop.  Guard the
# concat so that re-running this cell (when `df` is already the combined
# DataFrame) does not raise.
if isinstance(df, list):
    df = pd.concat(df, axis = 0)
# Written relative to the current working directory.
df.to_csv('earnings_oos.csv')
df

Unnamed: 0,earningsDate,closeToOpenReturn,industryBeta,marketBeta,stock52WeekReturn,market52WeekReturn,industry52WeekReturn,current_ratio_y,acidTest_ratio_y,cash_ratio_y,...,assetTurnover_ratiochangeallqtrs,grossMargin_ratiochangeallqtrs,operatingMargin_ratiochangeallqtrs,profitMargin_ratiochangeallqtrs,returnOnAssets_ratiochangeallqtrs,returnOnEquity_ratiochangeallqtrs,repurchasesToRevenue_ratiochangeallqtrs,epsActualchangeallqtrs,industry,sector
ANDV,2018-08-06,0.0063,0.8082,0.7997,0.5061,0.1484,0.1499,1.3763,0.6505,0.1086,...,0.0325,-0.3100,-0.6477,-0.7119,-0.6363,-0.6444,-0.0960,-0.7735,Oil & Gas Refining & Marketing,Energy
ACXM,2018-08-09,0.0072,0.8188,1.1277,0.8120,0.1545,0.2714,2.0204,,0.7977,...,0.0935,0.0382,0.1238,-2.4274,0.2289,0.2279,2.1179,0.2273,Information Technology Services,Technology
AVHI,2018-08-02,0.0009,0.2120,-0.2942,0.3987,0.1356,0.2098,16.2865,5.0665,4.4778,...,-0.2426,0.0374,-0.8697,1.1470,-0.9013,-0.8978,,0.4286,Residential Construction,Consumer Cyclical
BOFI,2018-08-07,-0.0401,0.6594,1.3814,0.4876,0.1505,0.1134,0.0971,0.0969,0.0812,...,0.1412,0.0000,0.0618,0.1923,0.2117,0.3250,1.1142,0.6000,Savings & Cooperative Banks,Financial Services
COBZ,2018-07-26,-0.0059,0.5866,0.7404,0.2297,0.1479,0.1134,0.0304,0.0289,0.0187,...,0.0615,0.0000,-0.0516,0.0987,0.0067,-0.0269,-0.0191,0.1481,Banks - Regional - US,Financial Services
FBNK,2018-07-18,0.0016,0.5148,0.1990,0.1742,0.1423,0.1067,0.0235,,0.0099,...,-0.0565,0.0000,-0.0635,0.0889,-0.1164,-0.0886,3.8479,0.0857,Banks - Regional - US,Financial Services
XCRA,2018-05-23,0.0057,0.6407,0.6952,0.4789,0.1382,0.2459,3.6115,2.7829,1.0535,...,-0.1207,0.0095,-0.2264,-0.2648,-0.3198,-0.3939,,-0.2400,Semiconductor Equipment & Materials,Technology
CVG,2018-08-08,0.0012,0.2174,0.6310,0.0445,0.1521,0.1106,2.6554,,0.5997,...,-0.0200,0.0064,0.0627,-0.1258,0.0414,0.0451,-0.2554,-0.0238,Business Services,Industrials
GPT,2018-07-30,0.0000,0.0618,0.2571,-0.0986,0.1384,-0.0064,0.9071,,0.1577,...,0.0153,-0.0142,-0.1566,-0.4567,-0.1437,-0.1505,-1.0000,-0.5000,REIT - Industrial,Real Estate
PHH,2018-08-03,0.0000,-0.0270,0.0282,-0.2101,0.1412,0.1047,9.2482,,3.8440,...,-0.5601,0.0756,3.2916,0.8857,0.8879,0.4099,,-0.2982,Specialty Finance,Financial Services


In [163]:
# Spot check: annual financials and ratios for one ticker, transposed so each
# fiscal year is a column.
ticker, period, statement_period = 'LNN', 'year', 'annual'

fin_factors(ticker, period, statement_period).transpose()

year,2015-08-31 00:00:00,2016-08-31 00:00:00,2017-08-31 00:00:00,2018-08-31 00:00:00
cash,139093000.0,101246000.0,121620000.0,160787000.0
inventory,74930000.0,74750000.0,86155000.0,79233000.0
totalAssets,536468000.0,487515000.0,506032000.0,500256000.0
totalCurrentAssets,322167000.0,276775000.0,292934000.0,331051000.0
totalCurrentLiabilities,95112000.0,87860000.0,92037000.0,80094000.0
totalLiab,247908000.0,235948000.0,235977000.0,223390000.0
totalStockholderEquity,288560000.0,251567000.0,270055000.0,276866000.0
grossProfit,156321000.0,148613000.0,145012000.0,151462000.0
interestExpense,-2626000.0,-4751000.0,-4757000.0,-4687000.0
netIncome,26309000.0,20267000.0,23179000.0,20277000.0
