In [1]:
# Importing necessary models
import warnings
warnings.filterwarnings('ignore')

import smtplib
import pandas as pd
import numpy as np
import datetime as dt
import pandas.stats.moments as st
import time
%matplotlib inline

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sqlalchemy import *
from sqlalchemy import create_engine

from functools import reduce

import tensorflow as tf

import sqlite3 as sql
pd.options.display.float_format = '{:,.4f}'.format

init_notebook_mode(connected=True)

import os

dbs_dir = 'C:\\Users\\Fang\\Desktop\\Python Trading\\Trading\\Data\\DBs'

os.chdir(dbs_dir)

earn_engine = create_engine('sqlite:///earningsHistory.db', echo=False)
yahoo_engine = create_engine('sqlite:///yahoo.db', echo = False)
reuters_engine = create_engine('sqlite:///reuters.db', echo=False)

In [2]:
period_fundamental_tables = list(filter(lambda x: 'quarterly' in x, yahoo_engine.table_names()))

In [3]:
# 'SELECT * FROM overviews WHERE Underlying IN ' + str(tuple(curr_earnings.index))

fa_df = []

for yahoo_table in period_fundamental_tables:

    yahoo_engine_query = 'SELECT * FROM {0}'.format(yahoo_table)

    yahoo_data = pd.read_sql_query(yahoo_engine_query,
                                   con = yahoo_engine).drop_duplicates()
    yahoo_data.quarter = pd.to_datetime(yahoo_data.quarter)
    yahoo_data = yahoo_data.sort_values(['Underlying','quarter']).reset_index(drop = True)
    
    if len(fa_df) == 0:
        fa_df = yahoo_data
    else:
        fa_df = pd.merge(fa_df, yahoo_data,
                         how = 'inner', on = ['quarter','Underlying'])
        

keep_cols = list(filter(lambda x: '_y' not in x and 
                        'maxAge' not in x and 
                        'minorityInterest' not in x, fa_df.columns.tolist()))
fa_df = fa_df[keep_cols].fillna(np.nan).replace('na',np.nan)

fa_df = fa_df[fa_df.Underlying.isin(fa_df[['quarter',
                                           'Underlying']][fa_df[['quarter',
                                                                 'Underlying']].duplicated()].Underlying.drop_duplicates().tolist()) == False]

fa_df = fa_df.set_index(['quarter','Underlying']).apply(lambda x: pd.to_numeric(x)).reset_index()

earnings_returns = pd.read_sql_query('SELECT * FROM postEarningsReturns', 
                                     con = earn_engine).drop_duplicates().drop_duplicates()

earnings_returns.earningsDate = pd.to_datetime(earnings_returns.earningsDate)
earnings_returns = earnings_returns.sort_values(['Underlying','earningsDate']).reset_index(drop = True)

In [40]:
key_periods = fa_df[['Underlying','quarter']].groupby(['quarter']).count()
key_periods = key_periods[key_periods.Underlying > 3000]

key_names = fa_df[fa_df.quarter.isin(key_periods.index.tolist())].groupby(['Underlying']).count()
key_names = key_names[key_names.quarter == len(key_periods)].index.tolist()

pre_tensor = fa_df[fa_df.Underlying.isin(key_names) & 
                   fa_df.quarter.isin(key_periods.index.tolist())].sort_values(['quarter','Underlying']).reset_index(drop = True)

In [41]:
n_obs = len(pre_tensor.quarter.drop_duplicates())
n_rows = len(pre_tensor.Underlying.drop_duplicates())
n_features = len(pre_tensor.columns)

In [70]:
pre_tensor.values.reshape(n_features,n_obs,n_rows)[0][0]

array([Timestamp('2017-12-31 00:00:00'), 'AA', 1898000000.0, ..., 0.23,
       0.043, 0], dtype=object)

In [47]:
pre_tensor

Unnamed: 0,quarter,Underlying,accountsPayable,capitalSurplus,cash,commonStock,deferredLongTermAssetCharges,deferredLongTermLiab,goodWill,intangibleAssets,...,netIncomeFromContinuingOps,nonRecurring,operatingIncome,otherItems,otherOperatingExpenses,researchDevelopment,sellingGeneralAdministrative,totalOperatingExpenses,totalOtherIncomeExpenseNet,totalRevenue
0,2017-12-31,AA,1898000000.0000,9590000000.0000,1358000000.0000,2000000.0000,814000000.0000,,154000000.0000,62000000.0000,...,-56000000.0000,,559000000.0000,,-4000000.0000,9000000.0000,70000000.0000,2615000000,-343000000,3174000000
1,2017-12-31,AAL,1688000000.0000,5714000000.0000,295000000.0000,5000000.0000,427000000.0000,,4091000000.0000,2203000000.0000,...,258000000.0000,,970000000.0000,,1231000000.0000,,383000000.0000,9631000000,-545000000,10601000000
2,2017-12-31,AAN,80821000.0000,270043000.0000,51037000.0000,45376000.0000,11589000.0000,,622948000.0000,279151000.0000,...,177560000.0000,,110653000.0000,,,,327745000.0000,773974000,-49915000,884627000
3,2017-12-31,AAOI,43624000.0000,285376000.0000,82936000.0000,19000.0000,12801000.0000,,,4811000.0000,...,5716000.0000,,10320000.0000,,,10670000.0000,11164000.0000,69535000,-901000,79855000
4,2017-12-31,AAON,10967000.0000,,21457000.0000,210000.0000,,,,,...,15770000.0000,,17561000.0000,,,,13291000.0000,86599000,-111000,104160000
5,2017-12-31,AAT,38069000.0000,919066000.0000,82610000.0000,473000.0000,,21549000.0000,,9776000.0000,...,9731000.0000,,23792000.0000,,,,6860000.0000,56701000,-13815000,80493000
6,2017-12-31,AAWW,65740000.0000,715735000.0000,280809000.0000,301000.0000,915000.0000,236904000.0000,40361000.0000,66124000.0000,...,209454000.0000,,107865000.0000,,45522000.0000,,39189000.0000,520087000,-856000,627952000
7,2017-12-31,AAXN,8592000.0000,201672000.0000,75105000.0000,1000.0000,15755000.0000,6803000.0000,14927000.0000,18823000.0000,...,-2071000.0000,,8648000.0000,,,14755000.0000,39613000.0000,86003000,-1582000,94651000
8,2017-12-31,AB,,,,1592187000.0000,,,,,...,78593000.0000,,85725000.0000,,,,,0,0,85725000
9,2017-12-31,ABB,5419000000.0000,29000000.0000,4526000000.0000,188000000.0000,1250000000.0000,,11199000000.0000,2581000000.0000,...,423000000.0000,,933000000.0000,,-106000000.0000,398000000.0000,1543000000.0000,8347000000,-352000000,9280000000
