In [1]:
import numpy as np  
import scipy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline

In [2]:
# Load every input table into its own pandas DataFrame.
# Each file is read the same way: row 0 is the header, column 0 becomes the
# index, and pandas is asked to parse that index as dates.

df_indices = pd.read_csv(
    "modified_datasets/indices",
    header=0,
    index_col=0,
    parse_dates=True,
)
df_sentiment = pd.read_csv(
    "modified_datasets/sentiment",
    header=0,
    index_col=0,
    parse_dates=True,
)
df_stocks = pd.read_csv(
    "modified_datasets/stock_info_all",
    header=0,
    index_col=0,
    parse_dates=True,
)

In [3]:
# CONVERTING TO LOG RETURNS
#
# Joins the stock and index tables on their shared index, drops every row
# that has a missing value, and replaces each close / volume level with its
# one-step log change log(1 + (v[t+1] - v[t]) / v[t]).

df_pre = pd.concat([df_stocks, df_indices], axis=1).dropna()

# Stashing column names
prices = [k for k in df_pre.columns if 'close' in k]
vols = [k for k in df_pre.columns if 'volume' in k]

# A zero volume would be divided by below; zeros are replaced by 1.0 for
# this one column only (as in the original code).
df_pre['ccgi_volume'] = df_pre['ccgi_volume'].replace(0.0, 1.0)

# Cast to float so the ratio is a true division even for integer columns.
mat = df_pre[prices].values.astype(float)
matv = df_pre[vols].values.astype(float)

# Row t of each result is the change from row t to row t+1 of the levels.
# Each result is shaped from its own input, so price and volume column
# counts no longer have to match.
matnew = np.log(1 + (mat[1:] - mat[:-1]) / mat[:-1])
matnewv = np.log(1 + (matv[1:] - matv[:-1]) / matv[:-1])

# The first row has no predecessor, so the returns start at the second date.
df_pre = df_pre.iloc[1:,]
df_pre = pd.concat([pd.DataFrame(matnewv, columns=vols, index=df_pre.index),
                    pd.DataFrame(matnew, columns=prices, index=df_pre.index)], axis=1)

# NOTE(review): the first four return rows are discarded; the reason is not
# evident from this cell -- confirm.
df_pre = df_pre.iloc[4:]

# Order the columns alphabetically by name.
df_pre = df_pre.sort_index(axis=1)
#df_sentiment = (df_sentiment - df_sentiment.min()) / (df_sentiment.max() - df_sentiment.min())

In [None]:
# TECHNIQUES FOR DETRENDING
#
# Plots the cumulative sum of abb_close together with three detrended
# versions of it on a single figure.

from scipy import signal
from sklearn.linear_model import LinearRegression

x = np.cumsum(df_pre.abb_close, axis=0)

plt.figure()
x.plot(color='yellow', label='Original')

# Detrending 1 - SLOW MOVING AVERAGE - PICK WINDOW LENGTH
win = 60
y1 = np.zeros(len(x))
for i in range(len(x) - win):
    # Mean of the `win` samples starting at position i (forward-looking window).
    y1[i] = np.mean(x.values[i:i + win])
# NOTE(review): the last `win` entries of y1 stay 0, so the tail of z1 equals
# the raw series; the average also still needs to be offset at the start.
y1 = pd.Series(y1, index=x.index)
z1 = x - y1
z1.plot(color='blue', label='SMA')

# Detrending 2 - SCIPY DETREND
y2 = pd.Series(signal.detrend(x), index=x.index)
y2.plot(color='green', label='Scipy function')

# Detrending 3 - LINEAR REGRESSION
mod = LinearRegression()
r = np.arange(len(x)).reshape((len(x), 1))   # regressor: sample position 0..n-1
v = x.values.reshape((len(x), 1))            # target: the series values
mod.fit(r, v)
# Evaluate the fitted line on the same regressor it was fit on. Predicting on
# `v` would treat the series values as time positions.
trend = mod.predict(r)
y3 = pd.Series(trend.ravel(), index=x.index)
#y3.plot(color='red', label='Linear Regression')
z3 = x - y3
z3.plot(color='red', label='LR')

# Show the labels assigned above; the keyword is `bbox_to_anchor`.
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
plt.show()

In [109]:
from matplotlib.backends.backend_pdf import PdfPages

def detrend(x, mode):
    """Remove the least-squares linear trend from a series.

    Parameters
    ----------
    x : pandas.Series
        Input samples; their position (0..n-1) is used as the time axis.
    mode : int
        If 1, the cumulative sum of ``x`` is detrended; for any other value
        ``x`` itself is detrended.

    Returns
    -------
    pandas.Series
        Residuals (series minus fitted line), carrying the index of ``x``.
    """
    # The series selected here is the one that is both fitted and returned.
    x1 = x.cumsum() if mode == 1 else x

    n = len(x1)
    if n < 2:
        # No line can be fitted through fewer than two points; the residual
        # of a single sample against its own level is zero.
        return x1 - x1

    t = np.arange(n)
    # Ordinary least-squares line through (t, x1), evaluated on t itself.
    slope, intercept = np.polyfit(t, x1.values.astype(float), 1)
    trend = pd.Series(slope * t + intercept, index=x1.index)
    return x1 - trend

def stationarity(x):
    """Placeholder for a stationarity check on ``x``; not implemented yet.

    The definition previously had no body, which made the whole cell fail to
    parse. It now raises explicitly until an implementation is written.
    """
    # TODO: implement the stationarity check.
    raise NotImplementedError("stationarity() has not been implemented")

# Seasonal overlay written to 'seasonal_stock_ticks.pdf': one page per visited
# column, one panel per quarter, one line per yearly pass through df_pre.
# Every 60-row slice is passed through detrend(..., 1) before plotting.
QUARTER_LEN = 60                  # rows drawn in one quarter panel
YEAR_LEN = 4 * QUARTER_LEN        # rows consumed by one yearly pass (240)
# From this row offset on only the Q3/Q4 panels are drawn for the pass
# (presumably the data ends after the 4th quarter -- TODO confirm).
LAST_PARTIAL_START = 7 * YEAR_LEN  # 1680

with PdfPages('seasonal_stock_ticks.pdf') as pdf:
    # Every second column starting at 0 is visited; this assumes those are the
    # close columns of df_pre -- TODO confirm against the column order.
    for j in range(0, len(df_pre.columns), 2):

        fig = plt.figure()
        t = 'Stock ' + df_pre.columns[j].split('_')[0].upper()
        fig.suptitle(t, fontsize=14)

        # Create the four panels once per figure. Re-calling add_subplot with
        # the same position on every pass stacks a new Axes on top of the old
        # one in current matplotlib instead of reusing it.
        ax3 = fig.add_subplot(221)
        ax4 = fig.add_subplot(222)
        ax1 = fig.add_subplot(223)
        ax2 = fig.add_subplot(224)
        ax3.set_title("1st quarter Jan-Mar")
        ax4.set_title("2nd quarter Apr-Jun")
        ax1.set_title("3rd quarter Jul-Sept")
        ax2.set_title("4th quarter Oct-Dec")

        days = range(QUARTER_LEN)
        for i in range(0, len(df_pre) - YEAR_LEN, YEAR_LEN):
            q3 = df_pre.iloc[i:i + QUARTER_LEN, j]
            q4 = df_pre.iloc[i + QUARTER_LEN:i + 2 * QUARTER_LEN, j]
            ax1.plot(days, detrend(q3, 1))
            ax2.plot(days, detrend(q4, 1))

            if i >= LAST_PARTIAL_START:
                break

            q1 = df_pre.iloc[i + 2 * QUARTER_LEN:i + 3 * QUARTER_LEN, j]
            q2 = df_pre.iloc[i + 3 * QUARTER_LEN:i + 4 * QUARTER_LEN, j]
            ax3.plot(days, detrend(q1, 1))
            # Label each pass with the year of the first row of its Q2 slice.
            ax4.plot(days, detrend(q2, 1),
                     label=df_pre.index[i + 3 * QUARTER_LEN].year)

        # Draw the legend once, and only if at least one labelled line exists.
        handles, labels = ax4.get_legend_handles_labels()
        if handles:
            ax4.legend(bbox_to_anchor=(1.0, 0.5), loc='center left', borderaxespad=0.)

        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        pdf.savefig(fig, bbox_inches='tight')
        plt.close(fig)

In [110]:
# Seasonal overlay written to 'seasonal_stock_vol.pdf': one page per visited
# column, one panel per quarter, one line per yearly pass through df_pre.
# Every 60-row slice is passed through detrend(..., 0) before plotting.
QUARTER_LEN = 60                  # rows drawn in one quarter panel
YEAR_LEN = 4 * QUARTER_LEN        # rows consumed by one yearly pass (240)
# From this row offset on only the Q3/Q4 panels are drawn for the pass
# (presumably the data ends after the 4th quarter -- TODO confirm).
LAST_PARTIAL_START = 7 * YEAR_LEN  # 1680

with PdfPages('seasonal_stock_vol.pdf') as pdf:
    # Every second column starting at 1 is visited; this assumes those are the
    # volume columns of df_pre -- TODO confirm against the column order.
    for j in range(1, len(df_pre.columns), 2):

        fig = plt.figure()
        t = 'Stock ' + df_pre.columns[j].split('_')[0].upper()
        fig.suptitle(t, fontsize=14)

        # Create the four panels once per figure. Re-calling add_subplot with
        # the same position on every pass stacks a new Axes on top of the old
        # one in current matplotlib instead of reusing it.
        ax3 = fig.add_subplot(221)
        ax4 = fig.add_subplot(222)
        ax1 = fig.add_subplot(223)
        ax2 = fig.add_subplot(224)
        ax3.set_title("1st quarter Jan-Mar")
        ax4.set_title("2nd quarter Apr-Jun")
        ax1.set_title("3rd quarter Jul-Sept")
        ax2.set_title("4th quarter Oct-Dec")

        days = range(QUARTER_LEN)
        for i in range(0, len(df_pre) - YEAR_LEN, YEAR_LEN):
            q3 = df_pre.iloc[i:i + QUARTER_LEN, j]
            q4 = df_pre.iloc[i + QUARTER_LEN:i + 2 * QUARTER_LEN, j]
            ax1.plot(days, detrend(q3, 0))
            ax2.plot(days, detrend(q4, 0))

            if i >= LAST_PARTIAL_START:
                break

            q1 = df_pre.iloc[i + 2 * QUARTER_LEN:i + 3 * QUARTER_LEN, j]
            q2 = df_pre.iloc[i + 3 * QUARTER_LEN:i + 4 * QUARTER_LEN, j]
            ax3.plot(days, detrend(q1, 0))
            # Label each pass with the year of the first row of its Q2 slice.
            ax4.plot(days, detrend(q2, 0),
                     label=df_pre.index[i + 3 * QUARTER_LEN].year)

        # Draw the legend once, and only if at least one labelled line exists.
        handles, labels = ax4.get_legend_handles_labels()
        if handles:
            ax4.legend(bbox_to_anchor=(1.0, 0.5), loc='center left', borderaxespad=0.)

        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        pdf.savefig(fig, bbox_inches='tight')
        plt.close(fig)