In [1]:
from stock_data import Stock
import statsmodels.tsa.stattools as stattools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime


This file uses ticker_data from Quant connect. The data is assumed to be stored in a folder 'S&P500_3monthdata/ticker_breakdown' in the root directory.
Specify Stock. DATA_FOLDER = <new_folder> to change this folder.


# Finding stocks that move together
## Data setup
First of all, we construct the stock objects and filter them by industry. The following are the industries available:  

`['Industrials', 'Health Care', 'Information Technology', 'Communication Services', 'Consumer Discretionary', 'Utilities', 'Financials', 'Materials', 'Real Estate', 'Consumer Staples', 'Energy']`

The data for all stocks is one minute data from **03/02/2020** to **30/04/2020** from **9:30am** to **4:00pm**.

Using the `Stock` class from the `stock_data` file, we can turn the one minute data into data of any (less granular) time interval by using the aggregating methods.

In [2]:
BY_INDUSTRY = Stock.all_by_industry()
INDUSTRIES = BY_INDUSTRY.keys()
plt.style.use('seaborn-dark-palette')

In [11]:
all_stocks = Stock.all_stocks()
if True:
    for plotted_stock in all_stocks:
        plotted_data = plotted_stock.five_minute()

        time, price = plotted_data['time'], plotted_data['price']

        plt.plot(time, price)
        plt.title(f"Price of {plotted_stock.name} using 5 min intervals")
        plt.xlabel("Date")
        plt.ylabel("Price (USD)")

        plt.gca().set_ylim(bottom=0)
        plt.xticks(rotation=30)
        plt.tight_layout()
        plt.savefig(f"all_stocks_figures/{plotted_stock.name}.png")
        plt.clf()

<Figure size 432x288 with 0 Axes>

In [4]:
len(Stock.LIST)

503

## Analysis
With the multiple sectors there are to choose from, its important to choose a sector that is likely to have pairs of stocks that move together.

As our dataset is 

In [5]:
energy_stocks = BY_INDUSTRY["Energy"]
energy_stocks_5min = [stock.five_minute() for stock in energy_stocks]
energy_stocks_30min = [stock.any_minute(30) for stock in energy_stocks]



# Calculating pvalues for 5 min data over 1.5 months (half the time period)

In [6]:
cointegration_data = energy_stocks_5min
time_length = cointegration_data[0].shape[0]

## Only testing over half the time interval so we can get an initial feel for the data,
## and then do qualitative analysis before a quantitative check on the whole range.
cutoff = int(time_length/2)

In [7]:
p_values = np.zeros((len(cointegration_data), len(cointegration_data)))

In [8]:
for i in range(len(cointegration_data)):
    p_values[i,i] = 1

for stock1_index in range(len(cointegration_data)): # Looping through all stocks
    for stock2_index in range(stock1_index+1, len(cointegration_data)): # Looping from stock1_index to the end
        
        # Getting the pair of stocks over half the total time period
        both_stock_data = [cointegration_data[stock1_index][cutoff:],
                           cointegration_data[stock2_index][cutoff:]]
        
        _, p_value, _ =stattools.coint(both_stock_data[0]["price"], both_stock_data[1]["price"])
        
        p_values[stock1_index, stock2_index] = p_value
        p_values[stock2_index, stock1_index] = p_value


In [9]:
delta = p_values_5min-p_values

NameError: name 'p_values_5min' is not defined

In [None]:
import seaborn as sns

ax = sns.heatmap(p_values_5min, linewidth=0.5)
ax.set_title("Cointegration p-values for 5min data over last 1.5 months")
plt.show()

In [None]:
thresh = 0.90

above_thresh = np.zeros_like(p_values_5min)
above_thresh[p_values_5min >= thresh] = 1

ax = sns.heatmap(above_thresh, linewidth=0.5)
ax.set_title(f"Cointegration p-values above {thresh} for 5min data over last 1.5 months")
plt.show()

In [None]:
energy_stocks[7].name

In [None]:
stock_list = []
for row_index in range(len(above_thresh)):
    for col_index in range(row_index + 1, len(above_thresh[0])):
        if (above_thresh[row_index, col_index]==False): # Skipping all entries that have low p_value
            continue
        stock1, stock2 = energy_stocks[row_index].name, energy_stocks[col_index].name
        
        if stock1 not in stock_list:
            stock_list.append(stock1)
        if stock2 not in stock_list:
            stock_list.append(stock2)
            
        print(f"{stock1}~{stock2}")
        


In [None]:
(",").join(stock_list)

In [None]:
for stock in energy_stocks:
    data = stock.five_minute()
    plt.plot(data["time"],data["price"])
plt.show()

In [None]:
plot_stocks = [Stock("EOG"), Stock("COP")]
plot_stocks_daily = [stock.any_minute(24*60) for stock in plot_stocks]

plt.plot(plot_stocks_daily[0]["time"], plot_stocks_daily[0]["price"])
plt.plot(plot_stocks_daily[1]["time"], plot_stocks_daily[1]["price"])
plt.show()

price_data = [data["price"] for data in plot_stocks_daily]
delta = price_data[0]-price_data[1]
z_delta = (delta-delta.mean())/np.std(delta)
plt.plot(plot_stocks_daily[1]["time"], z_delta)
plt.plot(plot_stocks_daily[1]["time"], np.zeros_like(z_delta), color="red")
plt.title("Standardized difference")
plt.show()

In [19]:
Stock.all_stocks()

[<Stock='A'>,
 <Stock='AAL'>,
 <Stock='AAP'>,
 <Stock='AAPL'>,
 <Stock='ABBV'>,
 <Stock='ABC'>,
 <Stock='ABMD'>,
 <Stock='ABT'>,
 <Stock='ACN'>,
 <Stock='ADBE'>,
 <Stock='ADI'>,
 <Stock='ADM'>,
 <Stock='ADP'>,
 <Stock='ADSK'>,
 <Stock='AEE'>,
 <Stock='AEP'>,
 <Stock='AES'>,
 <Stock='AFL'>,
 <Stock='AIG'>,
 <Stock='AIZ'>,
 <Stock='AJG'>,
 <Stock='AKAM'>,
 <Stock='ALB'>,
 <Stock='ALGN'>,
 <Stock='ALK'>,
 <Stock='ALL'>,
 <Stock='ALLE'>,
 <Stock='ALXN'>,
 <Stock='AMAT'>,
 <Stock='AMCR'>,
 <Stock='AMD'>,
 <Stock='AME'>,
 <Stock='AMGN'>,
 <Stock='AMP'>,
 <Stock='AMT'>,
 <Stock='AMZN'>,
 <Stock='ANET'>,
 <Stock='ANSS'>,
 <Stock='ANTM'>,
 <Stock='AON'>,
 <Stock='AOS'>,
 <Stock='APA'>,
 <Stock='APD'>,
 <Stock='APH'>,
 <Stock='APTV'>,
 <Stock='ARE'>,
 <Stock='ATO'>,
 <Stock='ATVI'>,
 <Stock='AVB'>,
 <Stock='AVGO'>,
 <Stock='AVY'>,
 <Stock='AWK'>,
 <Stock='AXP'>,
 <Stock='AZO'>,
 <Stock='BA'>,
 <Stock='BAC'>,
 <Stock='BAX'>,
 <Stock='BBY'>,
 <Stock='BDX'>,
 <Stock='BEN'>,
 <Stock='BF.B'>,
 <Stock