In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import talib

# Load and visualize data.

In [None]:
# Load the SPY price history; the file's own header row (row 0) is discarded
# and replaced by the explicit column names below.
spy_data = pd.read_csv(
    'data/SPY.csv',
    header=0,
    names=['Date', 'Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume'],
)

In [None]:
# Preview rows 35 through 999 (positional slice) of the SPY frame.
spy_data.iloc[35:1000]

In [None]:
# NOTE(review): `count` is not referenced in the cells visible here — confirm it is still needed.
count = 0

# Load the Coke daily data, replacing the file's header row with explicit
# column names, then drop every row that has a missing value.
coke_data = pd.read_csv(
    'data/coke.csv',
    header=0,
    names=['datadate', 'tic', 'cshtrd', 'prccd', 'prchd', 'prcld', 'prcod'],
).dropna()


In [None]:
# Process data: parse the YYYYMMDD date column, then coerce the price and
# volume columns to numeric dtypes.
coke_data['datadate'] = pd.to_datetime(coke_data['datadate'], format='%Y%m%d')

# prccd = closing price, prcod = opening price; prchd / prcld / cshtrd are
# used further down as the high / low / volume inputs to the indicators.
for numeric_col in ('prccd', 'prcod', 'prchd', 'prcld', 'cshtrd'):
    coke_data[numeric_col] = pd.to_numeric(coke_data[numeric_col])

coke_data

In [None]:
# Opening price (prcod) over time.
coke_data.plot(
    x='datadate',
    y='prcod',
    title='Coke opening stock price from 2006-2021',
    grid=True,
)

In [None]:
# Closing price (prccd) over time.
coke_data.plot(
    x='datadate',
    y='prccd',
    title='Coke closing stock price from 2006-2021',
    grid=True,
)

# Compute technical indicators.

In [None]:
# Indicators computed in this section: RSI, MACD, OBV, CCI, ADX.
# NOTE(review): RSI is fed the opening price ('prcod') while the other
# indicators below use the closing price — confirm this is intentional.
coke_data['rsi'] = talib.RSI(coke_data['prcod'], timeperiod=14)  # 14 days is also the TA-Lib default

# Show the first 15 values of the new column.
coke_data['rsi'].head(15)

In [None]:
# RSI over time.
coke_data.plot(
    x='datadate',
    y='rsi',
    title='Relative Strength Index 2006-2021',
    grid=True,
)

In [None]:
# MACD line, signal line and histogram from the closing price. The periods
# are TA-Lib's defaults (12 / 26 / 9), written out explicitly.
coke_data['macd'], coke_data['macdsignal'], coke_data['macdhist'] = talib.MACD(
    coke_data['prccd'],
    fastperiod=12,
    slowperiod=26,
    signalperiod=9,
)

In [None]:
# All three MACD series on one set of axes.
coke_data.plot(
    x='datadate',
    y=['macd', 'macdsignal', 'macdhist'],
    title='Moving Average Convergence/Divergence 2006-2021',
    grid=True,
)

In [None]:
# On-Balance Volume from the closing price (prccd) and traded volume (cshtrd).
coke_data['obv'] = talib.OBV(
    coke_data['prccd'],
    coke_data['cshtrd'],
)
coke_data.plot(
    x='datadate',
    y='obv',
    title='On-Balance Volume 2006-2021',
    grid=True,
)

In [None]:
# Commodity Channel Index from high / low / close; 14 days is also the TA-Lib default.
coke_data['cci'] = talib.CCI(
    coke_data['prchd'],
    coke_data['prcld'],
    coke_data['prccd'],
    timeperiod=14,
)
coke_data.plot(
    x='datadate',
    y='cci',
    title='Commodity Channel Index 2006-2021',
    grid=True,
)

In [None]:
# ADX from high / low / close; 14 days is also the TA-Lib default.
coke_data['adx'] = talib.ADX(
    coke_data['prchd'],
    coke_data['prcld'],
    coke_data['prccd'],
    timeperiod=14,
)
coke_data.plot(
    x='datadate',
    y='adx',
    title='Average Directional Index 2006-2021',
    grid=True,
)

In [None]:
# Load the multi-ticker file, replacing its header row with explicit column names.
all_data = pd.read_csv(
    'data/allStocks.csv',
    header=0,
    names=[
        'gvkey', 'iid', 'Date', 'tic',
        'Volume', 'Close', 'High', 'Low', 'Open',
    ],
)

In [None]:
# Keep only tickers that are at most 4 characters long and contain no '.'.
# unique() preserves order of first appearance, so `stock` keeps that order.
# The isinstance guard skips non-string entries: a missing ticker (or one
# that read_csv parsed as missing, e.g. the literal text "NA") shows up as a
# float NaN, and `'.' in nan` would raise TypeError.
stock = [
    ticker
    for ticker in all_data['tic'].unique()
    if isinstance(ticker, str) and '.' not in ticker and len(ticker) <= 4
]

In [None]:
# Keep only the rows whose ticker passed the filter above.
keep_mask = all_data['tic'].isin(stock)
filtered = all_data[keep_mask]

In [None]:
# Persist the filtered rows without writing the index column.
filtered.to_csv('data/filtered.csv', index=False)

In [None]:
# Reload the filtered data from disk; row 0 of the file supplies the column names.
fp = pd.read_csv(
    'data/filtered.csv',
    header=0,
)
fp

In [None]:
# Generate a dataframe with technical indicators, computed one ticker at a time.
# NOTE(review): the indicators are order-dependent; this assumes each ticker's
# rows are already in chronological order — TODO confirm.

output = []

# groupby(sort=False) visits tickers in order of first appearance (the same
# order as fp['tic'].unique()) in a single pass, instead of re-scanning the
# whole frame with a boolean mask for every ticker.
for s, ticker_rows in fp.groupby('tic', sort=False):
    # Take an independent copy so the column assignments below do not write
    # into a slice of `fp` (which triggers SettingWithCopyWarning).
    df = ticker_rows.copy()

    # Compute the technical indicators.
    # NOTE(review): RSI uses 'Open' while the others use 'Close' — confirm intended.
    df['rsi'] = talib.RSI(df['Open'])
    df['macd'], df['macdsignal'], df['macdhist'] = talib.MACD(df['Close'])  # defaults: fastperiod=12, slowperiod=26, signalperiod=9
    df['obv'] = talib.OBV(df['Close'], df['Volume'])
    df['cci'] = talib.CCI(df['High'], df['Low'], df['Close'])  # default period: 14 days
    df['adx'] = talib.ADX(df['High'], df['Low'], df['Close'])  # default period: 14 days

    output.append(df)

# Stack the per-ticker frames back into one; original row indices are kept.
output_df = pd.concat(output)
output_df[output_df['tic'] == 'AAR']

In [None]:
# Parse the YYYYMMDD values in 'Date' into datetimes. With errors='coerce',
# entries that fail to parse become NaT instead of raising.
output_df['Date'] = dates = pd.to_datetime(
    output_df['Date'],
    format='%Y%m%d',
    errors='coerce',
)

In [None]:
# Inspect the rows for ticker AAPL.
output_df.loc[output_df['tic'] == 'AAPL']

In [None]:
# Persist the indicator-augmented frame without writing the index column.
output_df.to_csv('data/filtered_with_ti.csv', index=False)