In [1]:
# Import libraries
import os
import sys
import requests

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the data from the CSV file
price_data = pd.read_csv('./data/CCL_historical_data.csv')
price_data

Unnamed: 0,datetime,o,h,l,c,v,readable_time
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00
1,1569832200000,43.30,43.30,43.30,43.30,300,2019-09-30 03:30:00
2,1569834000000,43.51,43.51,43.50,43.50,252,2019-09-30 04:00:00
3,1569837600000,43.51,43.51,43.50,43.50,300,2019-09-30 05:00:00
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00
...,...,...,...,...,...,...,...
4631,1591997400000,19.77,19.79,19.65,19.65,62906,2020-06-12 16:30:00
4632,1591999200000,19.65,19.77,19.60,19.73,26396,2020-06-12 17:00:00
4633,1592001000000,19.70,19.79,19.67,19.73,25103,2020-06-12 17:30:00
4634,1592002800000,19.73,19.90,19.72,19.90,44131,2020-06-12 18:00:00


In [3]:
# Add a "Change in price" column
price_data['change_in_price'] = price_data['c'].diff()

In [4]:
price_data.head()

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31


In [5]:
# Let's smooth out the data to remove some randomness and noise

# Define the number of frequency intervals you want to predict (in this case, 1 minute)
interval_out = 25 # So roughly 1 day out

# Grou p by symbol, then apply the rolling function and grab the min/max
price_data_smoothed = price_data[['c','l','h','o','v']].transform(lambda x: x.ewm(span = interval_out).mean())
    
smoothed_df = pd.concat([price_data[['datetime','readable_time']], price_data_smoothed], axis = 1, sort=False)
smoothed_df

Unnamed: 0,datetime,readable_time,c,l,h,o,v
0,1569830400000,2019-09-30 03:00:00,43.480000,43.460000,43.810000,43.810000,7.000000e+02
1,1569832200000,2019-09-30 03:30:00,43.386400,43.376800,43.544800,43.544800,4.920000e+02
2,1569834000000,2019-09-30 04:00:00,43.427335,43.421194,43.532260,43.532260,4.055181e+02
3,1569837600000,2019-09-30 05:00:00,43.447737,43.443320,43.526010,43.526010,3.758921e+02
4,1569841200000,2019-09-30 06:00:00,43.532226,43.528839,43.592244,43.592244,3.581922e+02
...,...,...,...,...,...,...,...
4631,1591997400000,2020-06-12 16:30:00,19.389961,19.164282,19.582007,19.371606,2.112672e+06
4632,1591999200000,2020-06-12 17:00:00,19.416118,19.197798,19.596468,19.393021,1.952189e+06
4633,1592001000000,2020-06-12 17:30:00,19.440263,19.234122,19.611355,19.416635,1.803951e+06
4634,1592002800000,2020-06-12 18:00:00,19.475627,19.271497,19.633558,19.440740,1.668581e+06


In [6]:
smoothed_df['Signal_Flag'] = smoothed_df['datetime'].transform(lambda x: np.sign(x.diff(interval_out)))

smoothed_df.head(50)

Unnamed: 0,datetime,readable_time,c,l,h,o,v,Signal_Flag
0,1569830400000,2019-09-30 03:00:00,43.48,43.46,43.81,43.81,700.0,
1,1569832200000,2019-09-30 03:30:00,43.3864,43.3768,43.5448,43.5448,492.0,
2,1569834000000,2019-09-30 04:00:00,43.427335,43.421194,43.53226,43.53226,405.518124,
3,1569837600000,2019-09-30 05:00:00,43.447737,43.44332,43.52601,43.52601,375.892141,
4,1569841200000,2019-09-30 06:00:00,43.532226,43.528839,43.592244,43.592244,358.192175,
5,1569844800000,2019-09-30 07:00:00,43.586236,43.583532,43.644233,43.640199,850.702953,
6,1569846600000,2019-09-30 07:30:00,43.633517,43.613384,43.681114,43.659889,997.084516,
7,1569848400000,2019-09-30 08:00:00,43.675238,43.63886,43.715093,43.6778,1314.108147,
8,1569850200000,2019-09-30 08:30:00,43.656475,43.620303,43.775756,43.697607,53073.49137,
9,1569852000000,2019-09-30 09:00:00,43.657665,43.606297,43.765876,43.676296,72686.288569,


In [7]:
# Calculate the 25 interval(30 minute) RSI (so roughly 1 day)
n = 25
up_df, down_df = price_data[['datetime','change_in_price']].copy(), price_data[['datetime','change_in_price']].copy()

up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0
down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0

# We can obly have positive values so change negative down days to their absolute value
down_df['change_in_price'] = down_df['change_in_price'].abs()

# Calculate the EQMA (Exponential Weighted Moving Average), meaning older values are given less weight compared
#  new values
ewma_up = up_df['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

# # Calculate Relative Strength
relative_strength = ewma_up / ewma_down

# Calculate Relative Strength Index (RSI)
relative_strength_index = 100.0 - (100 / (1.0 + relative_strength))

# Add data into the data frame
price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = down_df['change_in_price']
price_data['RSI'] = relative_strength_index

price_data.head()

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366


In [8]:
# Stochastic Oscillator

n = 25

low_25, high_25 = price_data[['datetime', 'l']].copy(), price_data[['datetime', 'h']].copy()

low_25 = low_25['l'].transform(lambda x: x.rolling(window = n).min())
high_25 = high_25['h'].transform(lambda x: x.rolling(window = n).max())

# Calculate the Stochastic Oscillator
k_percent = 100 * ((price_data['c'] - 14) / (high_25 - low_25))

# Add the info to the data frame
price_data['low_14'] = low_25
price_data['high_14'] = high_25
price_data['k_percent'] = k_percent

price_data.head(30)

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,,,,
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0,,,
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849,,,
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849,,,
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366,,,
5,1569844800000,43.83,43.85,43.8,43.8,2800,2019-09-30 07:00:00,-0.01,0.01,0.01,75.916117,,,
6,1569846600000,43.75,43.8499,43.75,43.8499,1667,2019-09-30 07:30:00,0.0499,0.0,0.0,77.956121,,,
7,1569848400000,43.77,43.89,43.77,43.89,2946,2019-09-30 08:00:00,0.0401,0.0,0.0,79.470026,,,
8,1569850200000,43.81,44.12,43.515,43.55,346790,2019-09-30 08:30:00,-0.34,0.34,0.34,48.730019,,,
9,1569852000000,43.545,43.705,43.52,43.665,193525,2019-09-30 09:00:00,0.115,0.0,0.0,55.094733,,,


In [9]:
# Calculate the Willams %R
n = 25


low_25, high_25 = price_data[['datetime', 'l']].copy(), price_data[['datetime', 'h']].copy()

low_25 = low_25['l'].transform(lambda x: x.rolling(window = n).min())
high_25 = high_25['h'].transform(lambda x: x.rolling(window = n).max())

# Calculate William %R
r_percent = ((high_25 - price_data['c']) / (high_25 - low_25)) * -100

price_data['r_percent'] = r_percent

price_data.head(30)

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,,,,,
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0,,,,
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849,,,,
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849,,,,
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366,,,,
5,1569844800000,43.83,43.85,43.8,43.8,2800,2019-09-30 07:00:00,-0.01,0.01,0.01,75.916117,,,,
6,1569846600000,43.75,43.8499,43.75,43.8499,1667,2019-09-30 07:30:00,0.0499,0.0,0.0,77.956121,,,,
7,1569848400000,43.77,43.89,43.77,43.89,2946,2019-09-30 08:00:00,0.0401,0.0,0.0,79.470026,,,,
8,1569850200000,43.81,44.12,43.515,43.55,346790,2019-09-30 08:30:00,-0.34,0.34,0.34,48.730019,,,,
9,1569852000000,43.545,43.705,43.52,43.665,193525,2019-09-30 09:00:00,0.115,0.0,0.0,55.094733,,,,


In [10]:
#  Calculate MACD 
ema_26 = price_data['c'].transform(lambda x: x.ewm(span = 26).mean())
ema_12 = price_data['c'].transform(lambda x: x.ewm(span = 12).mean())
macd = ema_12 - ema_26

ema_9_macd = macd.ewm(span = 9).mean()

# Store the data in the data fram
price_data['MACD'] = macd
price_data['MACD_EMA'] = ema_9_macd

price_data.head(30)

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,,,,,,0.0,0.0
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0,,,,,-0.004038,-0.002244
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849,,,,,0.001056,-0.000891
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849,,,,,0.003347,0.000545
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366,,,,,0.016858,0.005397
5,1569844800000,43.83,43.85,43.8,43.8,2800,2019-09-30 07:00:00,-0.01,0.01,0.01,75.916117,,,,,0.024235,0.010503
6,1569846600000,43.75,43.8499,43.75,43.8499,1667,2019-09-30 07:30:00,0.0499,0.0,0.0,77.956121,,,,,0.030809,0.015642
7,1569848400000,43.77,43.89,43.77,43.89,2946,2019-09-30 08:00:00,0.0401,0.0,0.0,79.470026,,,,,0.036629,0.020686
8,1569850200000,43.81,44.12,43.515,43.55,346790,2019-09-30 08:30:00,-0.34,0.34,0.34,48.730019,,,,,0.023233,0.021274
9,1569852000000,43.545,43.705,43.52,43.665,193525,2019-09-30 09:00:00,0.115,0.0,0.0,55.094733,,,,,0.019316,0.020835


In [11]:
# Calculate the Price Rate of Change
n = 9

# Calculate the Rate of Change in the Price and store it
price_data['Price_Rate_Of_Change'] = price_data['c'].transform(lambda x: x.pct_change(periods = n))

price_data.head(30)

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,,,,,,0.0,0.0,
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0,,,,,-0.004038,-0.002244,
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849,,,,,0.001056,-0.000891,
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849,,,,,0.003347,0.000545,
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366,,,,,0.016858,0.005397,
5,1569844800000,43.83,43.85,43.8,43.8,2800,2019-09-30 07:00:00,-0.01,0.01,0.01,75.916117,,,,,0.024235,0.010503,
6,1569846600000,43.75,43.8499,43.75,43.8499,1667,2019-09-30 07:30:00,0.0499,0.0,0.0,77.956121,,,,,0.030809,0.015642,
7,1569848400000,43.77,43.89,43.77,43.89,2946,2019-09-30 08:00:00,0.0401,0.0,0.0,79.470026,,,,,0.036629,0.020686,
8,1569850200000,43.81,44.12,43.515,43.55,346790,2019-09-30 08:30:00,-0.34,0.34,0.34,48.730019,,,,,0.023233,0.021274,
9,1569852000000,43.545,43.705,43.52,43.665,193525,2019-09-30 09:00:00,0.115,0.0,0.0,55.094733,,,,,0.019316,0.020835,0.004255


In [17]:
def on_balance_volume(stock):
    
    # Grab the volume and close colum
    volume = stock['v']
    change = stock['c'].diff()
    
    # Initialize the previous OBV
    prev_obv = 0
    obv_values = []
    
    # Calculate the On Balance Volume
    for i, j in zip(change, volume):
        if i > 0:
            current_obv = prev_obv + j
        elif i < 0:
            current_obv = prev_obv - j
        else:
            current_obv = prev_obv
            
        prev_obv = current_obv
        obv_values.append(current_obv)
        
    # Return a panda series
    return pd.Series(obv_values, index = stock.index)


# Apply the function
obv = on_balance_volume(price_data)

# Add to the data frame but drop the old index before adding it
price_data['On Balance Volume'] = obv.reset_index(level = 0, drop = True)

price_data.head(30)

Unnamed: 0,datetime,o,h,l,c,v,readable_time,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change,On Balance Volume
0,1569830400000,43.81,43.81,43.46,43.48,700,2019-09-30 03:00:00,,,,,,,,,0.0,0.0,,0
1,1569832200000,43.3,43.3,43.3,43.3,300,2019-09-30 03:30:00,-0.18,0.18,0.18,0.0,,,,,-0.004038,-0.002244,,-300
2,1569834000000,43.51,43.51,43.5,43.5,252,2019-09-30 04:00:00,0.2,0.0,0.0,54.621849,,,,,0.001056,-0.000891,,-48
3,1569837600000,43.51,43.51,43.5,43.5,300,2019-09-30 05:00:00,0.0,0.0,0.0,54.621849,,,,,0.003347,0.000545,,-48
4,1569841200000,43.81,43.81,43.81,43.81,300,2019-09-30 06:00:00,0.31,0.0,0.0,77.238366,,,,,0.016858,0.005397,,252
5,1569844800000,43.83,43.85,43.8,43.8,2800,2019-09-30 07:00:00,-0.01,0.01,0.01,75.916117,,,,,0.024235,0.010503,,-2548
6,1569846600000,43.75,43.8499,43.75,43.8499,1667,2019-09-30 07:30:00,0.0499,0.0,0.0,77.956121,,,,,0.030809,0.015642,,-881
7,1569848400000,43.77,43.89,43.77,43.89,2946,2019-09-30 08:00:00,0.0401,0.0,0.0,79.470026,,,,,0.036629,0.020686,,2065
8,1569850200000,43.81,44.12,43.515,43.55,346790,2019-09-30 08:30:00,-0.34,0.34,0.34,48.730019,,,,,0.023233,0.021274,,-344725
9,1569852000000,43.545,43.705,43.52,43.665,193525,2019-09-30 09:00:00,0.115,0.0,0.0,55.094733,,,,,0.019316,0.020835,0.004255,-151200


In [None]:
# Hooray! Finallly building the model!