In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model

In [6]:
# Build (X, Y) training pairs: per-day features, target = mean Open of the next `lookahead_days` days
def generate_data(df,lookahead_days=1):
    """Build the supervised-learning arrays for a given forecast horizon.

    The Open/High/Low/Close columns of ``df`` are turned into one feature
    row per day by ``generate_feature``.  The target paired with day ``d`` is
    the mean of column 0 (Open) over the ``lookahead_days`` days that follow
    it.  The trailing ``lookahead_days`` days have no complete target window
    and are left out.

    Args:
        df: DataFrame holding 'Open', 'High', 'Low' and 'Close' columns.
        lookahead_days: number of following days averaged into each target.

    Returns:
        (X, Y): feature array and target vector of equal length.
    """
    columns = ['Open', 'High', 'Low', 'Close']
    prices = df.loc[:, columns].values
    usable_days = len(df) - lookahead_days

    # Target for day d: average Open over days d+1 .. d+lookahead_days.
    targets = np.array([
        np.mean(prices[day + 1:day + 1 + lookahead_days, 0])
        for day in range(0, usable_days)
    ])
    features = generate_feature(prices, 0, usable_days)

    assert len(features) == len(targets)
    return features, targets

In [3]:
# The training file is read with header=None, so the column names are supplied here.
title = ['Open', 'High', 'Low', 'Close']
train_set = pd.read_csv(
    "training.csv",
    names=title,
    header=None,
)

In [4]:
def generate_feature(raw_x,start,end,window=5):
    """Build one feature row per day for days ``start`` .. ``end - 1``.

    Each output row is the day's own raw row followed by three rolling
    statistics of column 0 (the Open price in this notebook), in this order:
    mean, max, min.  The statistics cover up to ``window`` days *preceding*
    day ``i``; day ``i`` itself is not part of its own window.  Day 0 has no
    history, so all three statistics fall back to its own column-0 value.

    Args:
        raw_x: 2-D array-like of shape (n_days, n_columns).
        start: index of the first day to emit (inclusive).
        end: index one past the last day to emit (exclusive).
        window: maximum number of past days in the rolling statistics.
            The default of 5 reproduces the original hard-coded behaviour.

    Returns:
        ndarray with ``end - start`` rows and ``n_columns + 3`` columns;
        an empty 1-D array when the range is empty.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive number of days")

    # Accept lists / nested sequences as well as ndarrays.
    raw_x = np.asarray(raw_x)

    rows = []
    for i in range(start, end):
        if i == 0:
            # No previous days exist: use the current value for every statistic.
            avg_price = max_price = min_price = raw_x[i][0]
        else:
            # Clamp the left edge so early days use whatever history exists.
            history = raw_x[max(0, i - window):i, 0]
            avg_price = np.mean(history)
            max_price = np.max(history)
            min_price = np.min(history)
        rows.append(np.append(raw_x[i], [avg_price, max_price, min_price]))
    return np.array(rows)

In [7]:
# Two training sets over the same price history: one targets the next day's
# Open, the other the mean Open of the next `lookahead_days` days.
lookahead_days = 3
one_day_X, one_day_Y = generate_data(train_set, lookahead_days=1)
lookahead_X, lookahead_Y = generate_data(train_set, lookahead_days=lookahead_days)

In [8]:
# Expand the features with polynomial terms up to `degree` so the linear
# models can capture simple non-linear relations between the inputs.
degree = 2
poly = PolynomialFeatures(degree)
# Fit the expansion once; both matrices come from generate_feature and share
# the same columns, so the second one only needs transform().
poly_one_day_X = poly.fit_transform(one_day_X)
poly_lookahead_X = poly.transform(lookahead_X)

In [9]:
# Sanity check: one row per training day that has a next-day target; the
# columns are the degree-2 expansion of the 7 features from generate_feature.
poly_one_day_X.shape

(1487, 36)

In [10]:
# Sanity check: the longer lookahead leaves fewer usable rows than the one-day
# set, while the column count stays the same.
poly_lookahead_X.shape

(1485, 36)

In [11]:
# Ridge (L2-regularised linear) regressors, one per forecast target:
#   predict_tomorrow -> next day's Open
#   predict_3days    -> mean Open over the next `lookahead_days` days
predict_tomorrow, predict_3days = (
    linear_model.Ridge(alpha=1),
    linear_model.Ridge(alpha=1),
)

In [12]:
# Train each regressor on the polynomial features paired with its own target.
predict_tomorrow.fit(poly_one_day_X ,one_day_Y)
# Last expression of the cell, so the fitted estimator's repr is what gets displayed.
predict_3days.fit(poly_lookahead_X ,lookahead_Y)

Ridge(alpha=1, copy_X=True, fit_intercept=True, max_iter=None, normalize=False,
      random_state=None, solver='auto', tol=0.001)

In [None]:
# TODO: use Lasso regression


In [18]:
def get_testing_data(path):
    """Load a price file and turn every day in it into a feature row.

    ``path`` is read as a header-less CSV whose columns are named Open, High,
    Low and Close.  The return value is whatever ``generate_feature`` yields
    over the full range of rows.
    """
    columns = ['Open', 'High', 'Low', 'Close']
    frame = pd.read_csv(path, names=columns, header=None)
    prices = frame.loc[:, columns].values
    return generate_feature(prices, 0, len(prices))

In [21]:
testing_data = get_testing_data("testing.csv")
# `poly` was already fitted on the training features; test data is only
# transformed and never used to (re)fit a preprocessing step.
poly_testing_data = poly.transform(testing_data)
poly_testing_data

array([[1.00000000e+00, 1.54400000e+02, 1.55020000e+02, 1.52910000e+02,
        1.54760000e+02, 1.54400000e+02, 1.54400000e+02, 1.54400000e+02,
        2.38393600e+04, 2.39350880e+04, 2.36093040e+04, 2.38949440e+04,
        2.38393600e+04, 2.38393600e+04, 2.38393600e+04, 2.40312004e+04,
        2.37041082e+04, 2.39908952e+04, 2.39350880e+04, 2.39350880e+04,
        2.39350880e+04, 2.33814681e+04, 2.36643516e+04, 2.36093040e+04,
        2.36093040e+04, 2.36093040e+04, 2.39506576e+04, 2.38949440e+04,
        2.38949440e+04, 2.38949440e+04, 2.38393600e+04, 2.38393600e+04,
        2.38393600e+04, 2.38393600e+04, 2.38393600e+04, 2.38393600e+04],
       [1.00000000e+00, 1.55960000e+02, 1.56800000e+02, 1.55070000e+02,
        1.56460000e+02, 1.54400000e+02, 1.54400000e+02, 1.54400000e+02,
        2.43235216e+04, 2.44545280e+04, 2.41847172e+04, 2.44015016e+04,
        2.40802240e+04, 2.40802240e+04, 2.40802240e+04, 2.45862400e+04,
        2.43149760e+04, 2.45329280e+04, 2.42099200e+04, 2.42099

In [22]:
# Sanity check: one row per testing day, with the same number of columns as
# the training matrices the models were fitted on.
poly_testing_data.shape

(20, 36)

In [28]:
class Trader():
    """Stateful single-unit trading agent driven by two fitted regressors.

    The agent holds at most one unit long or one unit short.  ``slot`` tracks
    the position: 1 = holding one unit, 0 = flat, -1 = owing one unit.
    Actions are returned as the strings '1' (buy), '-1' (sell) and
    '0' (do nothing).
    """

    def __init__(self, predict_tomorrow, predict_3days):
        """Store the two models and start flat.

        Args:
            predict_tomorrow: fitted model with a ``predict`` method giving
                the next day's price.
            predict_3days: fitted model with a ``predict`` method giving the
                multi-day mean price used as the trend.
        """
        self.predict_tomorrow = predict_tomorrow
        self.predict_3days = predict_3days
        self.day_count = 0  # number of predict_action calls so far
        self.slot = 0       # current position: 1 long, 0 flat, -1 short

    def predict_action(self,current_data):
        """Return the action string for one day's 1-D feature vector.

        Both models are queried with ``current_data`` reshaped to a single
        sample.  Their one-element outputs are reduced to plain scalars
        before the policy compares them, so decisions never depend on the
        truth value of an array.  ``ndarray.item`` raises ValueError if a
        model returns more than one value.
        """
        features = current_data.reshape(1, -1)
        # NOTE(review): when `current_data` is a PolynomialFeatures row with
        # the default bias column, index 0 is the constant 1.0 rather than
        # the Open price.  `policy` ignores this value today; confirm the
        # index before making the policy depend on it.
        current_price = current_data[0]
        next_price = np.asarray(self.predict_tomorrow.predict(features)).item()
        mean_3days_price = np.asarray(self.predict_3days.predict(features)).item()
        self.day_count += 1
        return self.policy(current_price, next_price, mean_3days_price)

    def policy(self,current_price,next_price,trend):
        """Decide the action from the forecasts and update ``slot``.

        * ``next_price > trend``: sell one unit ('-1') unless already short.
        * ``next_price < trend``: buy one unit ('1') unless already long.
        * otherwise (equal forecasts, or the position limit is reached): '0'.

        ``current_price`` is accepted for interface compatibility but is not
        used by the current rules.
        """
        # Sell signal: allowed while long (close it) or flat (open a short).
        if next_price > trend and self.slot > -1:
            self.slot -= 1
            return '-1'
        # Buy signal: allowed while short (cover it) or flat (open a long).
        if next_price < trend and self.slot < 1:
            self.slot += 1
            return '1'
        return '0'

In [29]:
# Fresh agent built from the two fitted regressors; it starts with no position.
trader = Trader(predict_tomorrow=predict_tomorrow, predict_3days=predict_3days)

In [31]:
with open("output.csv", 'w') as output_file:
    # The final testing day is skipped -- presumably because an action chosen
    # on a given day is executed at the next day's Open (see the note below).
    for line in testing_data[0:-1]:
        # We will perform your action as the open price in the next day.

        # transform() only: `poly` is already fitted, so re-fitting it on
        # every single row was redundant.
        data = poly.transform(line.reshape(1,-1))
        action = trader.predict_action(data.ravel())

        output_file.write(action+"\n")