In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import datetime

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## Preprocessing

In [2]:
def getData(df):
    """Turn a price table into day-over-day features and up/down labels.

    ``df`` must provide the columns 'Open Price', 'Close Price',
    'High Price' and 'Low Price'. For every row position t from 1 to
    len(df) - 2 the features are:

    * 'open', 'close', 'high', 'low' -- that price on row t minus the same
      price on row t - 1;
    * 'move' -- close minus open on row t.

    The label 'y' is 1 when the close on row t + 1 is strictly greater than
    the close on row t, and 0 otherwise (including no change).

    Returns a ``(features, labels)`` pair of DataFrames; ``labels`` has the
    single column 'y'.
    """
    def step(column):
        # Row t minus row t - 1, for t = 1 .. len(df) - 2.
        prices = df[column].values
        return prices[1:-1] - prices[:-2]

    opens = df['Open Price'].values
    closes = df['Close Price'].values

    features = pd.DataFrame({
        'open': step('Open Price'),
        'close': step('Close Price'),
        'high': step('High Price'),
        'low': step('Low Price'),
        'move': closes[1:-1] - opens[1:-1],
    })

    # Next row's close versus this row's close; a strict rise maps to 1,
    # everything else to 0, kept in the dtype of the price difference.
    next_change = closes[2:] - closes[1:-1]
    labels = pd.DataFrame({'y': (next_change > 0).astype(next_change.dtype)})

    return features, labels

def getTime(s, fmt="%d-%b-%Y", bucket_seconds=5e4):
    """Convert a date string into an integer time-bucket index.

    Parameters
    ----------
    s : str
        Date text to parse.
    fmt : str, optional
        ``strptime`` format of ``s``. Defaults to "%d-%b-%Y"
        (for example "02-Jan-2009").
    bucket_seconds : float, optional
        Width of one bucket in seconds. Defaults to 5e4.

    Returns
    -------
    int
        The timestamp of the parsed date divided by ``bucket_seconds`` and
        rounded to the nearest integer.

    Raises
    ------
    ValueError
        If ``s`` does not match ``fmt`` (raised by ``strptime``).
    """
    parsed = datetime.datetime.strptime(s, fmt)
    # time.mktime treats the struct_time as local time, so the bucket index
    # can shift with the timezone of the machine running the notebook.
    t = time.mktime(parsed.timetuple())
    return round(t / bucket_seconds)

In [3]:
# Read the train and test price tables from the working directory.
df1 = pd.read_csv('train.csv')
df2 = pd.read_csv('test.csv')

# Replace each frame's date strings with the integer buckets from getTime.
for frame in (df1, df2):
    frame['Date'] = frame['Date'].apply(getTime)
df1.head()

Unnamed: 0,Date,Open Price,Close Price,High Price,Low Price,Volume
0,24617,902.99,931.8,934.73,899.35,4048270080
1,24622,929.17,927.45,936.63,919.53,5413910016
2,24623,931.17,934.7,943.85,927.28,5392620032
3,24625,927.45,906.65,927.45,902.37,4704940032
4,24627,905.73,909.73,910.0,896.81,4991549952


In [4]:
# Build (features, labels) for the train and test frames in one unpacking.
(x_train, y_train), (x_test, y_test) = getData(df1), getData(df2)
x_train.head()

Unnamed: 0,open,close,high,low,move
0,26.18,-4.35,1.9,20.18,-1.72
1,2.0,7.25,7.22,7.75,3.53
2,-3.72,-28.05,-16.4,-24.91,-20.8
3,-21.72,3.08,-17.45,-5.56,4.0
4,4.18,-19.38,1.93,-8.5,-19.56


In [5]:
# Preview the first rows of the training labels returned by getData(df1).
y_train.head()

Unnamed: 0,y
0,1.0
1,0.0
2,1.0
3,0.0
4,0.0


## Training
### Logistic Regression

In [6]:
# L1-penalised logistic regression (liblinear solver) on the difference features.
LR = LogisticRegression(random_state=0, solver='liblinear', penalty='l1', C=0.1, max_iter=1000)
LR.fit(x_train, y_train.y)
# Score against the same 1-D label column used for fitting, rather than the
# one-column DataFrame, so fit and score see targets of the same shape.
print('train acc: %.4f'%LR.score(x_train, y_train.y))
print(' test acc: %.4f'%LR.score(x_test, y_test.y))

train acc: 0.5460
 test acc: 0.5400


### Neural Network

In [7]:
# Multi-layer perceptron with two hidden layers of 10 and 5 units.
MLP = MLPClassifier(random_state=0, max_iter=300, hidden_layer_sizes=(10, 5,))
MLP.fit(x_train, y_train.y)
# Score against the same 1-D label column used for fitting, rather than the
# one-column DataFrame, so fit and score see targets of the same shape.
print('train acc: %.4f'%MLP.score(x_train, y_train.y))
print(' test acc: %.4f'%MLP.score(x_test, y_test.y))

train acc: 0.5597
 test acc: 0.5480


### Other Classifier - LinearSVC

In [8]:
# Standardise the features, then fit an L1-penalised linear SVM (primal form).
SVC = make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5, penalty='l1', dual=False, C=0.1))
SVC.fit(x_train, y_train.y)
# Score against the same 1-D label column used for fitting, rather than the
# one-column DataFrame, so fit and score see targets of the same shape.
print('train acc: %.4f'%SVC.score(x_train, y_train.y))
print(' test acc: %.4f'%SVC.score(x_test, y_test.y))

train acc: 0.5433
 test acc: 0.5480


### Ensemble

In [9]:
# Weighted vote over the three fitted classifiers, one weight per model.
model = [LR, MLP, SVC]
weight = [0.2, 0.5, 0.3]

# Each model's predictions are scaled by its weight, summed per sample and
# rounded to the final class. np.round sends exact halves to the nearest even
# value, so a weighted vote of exactly 0.5 resolves to class 0.
pred_train = np.round(
    np.array([clf.predict(x_train) * w for clf, w in zip(model, weight)]).sum(0)
)
pred_test = np.round(
    np.array([clf.predict(x_test) * w for clf, w in zip(model, weight)]).sum(0)
)

train_acc = (pred_train == y_train.y).mean()
test_acc = (pred_test == y_test.y).mean()
print('train acc: %.4f'%train_acc)
print(' test acc: %.4f'%test_acc)

train acc: 0.5561
 test acc: 0.5760
