In [1]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import models, layers, optimizers, regularizers
from sklearn import ensemble

Using TensorFlow backend.


In [2]:
df = pd.read_csv('Google_Stock_Price_Train.csv', thousands=',')
df_test = pd.read_csv('Google_Stock_Price_Test.csv', thousands=',')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [3]:
df['Mid'] = (df['High'] + df['Low']) / 2
df_test['Mid'] = (df_test['High'] + df_test['Low']) / 2
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Mid
0,1/3/2012,325.25,332.83,324.97,663.59,7380500,328.9
1,1/4/2012,331.27,333.87,329.08,666.45,5749400,331.475
2,1/5/2012,329.83,330.75,326.89,657.21,6590300,328.82
3,1/6/2012,328.34,328.77,323.68,648.24,5405900,326.225
4,1/9/2012,322.04,322.29,309.46,620.76,11688800,315.875


In [4]:
df['Up Down'] = np.where(df['Close'].shift(1) > df['Close'], 0, 1)
df_test['Up Down'] = np.where(df_test['Close'].shift(1) > df_test['Close'], 0, 1)

df['VolumeBand'] = pd.cut(df['Volume'], 4)
df[['VolumeBand', 'Up Down']].groupby( ['VolumeBand'], as_index=False).mean().sort_values(by='VolumeBand', ascending=True)

Unnamed: 0,VolumeBand,Up Down
0,"(-17070.0, 6250400.0]",0.508906
1,"(6250400.0, 12492900.0]",0.514286
2,"(12492900.0, 18735400.0]",0.6
3,"(18735400.0, 24977900.0]",0.25


In [5]:
df['High Volume'] = np.where(df['Volume'] > 18735400.0, 1, 0)
df = df.drop('VolumeBand', axis=1)
df_test['High Volume'] = np.where(df_test['Volume'] > 18735400.0, 1, 0)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Mid,Up Down,High Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500,328.9,1,0
1,1/4/2012,331.27,333.87,329.08,666.45,5749400,331.475,1,0
2,1/5/2012,329.83,330.75,326.89,657.21,6590300,328.82,0,0
3,1/6/2012,328.34,328.77,323.68,648.24,5405900,326.225,0,0
4,1/9/2012,322.04,322.29,309.46,620.76,11688800,315.875,0,0


In [6]:
feature = ['Open', 'Close', 'Mid', 'High Volume']

x = df[feature]
y = df['Up Down']

x_test = df_test[feature]
y_test = df_test['Up Down']

### Logistic Regression

In [7]:
lr = LogisticRegression(random_state=1200, solver='lbfgs', multi_class='multinomial').fit(x, y)
lr.predict(x_test)
lr.predict_proba(x_test)
lr_acc = lr.score(x_test,y_test)
print(lr_acc)

0.75


### Neural Networks

In [8]:
nn = models.Sequential()
nn.add(layers.Dense(input_dim=4,
                   units=1000,
                   activation='relu'))
nn.add(layers.Dense(units=1,
                    kernel_initializer='normal',
                    activation='softmax'))

nn.compile( optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 
nn.fit(x, y, epochs=10, validation_split=0.2, batch_size=1000)
loss, nn_acc = nn.evaluate(x_test, y_test)
print("-------------------------------------------")
print(nn_acc)

Train on 1006 samples, validate on 252 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
-------------------------------------------
0.6000000238418579


### Adaboost

In [9]:
boost = ensemble.AdaBoostClassifier(n_estimators=100).fit(x, y)
boost.predict(x_test)
boost.predict_proba(x_test)
boost_acc = boost.score(x_test,y_test)
print(boost_acc)

0.45
