## STOCK PRICE ESTIMATION

## 1. Import the std libraries 

In [None]:
#stock estimations

#1.importing the libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow
import sys
import os
import sklearn
import sklearn.preprocessing

In [None]:
# The input data files are expected in the "../input/" directory.
# Ask `ls` for that directory's contents and echo the decoded listing.
from subprocess import check_output

input_listing = check_output(["ls", "../input"])
print(input_listing.decode("utf8"))

In [None]:
#2.load the data
# Read the training CSV into a DataFrame; the path is relative to the working directory.
df_train = pd.read_csv('../input/train.csv')

In [None]:
# Preview the first rows of the training frame.
df_train.head()

In [None]:
# (rows, columns) of the training frame.
df_train.shape

In [None]:
# Print the column dtypes and non-null counts.
df_train.info()

In [None]:
# Preview the last rows of the training frame.
df_train.tail()

In [None]:
# Summary statistics of the training frame's columns.
df_train.describe()

In [None]:
#check the correlations
# Correlate only the numeric/boolean columns. Older pandas dropped any other
# column silently, but pandas >= 2.0 raises on them instead, so select the
# usable columns explicitly to get the same matrix on every version.
numeric_cols = df_train.select_dtypes(include=["number", "bool"])
corrmat = numeric_cols.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(corrmat, vmax=1, square=True)

In [None]:
# Visualize the stock over time: price series on the left, volume on the right.
plt.figure(figsize=(15, 5))

# Left panel: one line per price column, labelled with the column name.
plt.subplot(1, 2, 1)
price_styles = (
    ('open', 'red'),
    ('close', 'green'),
    ('low', 'blue'),
    ('high', 'black'),
)
for column, colour in price_styles:
    plt.plot(getattr(df_train, column).values, color=colour, label=column)
plt.title('stock price')
plt.xlabel('time [days]')
plt.ylabel('price')
plt.legend(loc='best')

# Right panel: traded volume.
plt.subplot(1, 2, 2)
plt.plot(df_train.volume.values, color='black', label='volume')
plt.title('stock volume')
plt.xlabel('time [days]')
plt.ylabel('volume')
# Trailing semicolon keeps the notebook from echoing the Legend object.
plt.legend(loc='best');

In [None]:
# take a close values for the models
# NOTE(review): iloc[:, 1:2] selects the column at position 1 (as a 2-D array);
# confirm that this position really holds the close price in train.csv.
trainset = df_train.iloc[:,1:2].values
# Show the first five rows of the selected series.
trainset[:5,:]

In [None]:
# Shape of the extracted training series.
trainset.shape

In [None]:
#3. Manipulate the data
#a. drop volume
#b. normalize
#c. create train, validation and test data sets

In [None]:
# Min-max scaling: map the training values into the range [0, 1].
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
# Learn the value range from the training series, then rescale that same series.
sc.fit(trainset)
training_scaled = sc.transform(trainset)

In [None]:
# Show the first five scaled rows.
training_scaled[:5]

In [None]:
# Build the supervised samples with a sliding window: each input is the
# previous `look_back` scaled rows, and its target is the first column of the
# row that follows that window.
look_back = 60

window_ends = range(look_back, df_train.shape[0])
x_train = np.array([training_scaled[end - look_back:end] for end in window_ends])
y_train = np.array([training_scaled[end, 0] for end in window_ends])

In [None]:
# Shape of the stacked input windows.
x_train.shape

In [None]:
# Shape of the target vector (one target per input window).
y_train.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense,TimeDistributed,Bidirectional
from keras.layers import LSTM,Input,Conv1D,TimeDistributed
from keras.layers import Dropout,Activation
from keras.callbacks import LearningRateScheduler
from keras.optimizers import SGD,Adagrad,Adam
from keras.losses import Huber
from keras import losses

In [None]:

# Learning-rate sweep set-up: a fresh network is compiled with a tiny starting
# rate and a scheduler that raises the rate every epoch, so loss can later be
# plotted against learning rate.

# Stacked LSTM regressor: one unidirectional LSTM, then three bidirectional
# LSTMs (only the last one collapses the sequence), then a dense head that
# emits a single value. A Dropout layer follows every layer except the output.
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(None, 1)),
    Dropout(0.6),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.6),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.6),
    Bidirectional(LSTM(100)),
    Dropout(0.4),
    Dense(60),
    Dropout(0.6),
    Dense(30),
    Dropout(0.35),
    Dense(1),
])

# Start at 1e-8 and multiply the rate by 10 every 10 epochs.
lr_schedule = LearningRateScheduler(lambda epoch: 1e-8 * 10 ** (epoch / 10))
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and no longer accepted by recent Keras releases.
optimizer = Adam(learning_rate=1e-8)

model.compile(
    optimizer=optimizer,
    loss=losses.mean_squared_logarithmic_error,
    metrics=['mae'],
)

model.summary()

In [None]:
import time

# Time the learning-rate sweep: 100 epochs with the scheduler callback, with
# 20% of the samples held out for validation. perf_counter is a monotonic
# clock, so the elapsed value cannot be skewed by system clock adjustments.
start = time.perf_counter()

history = model.fit(x_train,
                    y_train,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=True,
                    callbacks=[lr_schedule])

# The interval covers model fitting, not compilation, so label it as such.
print('training time : ', time.perf_counter() - start)

In [None]:
# Loss against learning rate on a log-scaled x axis: validation curve first,
# then the training curve.
sweep_rates = history.history['lr']
for loss_key in ('val_loss', 'loss'):
    plt.semilogx(sweep_rates, history.history[loss_key])
# Fix the view to learning rates in [1e-8, 1e-1] and losses in [0, 0.2].
plt.axis([1e-8, 1e-1, 0, 0.2])

In [None]:
from keras.layers import Input, Conv1D, TimeDistributed

# Rebuild the network from scratch so the weights from the learning-rate sweep
# are discarded. Same architecture as the sweep model: one unidirectional LSTM,
# three bidirectional LSTMs, then a dense head with a single output.
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(None, 1)),
    Dropout(0.6),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.6),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.6),
    Bidirectional(LSTM(100)),
    Dropout(0.4),
    Dense(60),
    Dropout(0.6),
    Dense(30),
    Dropout(0.35),
    Dense(1),
])

model.summary()

# Fixed learning rate for the real training run.
# NOTE(review): 7e-5 was presumably read off the sweep plot - confirm.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and no longer accepted by recent Keras releases.
optimizer = Adam(learning_rate=7e-5)

model.compile(
    optimizer=optimizer,
    loss=losses.mean_squared_logarithmic_error,
    metrics=['mae'],
)

# Monotonic clock for the elapsed-time measurement.
start = time.perf_counter()

history = model.fit(x_train,
                    y_train,
                    epochs=250,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=True)

# The interval covers model fitting, not compilation, so label it as such.
print('training time : ', time.perf_counter() - start)

In [None]:
# list all data in history
# The keys name the per-epoch series recorded by the last fit call.
print(history.history.keys())

In [None]:
#load model parameters and continue to train the model
from keras.models import load_model
#model = load_model('../input/model_stock_price_estimation.h5')

In [None]:
# Training curves, one figure per panel: the loss, a zoomed view of the loss,
# and the mean absolute error. Each row describes one panel.
history_panels = (
    # (subplot index, train key, validation key, title, y label, axis limits)
    (1, 'loss', 'val_loss', 'model loss', 'loss', None),
    (2, 'loss', 'val_loss', 'model loss ZOOMED', 'loss', [100, 400, 0, 0.005]),
    (3, 'mae', 'val_mae', 'model mae', 'mae', None),
)

for slot, train_key, val_key, heading, y_label, limits in history_panels:
    plt.figure(figsize=(12, 5))
    # Every figure draws into a single cell of a 2x2 grid.
    plt.subplot(2, 2, slot)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(heading)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    if limits is not None:
        # Only the zoomed panel pins its axes.
        plt.axis(limits)
    plt.show()

In [None]:
# Persist the trained model as an HDF5 file in the working directory. This is
# the path the reload step reads from; the previous '../input/' target did not
# match it, so the reload could not find the file that was just written.
model.save('model_stock_price_estimation.h5')
# del model  # deletes the existing model

In [None]:
# returns a compiled model
# Reload the HDF5 file from the working directory and rebind `model` to it.
model = load_model('model_stock_price_estimation.h5')

## Preparing the test data for prediction

In [None]:
#Lets prepare the test data for prediction
#load the test data and take the single column at position 1 for the model
# NOTE(review): only one column is selected here (not close and volume) - confirm
# that position 1 is the same series the model was trained on.

df_test = pd.read_csv('../input/test.csv')

# The 1:2 slice keeps the result two-dimensional (one column).
real_stock_price = df_test.iloc[:,1:2].values
real_stock_price.shape

In [None]:
# Show the first 65 rows of the test series.
real_stock_price[:65]

#### Plot the real stock price for 78 days

In [None]:
# Draw the test series (first column of real_stock_price) on a square figure.
test_series = real_stock_price[:, 0]
plt.figure(figsize=(8, 8))
plt.plot(test_series)
plt.title('test_stock_price')
plt.ylabel('stock price')
plt.xlabel('days')
plt.show()

## Predict and plot the result against real data

In [None]:
from numpy import newaxis

In [None]:
def plot_results_multiple(predicted_data, true_data, lookback, length):
    """Plot a window of the true series together with the predicted series.

    Each input is reshaped to a single column and passed through the
    module-level scaler ``sc.inverse_transform`` before plotting.

    Args:
        predicted_data: Predicted values (anything ``np.array`` accepts);
            plotted in full.
        true_data: Array of true values; only rows ``lookback`` up to
            ``lookback + length`` are drawn.
        lookback: Index of the first true row to draw.
        length: Number of true rows to draw.
    """
    window = slice(lookback, lookback + length)
    actual = sc.inverse_transform(true_data.reshape(-1, 1))
    forecast = sc.inverse_transform(np.array(predicted_data).reshape(-1, 1))
    plt.plot(actual[window])
    plt.plot(forecast)
    plt.show()

In [None]:
# Predict `length` consecutive values, starting from a real seed window.
def predict_sequences_multiple(model, firstValue, length):
    """Roll the model forward ``length`` steps, feeding predictions back in.

    At every step the current window is given a leading batch axis and passed
    to ``model.predict``. The prediction is appended to the end of the window
    and the oldest row is dropped, so the window length stays constant.
    ``firstValue`` itself is not modified.

    Args:
        model: Object exposing ``predict(batch)``.
        firstValue: Two-dimensional seed window (rows are time steps).
        length: Number of steps to predict.

    Returns:
        A list with one entry per step, each being the raw value returned by
        ``model.predict`` for that step.
    """
    print("Prediction in transformed format")
    window = firstValue
    outputs = []
    for _ in range(length):
        step = model.predict(window[np.newaxis, :, :])
        # Slide the window: append the new prediction, drop the oldest row.
        window = np.insert(window, len(window), step, axis=0)[1:]
        outputs.append(step)
    return outputs

In [None]:
#take the first look_back values as the seed window and predict the rest
inputs=df_test.iloc[:look_back,1:2].values
inputs

In [None]:
# transform the input values
# Reuse the scaler that was fitted on the training series (transform only, no refit).
inputs=sc.transform(inputs)
inputs

In [None]:
# Number of consecutive steps to forecast from the seed window.
predict_length=40

#model = load_model('model_stock_price_estimation.h5')
predictions = predict_sequences_multiple(model, inputs, predict_length)

#print("Prediction are converted to orginal values")
# Flatten the predictions to one column and map them back through the scaler.
print(sc.inverse_transform(np.array(predictions).reshape(-1, 1)))

In [None]:
# Compare the forecast with the real prices. plot_results_multiple applies the
# look_back offset itself, so it must receive the whole scaled test series.
# Passing a series already cut at look_back shifted the true curve twice and
# compared the forecast against the wrong (or an empty) range.
plot_results_multiple(predictions,
                      sc.transform(df_test.iloc[:, 1:2].values),
                      look_back, predict_length)