In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import copy
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #For visualization and plotting Graph
from sklearn.preprocessing import MinMaxScaler # For scaling the data
import tensorflow as tf
from tensorflow.keras import Sequential,layers # Layers for building up the LSTM models


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        # Show the full path of every file found below the input directory.
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Path of the AAPL price CSV (per its file name: 2006-01-01 to 2018-01-01) in the Kaggle input directory.
data_path = "/kaggle/input/stock-time-series-20050101-to-20171231/AAPL_2006-01-01_to_2018-01-01.csv"

In [None]:
# Load the Apple stock price data from the CSV at data_path into a dataframe.
df_init = pd.read_csv(data_path)
# Preview the first rows as a quick check of the loaded columns.
df_init.head()

In [None]:
# This method converts the date column to datetime objects and sets it as the index of the dataframe.
def to_timesr(df_wrk,dt_column, dt_format=None):
    '''
    Return a copy of the dataframe that is indexed by its parsed date column.

    Input:
    df_wrk : dataframe holding the raw data; it is left untouched
    dt_column : name of the column that contains the dates
    dt_format : optional strftime-style format string; when None pandas infers the format

    Output:
    new dataframe whose index is the datetime-converted dt_column
    '''
    df = df_wrk.copy() # deep copy (pandas default), so that the initial dataframe is kept intact

    # `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
    # the default behaviour) and format=None already means "infer", so a single
    # call covers both the explicit-format and the inferred-format case.
    df[dt_column] = pd.to_datetime(df[dt_column], format=dt_format)

    # Return the re-indexed frame instead of mutating in place.
    return df.set_index(dt_column)

In [None]:
# Run the conversion: parse the "Date" column (format inferred) and use it as the index.
df_wrk = to_timesr(df_init, dt_column="Date")
df_wrk.head()

In [None]:
# Method for visualizing one column of the stock data (used below for the closing value of the Apple stock)
def plot_stk(df,column,title=None):
    '''
    Draw one column of the dataframe as a blue line chart on a 12x8 figure.

    Input:
    df : dataframe holding the series to draw
    column : name of the column to plot against the dataframe index
    title : optional text shown above the chart
    '''
    axis = df[column].plot(kind="line", color='b', figsize=(12, 8))
    axis.set_title(title)
    plt.show()
    
# Line chart of the "Close" column over the full date range.
plot_stk(df_wrk, column='Close', title="APPLE Stock Closing Value")

In [None]:
def data_scaling(data):
    '''
    Fit a new MinMaxScaler on the given data and hand back the fitted scaler.

    Input:
    data : values in a form accepted by MinMaxScaler.fit

    Output:
    the fitted MinMaxScaler instance (the data itself is not transformed here)
    '''
    # fit() returns the scaler itself, so creation and fitting can be chained.
    return MinMaxScaler().fit(data)

In [None]:
def train_data_prep(data, time_step,scaler = None,val_split=None):
    '''
    This method splits off the validation observations, optionally scales the
    remaining training rows and turns them into LSTM windows with the help of
    another method - data_prep_tm_sr(data, time_step).

    Input:
    data : dataframe or array-like with the observations in rows
    time_step: no. of observations to be fed or the view window
    scaler : fitted scaler (e.g. from data_scaling(data)) used to transform the
             training rows; when None the raw values are used unscaled
    val_split : no. of trailing observations to be kept aside for testing -
                an integer count or a fraction; None/0 keeps every row for training

    Output:
    x_train_data, y_train_data : training dataset pair
    val_data_nos : no. of observations in testing data (calculated when val_split is a fraction)

    Raises:
    ValueError : when val_split leaves no rows for training
    '''
    n_rows = data.shape[0]

    # Work out how many trailing observations are held out for validation.
    if not val_split:
        val_data_nos = 0
    elif isinstance(val_split, (int, np.integer)): # numpy integers count as integers too
        val_data_nos = int(val_split)
    else:
        # fraction: same rounding as before, the training limit is truncated
        val_data_nos = n_rows - int(n_rows*(1-val_split))

    if val_data_nos < 0 or (val_data_nos > 0 and val_data_nos >= n_rows):
        # A negative slice limit would silently pick the wrong rows.
        raise ValueError(
            "val_split=%r leaves no training rows out of %d observations" % (val_split, n_rows)
        )

    train_idx_lim = n_rows - val_data_nos
    data_temp = data[:train_idx_lim] # only the training rows are prepared here

    if scaler is not None:
        data_scaled = scaler.transform(data_temp)
    else:
        # No scaler supplied: use the raw values, as a 2-D array because the
        # windowing helper indexes rows and the first column.
        data_scaled = np.asarray(data_temp)
        if data_scaled.ndim == 1:
            data_scaled = data_scaled.reshape(-1, 1)

    x_train_data,y_train_data = data_prep_tm_sr(data_scaled, time_step)

    return x_train_data,y_train_data,val_data_nos

In [None]:
def data_prep_tm_sr(data, time_step):
    '''
    This method aims to prepare the data, as required to be fed to a LSTM
    (many-to-one framing): every sample holds `time_step` consecutive values of
    the first column and its target is the value that follows them.

    Ex. with time_step = 20 and one feature ("Close"):
    X1 = values in index (0-19), y1 = value in 20th index
    X2 = values in index (1-20), y2 = value in 21st index

    For defining the layers of LSTM two inputs are important, time steps and
    no. of features; the no. of observations is inferred.

    Input:
    data : 2-D array, dataframe or 1-D series-like; only the first column is used
    time_step: no. of observations to be fed or the view window (must be >= 1)

    Output:
    x_data : array of shape (no. of observations, time_step, 1)
    y_data : array of shape (no. of observations,)
    Both are empty (0 observations) when data has no more than time_step rows.

    Raises:
    ValueError : when time_step is smaller than 1
    '''
    if time_step < 1:
        raise ValueError("time_step must be at least 1, got %r" % (time_step,))

    # Accept dataframes / series as well as arrays; work on the first column.
    values = np.asarray(data)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    series = values[:, 0]

    n_windows = len(series) - time_step
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empty
        # arrays instead of failing on the reshape below.
        return (np.empty((0, time_step, 1), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # Row k of the index matrix is [k, k+1, ..., k+time_step-1].
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_step)[None, :]
    x_data = series[window_idx]
    y_data = series[time_step:].copy() # the value right after each window

    # Reshaping the array as (no.of observations, time_steps, no.of features)
    x_data = np.reshape(x_data,(n_windows,time_step,1))

    return x_data,y_data

In [None]:
def val_data_prep(data,time_step,val_data_nos,scaler=None):
    '''
    This method aims to scale and prepare the validation data.

    The last `val_data_nos` observations are the ones to be predicted; the
    `time_step` observations before them are included as well so that the
    first prediction has a full view window.

    Input:
    data : dataframe or array-like with the observations in rows
    time_step: no. of observations to be fed or the view window
    val_data_nos : no. of trailing observations that form the validation set
    scaler : fitted scaler used to transform the data; when None the raw values are used

    Output:
    x_val_data : model inputs for the validation observations (targets are not
                 returned since they will be predicted)

    Raises:
    ValueError : when data has fewer than val_data_nos + time_step rows
    '''
    start_idx = data.shape[0]-val_data_nos-time_step
    if val_data_nos < 0 or start_idx < 0:
        # A negative start would be read as "count from the end" and silently
        # select the wrong rows.
        raise ValueError(
            "need val_data_nos + time_step = %d rows but data has only %d"
            % (val_data_nos + time_step, data.shape[0])
        )

    data = data[start_idx:]

    if scaler is not None:
        data_scaled = scaler.transform(data)
    else:
        # No scaler supplied: use the raw values, as a 2-D array because the
        # windowing helper indexes rows and the first column.
        data_scaled = np.asarray(data)
        if data_scaled.ndim == 1:
            data_scaled = data_scaled.reshape(-1, 1)

    # Validation data is prepared in the same way as training data.
    x_val_data,_ = data_prep_tm_sr(data_scaled, time_step)

    return x_val_data

In [None]:
def model_lstm(input_shape,loss,optimizer):
    '''
    This method prepares the LSTM model: two stacked LSTM layers of 50 units
    followed by a single-unit Dense output layer.

    Input:
    input_shape: tuple containing time_step and no.of feature, Ex. (20,1)
    loss: loss function to be used
    optimizer: optimiser to be used

    Output:

    compiled lstm model (its summary is printed as a side effect)
    '''
    lstm_model=Sequential()
    # Declare the input shape with an explicit Input layer rather than the
    # `input_shape=` argument on the first LSTM layer.
    lstm_model.add(layers.Input(shape=input_shape))
    # return_sequences=True so that the second LSTM receives the whole sequence
    lstm_model.add(layers.LSTM(units=50,return_sequences=True))
    lstm_model.add(layers.LSTM(units=50))
    lstm_model.add(layers.Dense(1))

    lstm_model.compile(loss=loss,optimizer=optimizer)

    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    lstm_model.summary()

    return lstm_model

In [None]:
def rmse_loss_pred(valid_data_df):
    '''
    This module calculates the rmse loss
    Input:
    valid_data_df: dataframe like containing the actual values under column "Close" and predicted values under "Predicted" column.
                   It is only read, never modified.

    Output:

    rmse value

    '''
    # Keep the squared errors in a local series: writing them back as a new
    # column would change the caller's dataframe as a hidden side effect.
    squared_error = (valid_data_df["Close"]-valid_data_df["Predicted"])**2
    return np.sqrt(squared_error.mean())

In [None]:
def plot_predicted_curve(data,predicted_closing_price,columns):

    '''
    This module is to plot the full stock variation against time along with predicted values
    and to print the rmse loss of the predictions.
    Input:
    data: dataframe - original; it is only read, never modified
    predicted_closing_price : predicted values received from the model, one per trailing row of data
                              (1-D, or 2-D with the predictions in the first column)
    columns: string, basically the name of the column in the dataframe to be provided which has the corresponding actual or true value.

    Output:

    plot

    '''
    predictions = np.asarray(predicted_closing_price)
    if predictions.ndim == 2:
        # model output comes as (n, 1); a dataframe column needs 1-D values
        predictions = predictions[:, 0]
    val_data_len=len(predictions)

    # Work on a copy of the trailing rows so that adding the "Predicted" column
    # neither touches `data` nor triggers a chained-assignment warning.
    valid_data_df=data[data.shape[0]-val_data_len:].copy()
    valid_data_df["Predicted"]=predictions

    #loss Calculation
    # rmse_loss_pred reads the actual values from a column named "Close", so
    # hand it the column selected through `columns` under that name.
    rmse_loss = rmse_loss_pred(
        pd.DataFrame({"Close": valid_data_df[columns],
                      "Predicted": valid_data_df["Predicted"]})
    )

    plt.figure(figsize=(12,8))
    plt.plot(data[columns], label="Actual")
    plt.plot(valid_data_df["Predicted"], label="Predicted")
    plt.title("Actual vs Prediction Curve")
    plt.legend() # tell the two curves apart
    plt.show()

    print ("RMSE Loss : ", rmse_loss)

In [None]:
def execution(data,time_step,loss,optimizer,epochs,batch_size,verbose,columns,val_split):
    '''
    This module compiles and executes all the modules: scaling, data
    preparation, model creation, training, prediction and plotting.

    Input:
    data : dataframe with the series to model
    time_step : no. of observations to be fed or the view window
    loss, optimizer : passed to model_lstm for compiling the model
    epochs, batch_size, verbose : passed to model.fit
    columns : name of the column holding the actual values (used for plotting)
    val_split : no. of trailing observations kept aside for validation, as
                integer count or as fraction; None/0 trains on everything and
                skips the prediction step
    '''
    # Fit the scaler on the training rows only. Fitting it on the full series
    # would leak the min/max of the validation period into training. The split
    # point mirrors the one computed in train_data_prep.
    n_rows = data.shape[0]
    if not val_split:
        n_train = n_rows
    elif isinstance(val_split, (int, np.integer)):
        n_train = n_rows - val_split
    else:
        n_train = int(n_rows*(1-val_split))
    scaler = data_scaling(data[:n_train])

    print("Preparing training data.....\n ")
    x_train_data,y_train_data,val_data_nos = train_data_prep(data, time_step,scaler,val_split)

    print("Preparing validation data.....\n ")
    # Without held-out observations there is nothing to build windows for.
    x_val_data = val_data_prep(data,time_step, val_data_nos, scaler) if val_data_nos else None

    input_shape = (x_train_data.shape[1],x_train_data.shape[2])
    print("Input Shape : ",input_shape,"\n")

    print("Creating the model.....\n ")
    model = model_lstm(input_shape,loss,optimizer)

    # Stop training when the training loss has not improved for 3 epochs.
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

    print("\n Fitting the model.....\n ")
    # The callback only takes effect when it is handed to fit().
    model.fit(x_train_data,y_train_data,epochs=epochs,batch_size=batch_size,
              verbose=verbose,callbacks=[callback])

    if x_val_data is None:
        print("No validation data was held out, skipping prediction.....\n ")
        return

    print("Predicting the unseen data.....\n ")
    predicted_closing_price=model.predict(x_val_data)

    # Bring the predictions back to the original price scale.
    predicted_closing_price=scaler.inverse_transform(predicted_closing_price)

    print("Printing Actual vs Prediction Curve.....\n ")
    plot_predicted_curve(data,predicted_closing_price,columns)

In [None]:
# Settings for the run; all of them are passed to execution(...) below.
time_step = 60 # view window: no. of consecutive observations fed to the model per sample
val_split = 251 # integer => no. of trailing observations kept aside for validation (presumably about one trading year - TODO confirm)
loss = "mean_squared_error" # loss function used when compiling the model
optimizer = "adam" # optimizer used when compiling the model
epochs = 3 # no. of training epochs passed to model.fit
batch_size = 1 # batch size passed to model.fit
verbose = 2 # verbosity level passed to model.fit
columns = "Close" # column holding the actual values, used for plotting

In [None]:
# Considering only the closing stock price for the current analysis
# (the double brackets keep the result a one-column dataframe instead of a series).
df_wrk_1=df_wrk[["Close"]]

In [None]:
# Run the whole pipeline on the closing prices with the settings defined above.
execution(
    data=df_wrk_1,
    time_step=time_step,
    loss=loss,
    optimizer=optimizer,
    epochs=epochs,
    batch_size=batch_size,
    verbose=verbose,
    columns=columns,
    val_split=val_split,
)