In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow import keras
from keras.layers import Dense, Dropout
from tensorflow.keras import layers
from keras.layers import Activation
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, make_scorer

from sklearn import metrics
from sklearn.metrics import confusion_matrix
from math import sqrt
import traceback
from multiprocessing.pool import ThreadPool


In [None]:
# Install an "ignore" filter so that warnings issued through the `warnings`
# module are suppressed from this cell onwards.
import warnings; warnings.simplefilter('ignore')

In [None]:
def pre_process_data(data,null_threshold):
    """
    Cleans a raw dataframe so that only complete, finite, numeric rows remain.

    Steps, in order:
      1. Drops the 'Unix Date' and 'Date' columns.
      2. Drops every column whose null count exceeds ``null_threshold``
         percent of the number of rows.
      3. Coerces the remaining columns to numeric; values that cannot be
         parsed become NaN.
      4. Replaces infinite values with NaN.
      5. Drops every row that still contains a NaN.

    The input dataframe is left untouched; a new dataframe is returned.

    Parameters
    ----------
    data : dataframe
        must contain the 'Unix Date' and 'Date' columns.

    null_threshold : numeric
        maximum percentage (0-100) of null values a column may hold before
        it is dropped.

    Returns
    -------
    data : dataframe
        a new dataframe after performing all the operations.

    Raises
    ------
    KeyError
        if 'Unix Date' or 'Date' is not a column of ``data``.
    """
    # Non-inplace drop: the caller's dataframe must not be modified.
    data = data.drop(columns=['Unix Date','Date'])

    # A column is kept when its null count does not exceed the allowed share.
    max_nulls = null_threshold * data.shape[0] / 100
    keep_mask = (data.isnull().sum() <= max_nulls).to_numpy()
    data = data.loc[:, keep_mask]

    # Coerce first, then neutralise infinities: a textual "inf" only becomes a
    # real infinity after numeric conversion, so the replacement has to follow.
    data = data.apply(pd.to_numeric, errors='coerce')
    data = data.replace([np.inf, -np.inf], np.nan)
    return data.dropna(axis=0)

In [None]:
def dependent_column(data,column):
    """
    Selects the feature columns and the predictor column from the data.

    Feature columns are those whose name ends with "gr" (case-insensitive,
    i.e. Growth Rate columns) and does not contain "next" (i.e. no Next Day
    columns). The predictor column is always placed last and appears exactly
    once, even if its own name satisfies the feature filter.

    Parameters
    ----------
    data : dataframe

    column : string
        name of the predictor column

    Returns
    -------
    data : dataframe
        a new, independent dataframe holding the feature columns followed by
        the predictor column.
    column : string
        name of the predictor column

    Raises
    ------
    KeyError
        if ``column`` is not a column of ``data``.
    """
    selected = [
        col for col in data.columns
        if col != column and "next" not in col.lower() and col.lower().endswith("gr")
    ]
    selected.append(column)
    # Copy so that callers can add columns (e.g. a target) without writing
    # into a slice of the original dataframe.
    return (data[selected].copy(), column)

In [None]:
def error_metrics(y_true, y_pred):
    """
    Computes error metrics between the true and the predicted values.

    Parameters
    ----------
    y_true : array-like
        ground-truth values.

    y_pred : array-like
        predicted values.

    Returns
    -------
    dict
        with the keys "root_mean_squared_error", "mean_absolute_error" and
        "mean_squared_error".
    """
    squared_error = metrics.mean_squared_error(y_true, y_pred)
    absolute_error = metrics.mean_absolute_error(y_true, y_pred)
    return {
        "root_mean_squared_error": sqrt(squared_error),
        "mean_absolute_error": absolute_error,
        "mean_squared_error": squared_error,
    }

In [None]:
def create_confusion_matrix(y_pred,y_true):
    """
    Computes classification scores and the confusion matrix.

    Note the argument order: the predictions come FIRST and the ground truth
    SECOND, which is the reverse of the scikit-learn convention.

    Parameters
    ----------
    y_pred : array-like
        predicted class labels.

    y_true : array-like
        ground-truth class labels.

    Returns
    -------
    dict
        with the keys "accuracy", "precision", "recall", "f1_score" and
        "confusion matrix".
    """
    matrix = confusion_matrix(y_true, y_pred)
    scorers = {
        "accuracy": metrics.accuracy_score,
        "precision": metrics.precision_score,
        "recall": metrics.recall_score,
        "f1_score": metrics.f1_score,
    }
    summary = {name: scorer(y_true, y_pred) for name, scorer in scorers.items()}
    summary["confusion matrix"] = matrix
    return summary

In [None]:
def create_bpnn_classification(df,column,epochs,batch_size,threshold):
    """
    Trains the network built by ``create_model_b`` on a binarised version of
    ``column`` and evaluates it on a held-out 30% split.

    The target is 1 where ``df[column] >= threshold`` and 0 otherwise. The
    features are all columns of ``df`` except ``column`` (and a "Target"
    column, if one is present). ``df`` itself is not modified.

    Parameters
    ----------
    df : dataframe
        feature columns plus the predictor column.

    column : string
        name of the predictor column that is binarised into the target.

    epochs : int
        number of training epochs.

    batch_size : int
        training batch size.

    threshold : numeric
        values of ``column`` greater than or equal to this become class 1.

    Returns
    -------
    dict
        the entries of ``error_metrics`` merged with the entries of
        ``create_confusion_matrix``, both computed on the test split.
    """
    target = (df[column] >= threshold).astype(int)
    # errors="ignore": a stale "Target" column may or may not be present.
    features = df.drop(columns=["Target", column], errors="ignore")
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=0
    )
    input_dim = x_train.shape[1]

    x_train = np.asarray(x_train)
    x_test = np.asarray(x_test)
    y_train = np.asarray(y_train)
    # Column vector so it lines up with the (n, 1) network output.
    y_test = np.asarray(y_test).reshape(-1, 1)

    model = create_model_b(input_dim)
    # The test split is passed as validation data for monitoring only; the
    # returned training history is not used.
    model.fit(
        x_train, y_train,
        epochs=epochs, batch_size=batch_size,
        validation_data=(x_test, y_test),
        shuffle=False, verbose=0,
    )

    # Round the sigmoid output to hard 0/1 class labels.
    y_pred = np.asarray(model.predict(x_test)).reshape(-1, 1).round()

    result = {}
    result.update(error_metrics(y_test, y_pred))
    # create_confusion_matrix takes (y_pred, y_true); pass by keyword so that
    # precision/recall are not swapped and the matrix is not transposed.
    result.update(create_confusion_matrix(y_pred=y_pred, y_true=y_test))
    return result

In [None]:
def create_model_b(input_dim,layers=3,units = 32):
    """
    Builds and compiles a fully connected binary classifier.

    The network is: one ReLU input layer, ``layers - 2`` further ReLU hidden
    layers, and a single-unit sigmoid output layer. With the default
    ``layers=3`` this is two ReLU layers followed by the output layer.

    Parameters
    ----------
    input_dim : int
        number of input features.

    layers : int
        total number of Dense layers, counting the output layer. Values
        below 2 yield the minimum network (input layer + output layer).

    units : int
        number of units in every ReLU layer.

    Returns
    -------
    model : Sequential
        compiled with binary cross-entropy loss, the Adam optimizer and
        precision as the tracked metric.
    """
    model = Sequential()
    model.add(Dense(units=units,input_dim=input_dim,activation = 'relu'))
    # range() is empty for layers <= 2, so no extra hidden layer is added.
    for _ in range(layers - 2):
        model.add(Dense(units=units,activation = 'relu'))
    model.add(Dense(units = 1,activation = 'sigmoid'))
    model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=[tf.keras.metrics.Precision()])
    return model

In [None]:
def best_params_bpnn(X,Y):
    """
    Grid-searches the batch size and the number of epochs for the network
    built by ``create_model_b``.

    Candidates are scored by the precision of class 1, using the default
    cross-validation of ``GridSearchCV`` and all available cores.

    Parameters
    ----------
    X : dataframe or 2-D array
        training features.

    Y : series or 1-D array
        binary training labels.

    Returns
    -------
    batch_size : int
        best batch size found.
    epochs : int
        best number of epochs found.
    """
    input_dim = X.shape[1]

    features = np.array(X)
    features = features.reshape(features.shape[0], features.shape[1])
    labels = np.array(Y)
    labels = labels.reshape(labels.shape[0], 1)

    precision_scorer = make_scorer(precision_score, greater_is_better=True, pos_label=1)

    # batch_size/epochs given here are placeholders; the grid overrides them.
    classifier = KerasClassifier(build_fn=create_model_b, batch_size=32, epochs=5, input_dim=input_dim)

    search_space = {
        "epochs": [25, 50, 75, 100],
        "batch_size": [25, 32, 48, 64, 100],
    }
    search = GridSearchCV(
        estimator=classifier,
        param_grid=search_space,
        n_jobs=-1,
        scoring=precision_scorer,
        verbose=0,
    )
    best = search.fit(features, labels).best_params_

    return best['batch_size'], best['epochs']

In [None]:
def bpnn_classification(df, column = "Next Day Close Price GR"):
    """
    Runs the BPNN classification once per growth-rate threshold.

    For every threshold the predictor column is binarised with THAT
    threshold, the best batch size / epochs are grid-searched on the 70%
    training split, and the model is then trained and evaluated with
    ``create_bpnn_classification``.

    Parameters
    ----------
    df : dataframe
        feature columns plus the predictor column.

    column : string
        name of the predictor column.

    Returns
    -------
    solution : list of dict
        one dict per threshold: the metrics returned by
        ``create_bpnn_classification`` plus the keys "batch_size", "epochs"
        and "threshold".
    """
    thresholds = [0.01,0.02,0.03,0.04,0.05,0.1]
    solution = []
    for t in thresholds:
        # Label with the current threshold so the hyper-parameters are tuned
        # on the same target the final model is trained on.
        target = (df[column] >= t).astype(int)
        # errors="ignore": a "Target" column may or may not be present.
        features = df.drop(columns=["Target", column], errors="ignore")
        # Same split settings as create_bpnn_classification, so the search
        # only ever sees that function's training rows.
        X_train, _, y_train, _ = train_test_split(features, target, test_size=0.3, random_state=0)
        batch_size, epochs = best_params_bpnn(X_train, y_train)
        result = create_bpnn_classification(df, column=column, epochs=epochs, batch_size=batch_size, threshold=t)
        result.update({"batch_size":batch_size,"epochs":epochs,"threshold":t})
        solution.append(result)
    return solution

In [None]:
# Master table read from all.csv; the driver loop reads its 'security id' column.
total = pd.read_csv("/kaggle/input/stockdata/all.csv")
# Directory that the driver loop joins with "gr<security id>.csv" to locate each file.
inputpath  = "/kaggle/input/stockdata/Data/Stock"

In [None]:
%%time
# For every security: load its file, clean it, run the BPNN classification
# for all thresholds and write the results to 'bpnn_<security id>.csv'.
# Iterating the column directly (instead of iterrows) keeps the id's original
# dtype, so an integer id is never upcast to float and rendered as "123.0".
for security_id in total['security id']:
    try:
        df = pd.read_csv(os.path.join(inputpath,"gr"+str(security_id)+".csv"))
        df = pre_process_data(df,60)
        df,column = dependent_column(df, column = "Next Day Close Price GR")
        bpnn_res = bpnn_classification(df)
        bpnn_df = pd.DataFrame(bpnn_res)
        bpnn_df.to_csv('bpnn_'+str(security_id)+".csv",index=False)
    except Exception:
        # Best-effort batch run: report the failing security and continue
        # with the next one instead of aborting the whole loop.
        print("Failed to process security id " + str(security_id))
        traceback.print_exc()