In [90]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/stocks/gr500182.csv
/kaggle/input/stocks/gr500209.csv
/kaggle/input/stocks/gr500680.csv
/kaggle/input/stocks/gr530965.csv
/kaggle/input/stocks/gr532174.csv
/kaggle/input/stocks/gr532210.csv
/kaggle/input/stocks/gr532540.csv
/kaggle/input/stocks/gr500325.csv
/kaggle/input/stocks/gr500112.csv
/kaggle/input/stocks/gr500180.csv
/kaggle/input/stocks/gr507685.csv


In [91]:
# !pip install neupy
import warnings; warnings.simplefilter('ignore')

In [92]:
import numpy as np
import pandas as pd
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split
from neupy import algorithms
from sklearn import metrics
from math import sqrt
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from multiprocessing.pool import ThreadPool


In [93]:
def pre_process_data(data,null_threshold):
    """
    Drops Date and Unix Date columns from the data.
    Drops the columns which has null values more than specified null_threshold.
    Replaces infinite values with NAN.
    Drops the rows which has null values.

    Parameters
    ----------
    data : dataframe

    null_threshold : numeric
        numeric value describing the amount of null values that can be present.

    Returns
    -------
    data : dataframe
        an updated dataframe after performing all the opertaions.
    """
    
    data.drop(columns=['Unix Date','Date'],axis=1,inplace=True)
    total = data.shape[0]
    for col in data.columns:
        if null_threshold * total / 100 < data[col].isnull().sum():
            data.drop(columns=[col],axis=1,inplace=True)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data = data.apply(pd.to_numeric,errors='coerce')
    data.dropna(axis=0,inplace=True)
    return data

In [94]:
def dependent_column(data,column):
    """
    Removes all the Next Day columns.
    Removes all the non Growth Rate Columns (GR)
    add the predictor column to list of columns.

    Parameters
    ----------
    data : dataframe

    column : string
        name of the predictor column 

    Returns
    -------
    data : dataframe
        an updated dataframe after performing all the opertaions.
    column : string
        name of the predictor column
    """
    cols = [col for col in data.columns if "next" not in col.lower() and col.lower().endswith("gr")]
    cols.append(column)
    data = data[cols]
    return (data,column)

In [95]:
def bestParameters(X,Y,std = 0.1):
    model = RandomizedSearchCV(algorithms.GRNN(std=std, verbose=False), param_distributions={'std': np.arange(1e-2, 1, 1e-3)}, scoring='neg_mean_squared_error',)
    X_train, X_test, y_train, y_test = train_test_split(preprocessing.minmax_scale(X), preprocessing.minmax_scale(Y), test_size=0.3, random_state=0)
    model.fit(X_train,y_train)
    return model.best_params_

In [96]:
def generalized_neural_network(X,Y,std=0.1):
    best = bestParameters(X,Y,std)
    x_train, x_test, y_train, y_test = train_test_split(preprocessing.minmax_scale(X), preprocessing.minmax_scale(Y), test_size=0.3, random_state=0)
    model = algorithms.GRNN(std=best['std'])
    model.train(x_train,y_train)
    y_pred = model.predict(x_test)
    rmse = sqrt(metrics.mean_squared_error(y_test, y_pred))
    mae = metrics.mean_absolute_error(y_test, y_pred)
    mse = metrics.mean_squared_error(y_test, y_pred)
    r2 = metrics.r2_score(y_test, y_pred)
    c = 0
    for a,b in zip(y_test, y_pred):
        if a*b >= 0:
            c += 1
    direction = c/len(y_test)
    myres =  {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"rsquared_adj":r2,"std":std, "best-std":best['std'],"direction":direction}
    print("done")
    return myres

In [97]:
%%time
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        filepath = os.path.join(dirname, filename)
        df = pd.read_csv(filepath)
        df = pre_process_data(df,60)
        column = "Next Day Close Price GR"
        (df,column) = dependent_column(df,column)
        X = df.drop(columns=[column])
        Y = df[column]
        data = list()
        stds = np.linspace(0,1,11)
        arguments = list()
        for s in stds:
            data = [X,Y,s]
            arguments.append(data)
        threads = ThreadPool(4)
        result = threads.starmap(generalized_neural_network,arguments)
        resultdf = pd.DataFrame(result)
        resultdf.to_csv(os.path.join(os.getcwd(),str(filename[2:8])+"_gnn"+".csv"),index=None)

done
done
done
done
done
done
done
done
done
done
done
done
done
done
CPU times: user 1min 48s, sys: 18.7 s, total: 2min 7s
Wall time: 1min 17s


In [98]:
result

[{'root_mean_squared_error': 0.0708600019460271,
  'mean_absolute_error': 0.0496251409847374,
  'mean_squared_error': 0.005021139875790964,
  'rsquared_adj': 0.00031531855204247616,
  'std': 0.0,
  'best-std': 0.8119999999999993,
  'direction': 1.0},
 {'root_mean_squared_error': 0.07096396672763561,
  'mean_absolute_error': 0.04973828549210608,
  'mean_squared_error': 0.0050358845737209735,
  'rsquared_adj': -0.0026202795427796577,
  'std': 0.1,
  'best-std': 0.4069999999999997,
  'direction': 1.0},
 {'root_mean_squared_error': 0.07093484664682585,
  'mean_absolute_error': 0.049699823011352215,
  'mean_squared_error': 0.005031752468808701,
  'rsquared_adj': -0.0017975974257464777,
  'std': 0.2,
  'best-std': 0.4479999999999996,
  'direction': 1.0},
 {'root_mean_squared_error': 0.0708588404740358,
  'mean_absolute_error': 0.0496263747423878,
  'mean_squared_error': 0.005020975273324855,
  'rsquared_adj': 0.0003480901074995879,
  'std': 0.30000000000000004,
  'best-std': 0.82199999999999

done
done
done
done
done
done
done
