In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics
from math import sqrt
from sklearn.neighbors import DistanceMetric
from sklearn.model_selection import GridSearchCV
import prettytable
from prettytable import PrettyTable

import statsmodels.api as sm

import warnings; warnings.simplefilter('ignore')

In [3]:
path = os.getcwd()
path

'C:\\Users\\venu\\Desktop\\Stock Market Analysis'

# Pre-process the data

In [4]:
def pre_process_data(data,null_threshold):
    """
    Drops Date and Unix Date columns from the data.
    Drops the columns which has null values more than specified null_threshold.
    Replaces infinite values with NAN.
    Drops the rows which has null values.

    Parameters
    ----------
    data : dataframe

    null_threshold : numeric
        numeric value describing the amount of null values that can be present.

    Returns
    -------
    data : dataframe
        an updated dataframe after performing all the opertaions.
    """
    
    data.drop(columns=['Unix Date','Date'],axis=1,inplace=True)
    total = data.shape[0]
    for col in data.columns:
        if null_threshold * total / 100 < data[col].isnull().sum():
            data.drop(columns=[col],axis=1,inplace=True)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data = data.apply(pd.to_numeric,errors='coerce')
    data.dropna(axis=0,inplace=True)
    return data


# Removing columns based on the dependent column

In [5]:
def dependent_column(data,column):
    """
    Removes all the Next Day columns.
    Removes all the non Growth Rate Columns (GR)
    add the predictor column to list of columns.

    Parameters
    ----------
    data : dataframe

    column : string
        name of the predictor column 

    Returns
    -------
    data : dataframe
        an updated dataframe after performing all the opertaions.
    column : string
        name of the predictor column
    """
    cols = [col for col in data.columns if "next" not in col.lower() and col.lower().endswith("gr")]
    cols.append(column)
    data = data[cols]
    return (data,column)

# Feature Selection Methods

In [6]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]]).astype(float)).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [7]:
def backward_elimination(data, target,significance_level = 0.05):
    features = data.columns.tolist()
    while(len(features)>0):
        features_with_constant = sm.add_constant(data[features]).astype(float)
        p_values = sm.OLS(target, features_with_constant).fit().pvalues[1:]
        max_p_value = p_values.max()
        if(max_p_value >= significance_level):
            excluded_feature = p_values.idxmax()
            features.remove(excluded_feature)
        else:
            break 
    return features

# KNN Model

In [8]:
def best_parameters(X,Y):
    params = {'n_neighbors':np.arange(1,105,5), 'weights':['uniform', 'distance'], 'metric':['euclidean', 'manhattan']}
    knn = KNeighborsRegressor()    
    model = GridSearchCV(knn, params)
    model.fit(X,Y)
    k = model.best_params_['n_neighbors']
    params = {'n_neighbors':np.arange(k-5, k+5), 'weights':['uniform', 'distance'], 'metric':['euclidean', 'manhattan']}
    knn = KNeighborsRegressor()
    model = GridSearchCV(knn, params)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    model.fit(X_train,y_train)
    return model.best_params_

In [9]:
def k_nearest_neighbours(X,Y, method, value, name):
    params = best_parameters(X,Y)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    knn = KNeighborsRegressor(**params)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    
    rmse = sqrt(metrics.mean_squared_error(y_test, y_pred))
    mae = metrics.mean_absolute_error(y_test, y_pred)
    mse = metrics.mean_squared_error(y_test, y_pred)
    r2 = metrics.r2_score(y_test, y_pred)
    
    pred_actual = pd.DataFrame(list(zip(y_pred, y_test)), 
                   columns =['Predicted Values', 'Actual Values'])
    pred_actual.to_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Models_Results\\" + name[2:8] + "_KNN_Regression_" + "FI_" + method + "_sd_all_" + str(value) + ".csv" , index = False)
    
    c = 0
    for a,b in zip(y_test, y_pred):
#         if (float(a) * float(b) >= 0):
#             c += 1
        if (a > 0 and b > 0) or (a < 0 and b < 0):
#             print(a, b)
            c += 1
    
    print(c, len(y_test), len(y_train) + len(y_test))
    direction = c / len(y_test)
    
    myres =  {"RMSE":rmse,"MAE":mae,"MSE":mse,"rsquared_adj":r2}
    myres.update(params)
    myres.update({"Percentage":direction})
    
    return myres

# Finding results from each set of important features

In [10]:
def fit_KNN(df, column, method, value, name, results):
    print("KNN Model fitted using columns obtained from feature importance using " + method + " : ")
    X = df[df.columns[:-1]]
    Y = df[column].values
    
    model_result = k_nearest_neighbours(X,Y, method, value, name)
    
    print("Percentage of correct direction : ", model_result["Percentage"])
    
    results["KNN_Regression_FI_" + method + "_" + str(value)] = model_result["Percentage"]
    
    create_pretty_table(name , "KNN_Regression", method + " " + value, model_result)

In [11]:
def get_results_from_FI_ForwardSelection(df1, name, column, results):
    print("Features Importance using Forward Selection Method")
    print("*****************************************************************************************")
    method = "ForwardSelection"
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    forward_features = forward_selection(X,Y)
    print("Features obtained from Forward Selection method : ") 
    print("--------------------------------------")
    print(forward_features)
    if (len(forward_features) != 0):
        forward_features.append(column)
        df_fs = df1[forward_features]
        fit_KNN(df_fs, column, method, '', name, results)
    print("*****************************************************************************************")

In [12]:
def get_results_from_FI_BackwardElimination(df1, name, column, results):
    print("Features Importance using Backward Elimination Method")
    print("*****************************************************************************************")
    method = "BackwardElimination"
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    backward_features = backward_elimination(X,Y)
    print("Features obtained from Backward Elimination method : ") 
    print("--------------------------------------")
    print(backward_features)
    if (len(backward_features) != 0):
        backward_features.append(column)
        df_be = df1[backward_features]
        fit_KNN(df_be, column, method, '', name, results)
    print("*****************************************************************************************")

In [13]:
def get_results_from_FI_AllFeatures(df1, name, column, results):
    print("All Features are considered : ")
    print("*****************************************************************************************")
    method = "AllFeaturesConsideration"
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    all_features = list(X.columns)
    print("All Features are --->>") 
    print("--------------------------------------")
    print(all_features)
    if (len(all_features) != 0):
        all_features.append(column)
        df_all = df1[all_features]
        fit_KNN(df_all, column, method, '', name, results)
    print("*****************************************************************************************")

In [14]:
def get_results_from_each_set(data, name, final_df):
    df = pre_process_data(data, 60)
    column = "Next Day Close Price GR"
    (df1, column) = dependent_column(df, column)
    results = {}
    get_results_from_FI_ForwardSelection(df1, name, column, results)
    get_results_from_FI_BackwardElimination(df1, name, column, results)
    get_results_from_FI_AllFeatures(df1, name, column, results)
    sorted_results = sorted(results.items(), key=lambda item: item[1])
    max_row = {'Company' : name[2 : 8] + "-" + companies[name[2 : 8]], 'Model' : 'KNN-Regression', 'Method' : sorted_results[-1][0], 'Percentage' : sorted_results[-1][1]}
    final_df = final_df.append(max_row, ignore_index = True)
    print("Maximum correct direction values are obtained for {} with a percentage of {}.".format(sorted_results[-1][0], sorted_results[-1][1]))
    return final_df

# Process of getting results

In [15]:
def create_pretty_table(name, model, method, result):
    values = [name[2 : 8 ] + "-" + companies[name[2 : 8]], method] + [round(v, 6) if (not isinstance(v, str)) else v for k,v in result.items()]
    tables[model].add_row(values)
    tables[model].title = model

In [16]:
columns =['Company','Method', 'RMSE','MAE','MSE','rsquared_adj','n_neighbors','weights','metric','Percentage']

In [17]:
# companies = {"500112" : "SBIN" ,
# "500325" : "RELIANCE INDUSTRIES LTD",
# "532540" : "TATA CONSULTANCY SERVICES LTD" ,
# "500209" : "INFOSYS LTD", 
# "532174" : "ICICI BANK LTD", 
# "507685" : "WIPRO LTD", 
# "530965" : "INDIAN OIL CORPORATION LTD", 
# "500182" : "HERO MOTOCORP LTD", 
# "532210" : "CITY UNION BANK LTD", 
# "500180" : "HDFC Bank Ltd",
# "500680" : "PFIZER LTD", 
# "506395" : "COROMANDEL iNTERNATIONAL LTD",
# "500770" : "TATA CHEMICALS LTD", 
# "500085" : "CHAMBAL FERTILISERS & CHEMICALS LTD", 
# "501425" : "BOMBAY BURMAH TRADING CORP.LTD", 
# "532899" : "KAVERI SEED COMPANY LTD", 
# "537291" : "NATH BIO-GENES (INDIA) LTD", 
# "500790" : "NESTLE INDIA LTD", 
# "500825" : "BRITANNIA INDUSTRIES LTD", 
# "533155" : "JUBILANT FOODWORKS LTD", 
# "533287" : "ZEE LEARN LTD", 
# "533260" : "CAREER POINT LTD", 
# "539921" : "SHANTI EDUCATIONAL INITIATIVES LTD", 
# "542602" : "EMBASSY OFFICE PARKS REIT", 
# "543217" : "MINDSPACE BUSINESS PARKS REIT", 
# "543261" : "BROOKFIELD INDIA REAL ESTATE TRUST REIT", 
# "532538" : "ULTRATECH CEMENT LTD", 
# "500387" : "SHREE CEMENT LTD", 
# "500425" : "AMBUJA CEMENTS LTD", 
# "532689" : "PVR LTD", 
# "532706" : "INOX LEISURE LTD", 
# "532163" : "SAREGAMA INDIA LTD", 
# "524715" : "SUN PHARMACEUTICAL INDUSTRIES LTD", 
# "532488" : "DIVI'S LABORATORIES LTD",
# "500124" : "DR.REDDY'S LABORATORIES LTD"}

In [18]:
df_equity = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Equity.csv")
security_numbers = df_equity["Security Code"].tolist()
security_names = df_equity["Security Name"].tolist()
companies = {str(k) : v for (k, v) in list(zip(security_numbers, security_names))}
companies["542602"] = "Embassy Office Parks REIT"
# companies["542602"]

In [19]:
models = ["KNN_Regression"]
tables = {model:PrettyTable() for model in models}
for name,table in tables.items():
    table.field_names = columns

In [20]:
final_df = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Models_Results\\All_Companies_Final_Results.csv")
# final_df.drop('Unnamed: 0', inplace = True, axis = 'columns')
final_df

Unnamed: 0,Company,Model,Method,Percentage
0,500002-ABB India Limited,Linear Regression,LinearFIFValue1,0.541713
1,500002-ABB India Limited,Ridge Regression,RidgeFIFValue1,0.540601
2,500002-ABB India Limited,Lasso Regression,LassoFIFValue1,0.537264
3,500002-ABB India Limited,Elastic Net Regression,ElasticFIFValue1,0.536151
4,500003-AEGIS LOGISTICS LTD.,Lasso Regression,LassoFIFValue1000,0.539299
...,...,...,...,...
612,539921-Shanti Educational Initiatives Ltd,Lasso Regression,LassoFIFValue10,0.495726
613,542602-Embassy Office Parks REIT,Lasso Regression,LassoFIPValue0.1,0.569231
614,542602-Embassy Office Parks REIT,Elastic Net Regression,ElasticFIPValue0.1,0.569231
615,542602-Embassy Office Parks REIT,Linear Regression,LinearFICoefficients0.1,0.538462


In [21]:
%%time
for filename in os.listdir(os.path.join(path,"Data/Stock")):
    if filename.startswith("gr"):
        df_knn = pd.read_csv(os.path.join(path,"Data\Stock\\" + filename))
        name = os.path.join(path, "Data\Stock\\" + filename).split("\\")[-1]
        stock = name[2 : 8]
        fd_df = pd.DataFrame(columns = final_df.columns)
        print("For stock : ", stock)
        print("#################################################################################################################")
        f_df = get_results_from_each_set(df_knn, name, fd_df)
        final_df = final_df.append(f_df, ignore_index = True)
        print("#################################################################################################################")
#         break
        
final_df = final_df.sort_values(by = ['Company', 'Percentage'], ascending = [True, False])
final_df.to_csv('C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Models_Results\\All_Companies_Final_Results.csv', index = False) 

For stock :  500002
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['High Price GR', 'No. of Trades GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
465 899 2995
Percentage of correct direction :  0.5172413793103449
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['High Price GR', 'Low Price GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Spread High-Low GR']
KNN Model f

103 205 683
Percentage of correct direction :  0.5024390243902439
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.5024390243902439.
#################################################################################################################
For stock :  500010
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Low Price GR', 'Alpha GR', 'Open Price GR', 'Spread High-Low GR', 'Close Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
382 776 2586
Percentage of correct directio

452 885 2947
Percentage of correct direction :  0.5107344632768361
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
469 885 2947
Percentage of correct direction :  0.5299435028248588
*****************************************************************************************
Maximum correct direction values are ob

Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'Beta GR', 'Spread High-Low GR', 'Low Price GR', 'WAP GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
441 878 2925
Percentage of correct direction :  0.5022779043280182
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Low Price GR', 'Close Price GR', 'WAP GR', 'Spread High-Low GR', 'Beta GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
441 878 2925
Percentage of correct direction :  0.5022779043280182
*****************************************************************************************
All Features are con

342 656 2186
Percentage of correct direction :  0.5213414634146342
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5304878048780488.
#################################################################################################################
For stock :  500038
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'High Price GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
438 872 2904
Percentage of correct direction :  0.5022935779816514
**************************************

367 803 2676
Percentage of correct direction :  0.45703611457036114
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.45703611457036114.
#################################################################################################################
For stock :  500042
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['High Price GR', 'Open Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
388 760 2531
Percentage of correct direction :  0.5105263157894737
***********************************

433 898 2991
Percentage of correct direction :  0.4821826280623608
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
442 898 2991
Percentage of correct direction :  0.4922048997772829
*****************************************************************************************
Maximum correct direction values are ob

Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Close Price GR', 'Expenditure GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
482 894 2980
Percentage of correct direction :  0.5391498881431768
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Close Price GR', 'Beta GR', 'Expenditure GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
482 894 2980
Percentage of correct direction :  0.5391498881431768
*****************************

417 821 2734
Percentage of correct direction :  0.5079171741778319
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5420219244823387.
#################################################################################################################
For stock :  500078
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['EPS GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
423 870 2900
Percentage of correct direction :  0.4862068965517241
******************************************************************

399 818 2725
Percentage of correct direction :  0.4877750611246944
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5244498777506112.
#################################################################################################################
For stock :  500086
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Alpha GR', 'Beta GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
463 891 2969
Percentage of correct direction :  0.5196408529741863
*****************************************************

375 778 2591
Percentage of correct direction :  0.4820051413881748
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5051413881748072.
#################################################################################################################
For stock :  500093
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Alpha GR', 'High Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
426 871 2901
Percentage of correct direction :  0.4890929965556831
************************************

434 855 2848
Percentage of correct direction :  0.5076023391812865
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.5076023391812865.
#################################################################################################################
For stock :  500102
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'Alpha GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
350 720 2400
Percentage of correct direction :  0.4861111111111111
*****************************************

351 762 2537
Percentage of correct direction :  0.46062992125984253
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
348 762 2537
Percentage of correct direction :  0.4566929133858268
*****************************************************************************************
Maximum correct direction values are o

369 743 2476
Percentage of correct direction :  0.49663526244952894
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Close Price GR', 'No. of Trades GR', '% Deli. Qty to Traded Qty GR', 'Beta GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
369 743 2476
Percentage of correct direction :  0.49663526244952894
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Sha

416 830 2764
Percentage of correct direction :  0.5012048192771085
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5108433734939759.
#################################################################################################################
For stock :  500116
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['No. of Trades GR', 'Close Price GR', 'Beta GR', 'Deliverable Quantity GR', 'WAP GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
458 883 2942
Percentage of correct direction :  0.5186862

KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
398 817 2721
Percentage of correct direction :  0.48714810281517745
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.48714810281517745.
#################################################################################################################
For stock :  500123
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['WAP GR', '% Deli. Qty to Traded Qty GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection

422 859 2861
Percentage of correct direction :  0.4912689173457509
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
422 859 2861
Percentage of correct direction :  0.4912689173457509
*****************************************************************************************
Maximum correct direction values are ob

389 795 2650
Percentage of correct direction :  0.48930817610062893
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Close Price GR', '% Deli. Qty to Traded Qty GR', 'Income GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
389 795 2650
Percentage of correct direction :  0.48930817610062893
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of T

Features obtained from Forward Selection method : 
--------------------------------------
[]
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
[]
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value G

429 843 2809
Percentage of correct direction :  0.5088967971530249
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Open Price GR', 'Alpha GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
431 843 2809
Percentage of correct direction :  0.5112692763938316
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR'

For stock :  500171
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'WAP GR', 'Low Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
434 877 2922
Percentage of correct direction :  0.49486887115165334
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Low Price GR', 'Close Price GR', 'WAP GR']
KNN Model fitted using columns obtained from feature importa

458 897 2987
Percentage of correct direction :  0.5105908584169454
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5484949832775919.
#################################################################################################################
For stock :  500182
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Net Profit GR', 'Alpha GR', 'No. of Trades GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
471 900 2997
Percentage of correct direction :  0.5233333333333333
***************************

436 861 2868
Percentage of correct direction :  0.5063879210220673
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
408 861 2868
Percentage of correct direction :  0.4738675958188153
*****************************************************************************************
Maximum correct direction values are ob

390 799 2661
Percentage of correct direction :  0.4881101376720901
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Close Price GR', '% Deli. Qty to Traded Qty GR', 'Alpha GR', 'Beta GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
418 799 2661
Percentage of correct direction :  0.523153942428035
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'N

Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Alpha GR', 'WAP GR', 'No. of Trades GR', 'Spread High-Low GR', 'Net Profit GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
459 899 2994
Percentage of correct direction :  0.5105672969966629
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['High Price GR', 'WAP GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Alpha GR', 'Beta GR', 'Net Profit GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
438 899 2994
Percentage of correct direction :  0.4872080088987764
***********************************************************

Features obtained from Forward Selection method : 
--------------------------------------
['Alpha GR', 'WAP GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
378 734 2446
Percentage of correct direction :  0.5149863760217984
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['WAP GR', 'Alpha GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
378 734 2446
Percentage of correct direction :  0.5149863760217984
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Featu

340 701 2336
Percentage of correct direction :  0.48502139800285304
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5449358059914408.
#################################################################################################################
For stock :  500227
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Alpha GR', 'Close Price GR', 'Open Price GR', 'Revenue GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
474 894 2978
Percentage of correct direction :  0.5302013422818792
***

398 774 2578
Percentage of correct direction :  0.5142118863049095
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.5142118863049095.
#################################################################################################################
For stock :  500234
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'High Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
192 354 1177
Percentage of correct direction :  0.5423728813559322
************************************

284 568 1893
Percentage of correct direction :  0.5
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
289 568 1893
Percentage of correct direction :  0.5088028169014085
*****************************************************************************************
Maximum correct direction values are obtained for KNN_

333 618 2057
Percentage of correct direction :  0.5388349514563107
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.5388349514563107.
#################################################################################################################
For stock :  500247
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Close Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
442 899 2995
Percentage of correct direction :  0.4916573971078977
******************************************

431 907 3022
Percentage of correct direction :  0.47519294377067256
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.47519294377067256.
#################################################################################################################
For stock :  500251
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Spread High-Low GR', 'Income GR', 'Alpha GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
273 551 1835
Percentage of correct dire

477 899 2994
Percentage of correct direction :  0.5305895439377085
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
445 899 2994
Percentage of correct direction :  0.4949944382647386
*****************************************************************************************
Maximum correct direction values are ob

Features obtained from Forward Selection method : 
--------------------------------------
['Deliverable Quantity GR', 'Close Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
165 360 1197
Percentage of correct direction :  0.4583333333333333
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Low Price GR', 'Close Price GR', 'No. of Trades GR', 'Deliverable Quantity GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
177 360 1197
Percentage of correct direction :  0.49166666666666664
*****************************************************************************************
All Features are con

Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'Beta GR', 'Low Price GR', '% Deli. Qty to Traded Qty GR', 'Income GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
428 845 2816
Percentage of correct direction :  0.506508875739645
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Low Price GR', 'Close Price GR', '% Deli. Qty to Traded Qty GR', 'Beta GR', 'Income GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
428 845 281

255 506 1686
Percentage of correct direction :  0.5039525691699605
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5118577075098815.
#################################################################################################################
For stock :  500680
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Low Price GR', 'No. of Trades GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
442 880 2932
Percentage of correct direction :  0.5022727272727273
****************************************

365 745 2483
Percentage of correct direction :  0.4899328859060403
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
379 745 2483
Percentage of correct direction :  0.508724832214765
*****************************************************************************************
Maximum correct direction values are obt

Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'WAP GR', 'Alpha GR', 'Close Price GR', 'Open Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
422 866 2886
Percentage of correct direction :  0.48729792147806006
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Open Price GR', 'Close Price GR', 'Alpha GR', 'Beta GR']
KNN Model fitted using columns obtained from feature importance using BackwardElimination : 
420 866 2886
Percentage of correct direction :  0.48498845265588914
*****************************************************************************************
All Features are considered : 
***************

460 898 2992
Percentage of correct direction :  0.512249443207127
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_BackwardElimination_ with a percentage of 0.5167037861915368.
#################################################################################################################
For stock :  532210
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['No. of Trades GR', 'Open Price GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
421 868 2893
Percentage of correct direction :  0.48502304147465436
***************************************

429 832 2771
Percentage of correct direction :  0.515625
*****************************************************************************************
Maximum correct direction values are obtained for KNN_Regression_FI_AllFeaturesConsideration_ with a percentage of 0.515625.
#################################################################################################################
For stock :  532689
#################################################################################################################
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Beta GR', 'Revenue GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
344 692 2304
Percentage of correct direction :  0.49710982658959535
*****************************************************************

382 712 2371
Percentage of correct direction :  0.5365168539325843
*****************************************************************************************
All Features are considered : 
*****************************************************************************************
All Features are --->>
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR']
KNN Model fitted using columns obtained from feature importance using AllFeaturesConsideration : 
343 712 2371
Percentage of correct direction :  0.48174157303370785
*****************************************************************************************
Maximum correct direction values are o

Features obtained from Forward Selection method : 
--------------------------------------
['Close Price GR', 'Deliverable Quantity GR', 'WAP GR', 'Low Price GR', 'Spread High-Low GR', 'No. of Trades GR']
KNN Model fitted using columns obtained from feature importance using ForwardSelection : 
74 117 388
Percentage of correct direction :  0.6324786324786325
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Open Price GR', 'High Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Dividend Value GR']
KNN Model fitted using columns obtained from feature importance using Backwar

In [22]:
# %%time

# mydf = pd.DataFrame(columns=columns)
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         filepath = os.path.join(dirname, filename)
#         df = pd.read_csv(filepath)
#         df = pre_process_data(df,60)
#         column = "Next Day Close Price GR"
#         (df,column) = dependent_column(df,column)
#         X = df.drop(columns=[column])
#         Y = df[column]
#         result = k_nearest_neighbours(X,Y)
#         result.update({"Company":filename[2:8] + "-" + companies[filename[2:8]]})
#         mydf = mydf.append(result,ignore_index=True)
# mydf.to_csv(os.path.join(os.getcwd(),"best_knn"+".csv"),index=None)

In [23]:
for name,table in tables.items():
    print(table)

+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                                  KNN_Regression                                                                                 |
+-------------------------------------------------+---------------------------+-----------+----------+-------------+--------------+-------------+---------+----------+------------+
|                     Company                     |           Method          |    RMSE   |   MAE    |     MSE     | rsquared_adj | n_neighbors | weights |  metric  | Percentage |
+-------------------------------------------------+---------------------------+-----------+----------+-------------+--------------+-------------+---------+----------+------------+
|             500002-ABB India Limited            |     ForwardSelection      |  0.02226  | 0.014846

In [24]:
final_df

Unnamed: 0,Company,Model,Method,Percentage
0,500002-ABB India Limited,Linear Regression,LinearFIFValue1,0.541713
1,500002-ABB India Limited,Ridge Regression,RidgeFIFValue1,0.540601
2,500002-ABB India Limited,Lasso Regression,LassoFIFValue1,0.537264
3,500002-ABB India Limited,Elastic Net Regression,ElasticFIFValue1,0.536151
617,500002-ABB India Limited,KNN-Regression,KNN_Regression_FI_ForwardSelection_,0.517241
...,...,...,...,...
613,542602-Embassy Office Parks REIT,Lasso Regression,LassoFIPValue0.1,0.569231
614,542602-Embassy Office Parks REIT,Elastic Net Regression,ElasticFIPValue0.1,0.569231
615,542602-Embassy Office Parks REIT,Linear Regression,LinearFICoefficients0.1,0.538462
616,542602-Embassy Office Parks REIT,Ridge Regression,RidgeFICoefficients0.1,0.538462
