In [1]:
# %pip install mlxtend

In [2]:
import pandas as pd
import numpy as np 
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from math import sqrt
import statsmodels.api as sm
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from prettytable import PrettyTable
import time 
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
import matplotlib.patches as patches
import warnings; warnings.simplefilter('ignore')
import sys
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import f_regression
from prettytable import PrettyTable

In [3]:
# Working directory of the kernel; the driver cell below resolves the
# "Data/Stock" input folder relative to this location.
path = os.getcwd()
path

'C:\\Users\\venu\\Desktop\\Stock Market Analysis'

# PreProcessing Data

In [4]:
def pre_process_data(data, null_threshold):
    """Return a cleaned copy of a stock frame, ready for regression.

    Steps, in order:
      1. drop the 'Unix Date' and 'Date' columns (when present);
      2. drop every column whose null count exceeds ``null_threshold``
         percent of the number of rows;
      3. convert +inf / -inf to NaN;
      4. drop every row that still contains a NaN.

    The input frame is left untouched, so the function can safely be called
    again on the same frame (e.g. when a notebook cell is re-run).

    Parameters:
        data: pandas DataFrame holding the raw stock data.
        null_threshold: maximum tolerated share of nulls per column,
            expressed as a percentage (0-100).

    Returns:
        A new DataFrame with the sparse columns and incomplete rows removed.
    """
    # errors='ignore' keeps a repeated call (date columns already gone) from raising.
    cleaned = data.drop(columns=['Unix Date', 'Date'], errors='ignore')

    # Infinite values are still counted as "present" here; they are only
    # turned into NaN after the column threshold has been applied.
    max_nulls = null_threshold * cleaned.shape[0] / 100
    null_counts = cleaned.isnull().sum()
    sparse_columns = null_counts[null_counts > max_nulls].index
    cleaned = cleaned.drop(columns=sparse_columns)

    cleaned = cleaned.replace([np.inf, -np.inf], np.nan)
    return cleaned.dropna(axis=0)

# Keeping only the growth-rate ("GR") feature columns plus the dependent column

In [5]:
def dependent_column(data, column):
    """Restrict a frame to its growth-rate features plus the target column.

    A column is kept as a feature when its lower-cased name ends with "gr"
    and does not contain "next". The target ``column`` is appended last.

    Returns:
        A tuple ``(subset_frame, column)``.
    """
    selected = []
    for candidate in data.columns:
        lowered = candidate.lower()
        if "next" not in lowered and lowered.endswith("gr"):
            selected.append(candidate)
    selected.append(column)
    return (data[selected], column)

# OLS Regression

In [6]:
def OLS_Regression(X_train, Y_train):
    """Fit a statsmodels OLS model and return its summary statistics.

    ``X_train`` is converted to a float array and used as the design matrix
    as-is (no constant column is added here).

    Returns:
        dict with the keys "rsquared_adj", "aic", "bic" and "fvalue",
        in that order.
    """
    design = np.array(X_train, dtype=float)
    fitted = sm.OLS(Y_train, design).fit()
    return {
        "rsquared_adj": fitted.rsquared_adj,
        "aic": fitted.aic,
        "bic": fitted.bic,
        "fvalue": fitted.fvalue,
    }

# Linear Regression

In [7]:
def linear_regression(data, y):
    """Fit ordinary linear regression on a 70/30 split and report test metrics.

    Every column of ``data`` except the last is used as a feature; ``y``
    names the target column.

    Returns:
        dict with, in this order: "root_mean_squared_error",
        "mean_absolute_error", "mean_squared_error", "OLS" (statistics from
        OLS_Regression on the training split), "Confidence" (the model's
        ``score`` on the test split), "Predicted" and "Actual".
    """
    features = data[data.columns[:-1]]
    target = data[y].values
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=0.3, random_state=0
    )

    regressor = LinearRegression(fit_intercept=True)
    regressor.fit(X_train, Y_train)
    pred = regressor.predict(X_test)

    mse = metrics.mean_squared_error(Y_test, pred)
    # Key order matters: create_pretty_table reads the values positionally.
    return {
        "root_mean_squared_error": sqrt(mse),
        "mean_absolute_error": metrics.mean_absolute_error(Y_test, pred),
        "mean_squared_error": mse,
        "OLS": OLS_Regression(X_train, Y_train),
        "Confidence": regressor.score(X_test, Y_test),
        "Predicted": pred,
        "Actual": Y_test,
    }

# linear regression with forward selection

In [8]:
def forward_selection(data, target, significance_level=0.05):
    """Greedy forward feature selection driven by OLS p-values.

    Starting from no features, each round fits one OLS model (with a
    constant) per remaining candidate, added on top of the features chosen
    so far. The candidate with the smallest p-value is kept when that
    p-value is below ``significance_level``; otherwise selection stops.

    Parameters:
        data: DataFrame of candidate feature columns.
        target: response values aligned with the rows of ``data``.
        significance_level: p-value a candidate must beat to be selected.

    Returns:
        List of selected column names, in the order they were chosen.
    """
    best_features = []
    # Keep candidates in column order (not a set) so that ties between equal
    # p-values are resolved the same way on every run.
    remaining_features = data.columns.tolist()
    while remaining_features:
        # Explicit float dtype: an untyped empty Series would be object-typed.
        new_pval = pd.Series(index=remaining_features, dtype=float)
        for new_column in remaining_features:
            design = sm.add_constant(data[best_features + [new_column]]).astype(float)
            new_pval[new_column] = sm.OLS(target, design).fit().pvalues[new_column]
        min_p_value = new_pval.min()
        # Written as "not (<)" so a NaN minimum also ends the search.
        if not (min_p_value < significance_level):
            break
        chosen = new_pval.idxmin()
        best_features.append(chosen)
        remaining_features.remove(chosen)
    return best_features

In [9]:
def linear_regression_forward_selection(data, y):
    """Run forward selection, then fit linear regression on the chosen features.

    All columns except the last are offered to ``forward_selection``; the
    selected names are printed and the result of ``linear_regression`` on
    those columns plus the target ``y`` is returned.
    """
    predictors = data[data.columns[:-1]]
    response = data[y].values
    forward_features = forward_selection(predictors, response)
    print("Features obtained from Forward Selection : ")
    print(forward_features)
    reduced = data[forward_features + [y]]
    return linear_regression(reduced, y)

# linear regression with backward elimination

In [10]:
def backward_elimination(data, target, significance_level=0.05):
    """Backward feature elimination driven by OLS p-values.

    Starting from every column of ``data``, repeatedly fits OLS (with a
    constant) and removes the feature with the largest p-value while that
    p-value is at or above ``significance_level``.

    Returns:
        List of the column names that survived.
    """
    features = data.columns.tolist()
    while features:
        design = sm.add_constant(data[features]).astype(float)
        fitted = sm.OLS(target, design).fit()
        # Skip the first entry, which is expected to be the added constant.
        p_values = fitted.pvalues[1:]
        worst_p_value = p_values.max()
        # "not (>=)" keeps the original behaviour of stopping on a NaN maximum.
        if not (worst_p_value >= significance_level):
            break
        features.remove(p_values.idxmax())
    return features

In [11]:
def linear_regression_backward_selection(data, y):
    """Run backward elimination, then fit linear regression on the survivors.

    All columns except the last are offered to ``backward_elimination``; the
    remaining names are printed and the result of ``linear_regression`` on
    those columns plus the target ``y`` is returned.
    """
    predictors = data[data.columns[:-1]]
    response = data[y].values
    backward_features = backward_elimination(predictors, response)
    print("Features obtained from Backward Elimination : ")
    print(backward_features)
    reduced = data[backward_features + [y]]
    return linear_regression(reduced, y)

# Using Inbuilt Forward Selection Method

In [12]:
def forward_selection_inbuilt(X, Y, k, score):
    """Select ``k`` features with mlxtend's forward sequential selector.

    Uses a LinearRegression estimator, the given ``score`` as the scoring
    name, no floating step and ``cv=0``.

    Returns:
        List of the selected feature names.
    """
    selector = SFS(
        LinearRegression(),
        k_features=k,
        forward=True,
        floating=False,
        scoring=score,
        cv=0,
    )
    selector.fit(X, Y)
    return list(selector.k_feature_names_)

In [13]:
def linear_regression_forward_selection_inbuit(data, y):
    """Tabulate the forward-selected feature sets for every size and scorer.

    For each subset size k (1 .. number of feature columns) and each scoring
    name, runs ``forward_selection_inbuilt`` and stores the selected feature
    list in a table indexed by k with one column per scorer. The table is
    also written to "forwardFeatures.csv" (without the index).

    Parameters:
        data: DataFrame whose last column is the target; all others are features.
        y: name of the target column.

    Returns:
        The DataFrame of selected feature lists.
    """
    X = data[data.columns[:-1]]
    Y = data[y].values
    scores = ['explained_variance','max_error','neg_mean_absolute_error','neg_mean_squared_error',
                  'neg_root_mean_squared_error','neg_median_absolute_error','r2']
    # k is bounded by the number of *features*; data.shape[1] would also count
    # the target column and request one more feature than X contains.
    subset_sizes = range(1, X.shape[1] + 1)
    df = pd.DataFrame(columns=scores, index=subset_sizes)
    for k in subset_sizes:
        for score in scores:
            df.loc[k, score] = forward_selection_inbuilt(X, Y, k, score)
    df.to_csv("forwardFeatures.csv", index=None)
    return df

# Using Inbuilt Backward Elimination Method

In [14]:
def backward_selection_inbuilt(X, Y, k, score):
    """Select ``k`` features with mlxtend's backward sequential selector.

    Uses a LinearRegression estimator, the given ``score`` as the scoring
    name, no floating step and ``cv=0``.

    Returns:
        List of the selected feature names.
    """
    selector = SFS(
        LinearRegression(),
        k_features=k,
        forward=False,
        floating=False,
        scoring=score,
        cv=0,
    )
    selector.fit(X, Y)
    return list(selector.k_feature_names_)

In [15]:
def linear_regression_backward_selection_inbuit(data, y):
    """Tabulate the backward-selected feature sets for every size and scorer.

    For each subset size k (1 .. number of feature columns) and each scoring
    name, runs ``backward_selection_inbuilt`` and stores the selected feature
    list in a table indexed by k with one column per scorer. The table is
    also written to "backwardFeatures.csv" (without the index).

    Parameters:
        data: DataFrame whose last column is the target; all others are features.
        y: name of the target column.

    Returns:
        The DataFrame of selected feature lists.
    """
    X = data[data.columns[:-1]]
    Y = data[y].values
    scores = ['explained_variance','max_error','neg_mean_absolute_error','neg_mean_squared_error',
                  'neg_root_mean_squared_error','neg_median_absolute_error','r2']
    # k is bounded by the number of *features*; data.shape[1] would also count
    # the target column and request one more feature than X contains.
    subset_sizes = range(1, X.shape[1] + 1)
    df = pd.DataFrame(columns=scores, index=subset_sizes)
    for k in subset_sizes:
        for score in scores:
            df.loc[k, score] = backward_selection_inbuilt(X, Y, k, score)
    df.to_csv("backwardFeatures.csv", index=None)
    return df

# Ridge Regression

In [16]:
def bestparams_ridge(alpha, X_train, Y_train):
    """Grid-search the Ridge ``alpha`` that maximises cross-validated R².

    Parameters:
        alpha: iterable of candidate alpha values.
        X_train, Y_train: training features and targets.

    Returns:
        The ``alpha`` of the best estimator found by the search.
    """
    # NOTE(review): this initial fit looks redundant because the grid search
    # fits its own candidates — confirm before removing.
    base_estimator = Ridge(alpha=1).fit(X_train, Y_train)
    search = GridSearchCV(
        estimator=base_estimator,
        param_grid={"alpha": alpha},
        scoring='r2',
    )
    search.fit(X_train, Y_train)
    return search.best_estimator_.alpha

In [17]:
def ridge_regression(data, y):
    """Tune and fit Ridge regression on a 70/30 split and report test metrics.

    Alpha is chosen in two grid-search passes (see ``bestparams_ridge``):
    a coarse pass over fixed values, then a unit-step pass around the coarse
    winner. Every column of ``data`` except the last is used as a feature;
    ``y`` names the target column.

    Returns:
        dict with, in this order: "root_mean_squared_error",
        "mean_absolute_error", "mean_squared_error", "OLS", "Confidence"
        (the fitted model's ``score`` on the test split), "Predicted" and
        "Actual". The "OLS" entry comes from OLS_Regression on the training
        split, i.e. from a plain OLS fit, not from the Ridge model.
    """
    X = data[data.columns[:-1]]
    Y = data[y].values

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    # Pass 1: coarse search over fixed candidate values.
    alpha = np.array([1,0.1,0.01,0.001,0.0001,0])
    best = bestparams_ridge(alpha,X_train,Y_train)

    # Pass 2: unit steps around the coarse winner. Because best <= 1, the raw
    # range starts below zero; negative alphas are not a valid regularisation
    # strength, so they are removed before searching.
    alpha1 = np.arange(best-10,best+10)
    alpha1 = alpha1[alpha1 >= 0]
    best_alpha = bestparams_ridge(alpha1,X_train,Y_train)

    # Final model with the tuned alpha.
    clf = Ridge(alpha=best_alpha)
    clf.fit(X_train, Y_train)

    pred = clf.predict(X_test)

    confidence = clf.score(X_test, Y_test)
    mse = metrics.mean_squared_error(Y_test, pred)
    rmse = sqrt(mse)
    mae = metrics.mean_absolute_error(Y_test, pred)

    ols_values = OLS_Regression(X_train,Y_train)

    # Key order matters: create_pretty_table reads the values positionally.
    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"OLS":ols_values, "Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test}

# Lasso Regression

In [18]:
def bestparams_lasso(alpha, X_train, Y_train):
    """Grid-search the Lasso ``alpha`` that maximises cross-validated R².

    Parameters:
        alpha: iterable of candidate alpha values.
        X_train, Y_train: training features and targets.

    Returns:
        The ``alpha`` of the best estimator found by the search.
    """
    # NOTE(review): this initial fit looks redundant because the grid search
    # fits its own candidates — confirm before removing.
    base_estimator = Lasso(alpha=1).fit(X_train, Y_train)
    search = GridSearchCV(
        estimator=base_estimator,
        param_grid={"alpha": alpha},
        scoring='r2',
    )
    search.fit(X_train, Y_train)
    return search.best_estimator_.alpha

In [19]:
def lasso_regression(data, y):
    """Tune and fit Lasso regression on a 70/30 split and report test metrics.

    Alpha is chosen in two grid-search passes (see ``bestparams_lasso``):
    a coarse pass over fixed values, then a unit-step pass around the coarse
    winner. Every column of ``data`` except the last is used as a feature;
    ``y`` names the target column.

    Returns:
        dict with, in this order: "root_mean_squared_error",
        "mean_absolute_error", "mean_squared_error", "OLS", "Confidence"
        (the fitted model's ``score`` on the test split), "Predicted" and
        "Actual". The "OLS" entry comes from OLS_Regression on the training
        split, i.e. from a plain OLS fit, not from the Lasso model.
    """
    X = data[data.columns[:-1]]
    Y = data[y].values

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    # Pass 1: coarse search over fixed candidate values.
    alpha = np.array([1,0.1,0.01,0.001,0.0001,0])
    best = bestparams_lasso(alpha,X_train,Y_train)

    # Pass 2: unit steps around the coarse winner. Because best <= 1, the raw
    # range starts below zero; negative alphas are not a valid regularisation
    # strength, so they are removed before searching.
    alpha1 = np.arange(best-10,best+10)
    alpha1 = alpha1[alpha1 >= 0]
    best_alpha = bestparams_lasso(alpha1,X_train,Y_train)

    # Final model with the tuned alpha.
    clf = Lasso(alpha=best_alpha)
    clf.fit(X_train, Y_train)

    pred = clf.predict(X_test)

    confidence = clf.score(X_test, Y_test)
    mse = metrics.mean_squared_error(Y_test, pred)
    rmse = sqrt(mse)
    mae = metrics.mean_absolute_error(Y_test, pred)
    ols_values = OLS_Regression(X_train,Y_train)

    # Key order matters: create_pretty_table reads the values positionally.
    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"OLS":ols_values, "Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test}

# Elastic Net Regression

In [20]:
def bestparams_elastic(alphas, l1, X_train, Y_train):
    """Grid-search ElasticNet ``alpha`` and ``l1_ratio`` by cross-validated R².

    Parameters:
        alphas: iterable of candidate alpha values.
        l1: iterable of candidate l1_ratio values.
        X_train, Y_train: training features and targets.

    Returns:
        Tuple ``(alpha, l1_ratio)`` of the best estimator found.
    """
    # NOTE(review): this initial fit looks redundant because the grid search
    # fits its own candidates — confirm before removing.
    base_estimator = ElasticNet(alpha=1, l1_ratio=0.2).fit(X_train, Y_train)
    search = GridSearchCV(
        estimator=base_estimator,
        param_grid={"alpha": alphas, "l1_ratio": l1},
        scoring='r2',
    )
    fitted_search = search.fit(X_train, Y_train)
    winner = fitted_search.best_estimator_
    return (winner.alpha, winner.l1_ratio)

In [21]:
def elastic_net_regression(data, y):
    """Tune and fit ElasticNet on a 70/30 split and report test metrics.

    A first grid search picks ``alpha`` and ``l1_ratio`` from fixed values.
    When the winning alpha is non-zero a second search is run over
    ``np.arange(alpha/10, alpha*10)`` with the same l1_ratio candidates.
    Every column of ``data`` except the last is used as a feature; ``y``
    names the target column.

    Returns:
        dict with, in this order: "root_mean_squared_error",
        "mean_absolute_error", "mean_squared_error", "OLS" (from
        OLS_Regression on the training split), "Confidence" (the fitted
        model's ``score`` on the test split), "Predicted" and "Actual".
    """
    features = data[data.columns[:-1]]
    target = data[y].values
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=0.3, random_state=0
    )

    # First pass over fixed candidate values.
    alpha = np.array([0,0.1,0.001,0.0001,1])
    l1_ratio = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
    best = bestparams_elastic(alpha, l1_ratio, X_train, Y_train)

    if best[0] == 0:
        # A zero alpha is used directly; no second search is run.
        chosen_alpha, chosen_l1 = best[0], best[1]
    else:
        # NOTE(review): np.arange uses a step of 1 here, so for small alphas
        # this grid holds a single value (alpha/10) — confirm that is intended.
        alpha1 = np.arange(best[0]/10, best[0]*10)
        best_alpha = bestparams_elastic(alpha1, l1_ratio, X_train, Y_train)
        chosen_alpha, chosen_l1 = best_alpha[0], best_alpha[1]

    clf = ElasticNet(alpha=chosen_alpha, l1_ratio=chosen_l1)
    clf.fit(X_train, Y_train)

    pred = clf.predict(X_test)
    mse = metrics.mean_squared_error(Y_test, pred)

    # Key order matters: create_pretty_table reads the values positionally.
    return {
        "root_mean_squared_error": sqrt(mse),
        "mean_absolute_error": metrics.mean_absolute_error(Y_test, pred),
        "mean_squared_error": mse,
        "OLS": OLS_Regression(X_train, Y_train),
        "Confidence": clf.score(X_test, Y_test),
        "Predicted": pred,
        "Actual": Y_test,
    }
    

In [22]:
def coeff_vs_Regularization(X_train, Y_train):
    """Plot ElasticNet coefficients against the regularisation strength.

    Fits one ElasticNet per alpha on 200 log-spaced values between 1e-10
    and 1e-2 and draws every coefficient as a line over alpha on a
    logarithmic x axis.
    """
    alphas = np.logspace(-10, -2, 200)
    coefs = [ElasticNet(alpha=a).fit(X_train, Y_train).coef_ for a in alphas]

    ax = plt.gca()
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
    plt.xlabel('alpha(log scale)')
    plt.ylabel('Coefficients')
    plt.title('ElasticNet - Coefficients Vs Regularization')
    plt.axis('tight')
    plt.show()

# Finding results from each set of important features

In [23]:
# Header of the per-model result tables; create_pretty_table builds each row
# in this same order.
columns = ['Company','Method','Percentage', 'RMSE', 'MAE', 'MSE','Confidence', 'rsquared_adj']

In [24]:
# Display name for each six-character stock code; the code is read from
# characters 2-7 of the input file name (name[2 : 8]).
companies = {"500112" : "SBIN" ,
"500325" : "RELIANCE INDUSTRIES LTD",
"532540" : "TATA CONSULTANCY SERVICES LTD" ,
"500209" : "INFOSYS LTD", 
"532174" : "ICICI BANK LTD", 
"507685" : "WIPRO LTD", 
"530965" : "INDIAN OIL CORPORATION LTD", 
"500182" : "HERO MOTOCORP LTD", 
"532210" : "CITY UNION BANK LTD", 
"500180" : "HDFC Bank Ltd",
"500680" : "PFIZER LTD", 
"506395" : "COROMANDEL iNTERNATIONAL LTD",
"500770" : "TATA CHEMICALS LTD", 
"500085" : "CHAMBAL FERTILISERS & CHEMICALS LTD", 
"501425" : "BOMBAY BURMAH TRADING CORP.LTD", 
"532899" : "KAVERI SEED COMPANY LTD", 
"537291" : "NATH BIO-GENES (INDIA) LTD", 
"500790" : "NESTLE INDIA LTD", 
"500825" : "BRITANNIA INDUSTRIES LTD", 
"533155" : "JUBILANT FOODWORKS LTD", 
"533287" : "ZEE LEARN LTD", 
"533260" : "CAREER POINT LTD", 
"539921" : "SHANTI EDUCATIONAL INITIATIVES LTD", 
"542602" : "EMBASSY OFFICE PARKS REIT", 
"543217" : "MINDSPACE BUSINESS PARKS REIT", 
"543261" : "BROOKFIELD INDIA REAL ESTATE TRUST REIT", 
"532538" : "ULTRATECH CEMENT LTD", 
"500387" : "SHREE CEMENT LTD", 
"500425" : "AMBUJA CEMENTS LTD", 
"532689" : "PVR LTD", 
"532706" : "INOX LEISURE LTD", 
"532163" : "SAREGAMA INDIA LTD", 
"524715" : "SUN PHARMACEUTICAL INDUSTRIES LTD", 
"532488" : "DIVI'S LABORATORIES LTD",
"500124" : "DR.REDDY'S LABORATORIES LTD"}

In [25]:
# One PrettyTable per model, keyed by the model's display name. fit_model
# addresses these tables as "<short model name> Regression".
models = ["Linear Regression","Lasso Regression","Ridge Regression","Elastic Regression"]
tables = {model:PrettyTable() for model in models}
# Every table shares the same header.
for name,table in tables.items():
    table.field_names = columns

In [26]:
# Columns of the summary frame that holds the best method per company and model.
final_columns = ['Company', 'Model', 'Method', 'Percentage']

In [27]:
# Empty summary frame; the driver cell appends the per-stock results to it.
final_df = pd.DataFrame(columns = final_columns)
final_df

Unnamed: 0,Company,Model,Method,Percentage


In [28]:
def create_pretty_table(name, model, result, method, percentage):
    """Append one result row to the PrettyTable registered for ``model``.

    The row holds "<code>-<company name>", the method, the rounded
    percentage, every non-dict value of ``result`` in insertion order, and
    finally every value of ``result["OLS"]``; numbers are rounded to six
    decimals. The table title is set to ``model``.
    """
    stock_code = name[2 : 8]
    row = [stock_code + "-" + companies[stock_code], method, round(percentage, 6)]
    for value in result.values():
        if not isinstance(value, dict):
            row.append(round(value, 6))
    for ols_value in result["OLS"].values():
        row.append(round(ols_value, 6))

    table = tables[model]
    table.add_row(row)
    table.title = model

In [29]:
def fit_model(models, df, column, method, value, name, results):
    """Fit each requested model on ``df`` and record its directional accuracy.

    For every entry of ``models`` ("Linear", "Ridge", "Lasso"; any other
    value is treated as elastic net) this:
      * fits the model on ``df`` with ``column`` as target;
      * writes predicted vs. actual values to
        "<path>/Data/Models_Results/<code>_sd_<model>FI<method><value>.csv",
        where ``path`` is the notebook's working directory;
      * counts how often prediction and actual value have the same sign
        (a zero on either side counts as a different direction);
      * stores the same-direction ratio in ``results`` under
        "<model>FI<method><value>";
      * adds a row to the model's PrettyTable.

    Parameters:
        models: iterable of short model names.
        df: DataFrame of selected features with the target as last column.
        column: name of the target column.
        method: name of the feature-selection method (used in labels/keys).
        value: threshold used by the method, as a string (may be '').
        name: input file name; characters 2-7 are the stock code.
        results: dict updated in place with the ratio per model/method.
    """
    # Built from the notebook's base directory instead of a machine-specific
    # absolute path, so the notebook runs from any checkout location.
    output_dir = os.path.join(path, "Data", "Models_Results")
    stock_code = name[2:8]

    for model in models:
        if (model == "Linear"):
            model_result = linear_regression(df, column)
        elif (model == "Ridge"):
            model_result = ridge_regression(df, column)
        elif (model == "Lasso"):
            model_result = lasso_regression(df, column)
        else:
            model_result = elastic_net_regression(df, column)

        print(model + " Model fitted using columns obtained from feature importance using " + method + " : ")
        pred = model_result['Predicted']
        actual = model_result['Actual']
        run_key = model + "FI" + method + str(value)

        pred_actual = pd.DataFrame(list(zip(pred, actual)),
                   columns =['Predicted Values', 'Actual Values'])
        pred_actual.to_csv(os.path.join(output_dir, stock_code + "_sd_" + run_key + ".csv"), index=False)

        same_dir = 0
        diff_dir = 0
        for a, b in zip(pred, actual):
            # Strict comparisons: a zero never counts as "same direction".
            if (a > 0 and b > 0) or (a < 0 and b < 0):
                same_dir += 1
            else:
                diff_dir += 1

        percentage = same_dir / (same_dir + diff_dir)
        print("Values in Same direction -----> ----->", same_dir)
        print("Values in Opposite direction <----- -----> ", diff_dir)
        print("Percentage of correct direction : ", percentage)
        results[run_key] = percentage

        # Keep only the scalar metrics and rsquared_adj for the summary table.
        del model_result['Predicted']
        del model_result['Actual']
        del model_result['OLS']['aic']
        del model_result['OLS']['bic']
        del model_result['OLS']['fvalue']
        create_pretty_table(name ,model + " Regression" ,model_result, method + " " + value, percentage)

In [30]:
def get_results_from_FI_Coeffiecients(df, name, column, results):
    """Select features by linear-regression coefficient size and fit all models.

    Fits a LinearRegression on every column of ``df`` except the last, keeps
    the features whose coefficient (rounded to six decimals) is larger in
    absolute value than each threshold, and passes the reduced frame to
    ``fit_model``. A threshold that selects no feature is skipped.

    Parameters:
        df: DataFrame with the target as last column.
        name: input file name, forwarded to ``fit_model``.
        column: name of the target column.
        results: dict updated in place by ``fit_model``.
    """
    print("Features Importance using Coefficients")
    print("*****************************************************************************************")
    X = df[df.columns[:-1]]
    Y = df[column].values
    model_linear = LinearRegression(fit_intercept=True)
    model_linear.fit(X, Y)
    # Pair each feature column with its rounded coefficient.
    rc_coef = list(zip(X.columns, [round(i,6) for i in model_linear.coef_]))
    coef = [0.1]
    method = "Coefficients"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for cf in coef:
        # Start from an empty list for every threshold; a shared list would
        # carry over features (and the appended target) from earlier thresholds.
        coef_features = [feature for feature, weight in rc_coef if abs(weight) > cf]
        print("Features obtained from coefficients greater than " + str(cf) + " : ")
        print("--------------------------------------")
        print(coef_features)
        if (len(coef_features) == 0):
            continue
        coef_features.append(column)
        df_fic = df[coef_features]
        fit_model(models, df_fic, column, method, str(cf), name, results)
    print("*****************************************************************************************")

In [31]:
def get_results_from_FI_PValue(df, name, column, results):
    """Select features by OLS p-value and fit all models on each selection.

    Fits statsmodels OLS on the training part of a 70/30 split (all columns
    of ``df`` except the last as features) and, for each threshold in
    0.02, 0.05, 0.1 and 0.2, keeps the features whose p-value is below it.
    Non-empty selections are passed to ``fit_model`` together with the target.
    """
    print("Features Importance using p-value")
    print("*****************************************************************************************")
    features = df[df.columns[:-1]]
    target = df[column].values
    X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size=0.3, random_state=0)
    ols_model = sm.OLS(Y_train, np.array(X_train, dtype=float)).fit()
    # zip stops at the shorter sequence, so the trailing target column of df
    # is not paired with a p-value.
    pvals_cols = list(zip(list(df.columns), list(ols_model.pvalues)))
    method = "PValue"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for pv in [0.02, 0.05, 0.1, 0.2]:
        pval_features = [col_name for col_name, p_value in pvals_cols if p_value < pv]
        print("Features obtained from p-values less than " + str(pv) + " : ")
        print("-------------------------------------------------")
        print(pval_features)
        if not pval_features:
            continue
        pval_features.append(column)
        fit_model(models, df[pval_features], column, method, str(pv), name, results)
    print("*****************************************************************************************")
    

In [32]:
def get_results_from_FI_FValues(df, name, column, results):
    """Select features by univariate F-statistic and fit all models.

    Computes ``f_regression`` for every column of ``df`` except the last
    and, for each threshold in 1, 10, 100 and 1000, keeps the features whose
    absolute F-value exceeds it. Non-empty selections are passed to
    ``fit_model`` together with the target.
    """
    print("Features Importance using f-value")
    print("*****************************************************************************************")
    features = df[df.columns[:-1]]
    target = df[column].values
    f_statistics = f_regression(features, target)[0]
    fc = list(zip(features.columns, f_statistics))
    method = "FValue"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for fv in [1, 10, 100, 1000]:
        fval_features = [col_name for col_name, f_value in fc if abs(f_value) > fv]
        print("Features obtained from f-values greater than " + str(fv) + " : ")
        print("--------------------------------------")
        print(fval_features)
        if not fval_features:
            continue
        fval_features.append(column)
        fit_model(models, df[fval_features], column, method, str(fv), name, results)
    print("*****************************************************************************************")


In [33]:
def get_results_from_FI_ForwardSelection(df1, name, column, results):
    """Select features with ``forward_selection`` and fit all models on them.

    All columns of ``df1`` except the last are candidates. When at least one
    feature is selected, the reduced frame (features plus target) is passed
    to ``fit_model`` with an empty threshold label.
    """
    banner = "*****************************************************************************************"
    print("Features Importance using Forward Selection Method")
    print(banner)
    candidates = df1[df1.columns[:-1]]
    response = df1[column].values
    forward_features = forward_selection(candidates, response)
    print("Features obtained from Forward Selection method : ") 
    print("--------------------------------------")
    print(forward_features)
    if forward_features:
        forward_features.append(column)
        fit_model(["Linear", "Ridge", "Lasso", "Elastic"], df1[forward_features],
                  column, "ForwardSelection", '', name, results)
    print(banner)

In [34]:
def get_results_from_FI_BackwardElimination(df1, name, column, results):
    """Select features with ``backward_elimination`` and fit all models on them.

    All columns of ``df1`` except the last are candidates. When at least one
    feature survives, the reduced frame (features plus target) is passed to
    ``fit_model`` with an empty threshold label.
    """
    banner = "*****************************************************************************************"
    print("Features Importance using Backward Elimination Method")
    print(banner)
    candidates = df1[df1.columns[:-1]]
    response = df1[column].values
    backward_features = backward_elimination(candidates, response)
    print("Features obtained from Backward Elimination method : ") 
    print("--------------------------------------")
    print(backward_features)
    if backward_features:
        backward_features.append(column)
        fit_model(["Linear", "Ridge", "Lasso", "Elastic"], df1[backward_features],
                  column, "BackwardElimination", '', name, results)
    print(banner)

In [35]:
def get_results_from_each_set(data, name, final_df):
    """Run every feature-selection method for one stock and summarise the best.

    The raw frame is cleaned (null threshold 60 %), reduced to the
    growth-rate features plus "Next Day Close Price GR", and then evaluated
    with the coefficient, p-value, forward-selection, backward-elimination
    and F-value pipelines. For each model family the method with the highest
    same-direction ratio is appended to ``final_df`` as one row.

    Parameters:
        data: raw stock DataFrame.
        name: input file name; characters 2-7 are the stock code, which must
            be a key of ``companies``.
        final_df: DataFrame the summary rows are appended to.

    Returns:
        A new DataFrame: ``final_df`` followed by one row per model family
        for which at least one fit was recorded.
    """
    df = pre_process_data(data, 60)
    column = "Next Day Close Price GR"
    (df1, column) = dependent_column(df, column)
    results = {}
    get_results_from_FI_Coeffiecients(df1, name, column, results)
    get_results_from_FI_PValue(df1, name, column, results)
    get_results_from_FI_ForwardSelection(df1, name, column, results)
    get_results_from_FI_BackwardElimination(df1, name, column, results)
    get_results_from_FI_FValues(df1, name, column, results)

    company = name[2 : 8] + "-" + companies[name[2 : 8]]
    # (substring identifying the model in the result keys, label used in the summary)
    model_labels = [
        ("Linear", "Linear Regression"),
        ("Ridge", "Ridge Regression"),
        ("Lasso", "Lasso Regression"),
        ("Elastic", "Elastic Net Regression"),
    ]

    best_by_model = {}
    rows = []
    for key, label in model_labels:
        ranked = sorted(
            ((k, v) for (k, v) in results.items() if key in k),
            key=lambda item: item[1],
        )
        if not ranked:
            # No selection method produced features, so nothing was fitted.
            continue
        best_by_model[key] = ranked[-1]
        rows.append({'Company' : company, 'Model' : label, 'Method' : ranked[-1][0], 'Percentage' : ranked[-1][1]})

    if rows:
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        final_df = pd.concat([final_df, pd.DataFrame(rows)], ignore_index=True)

    if results:
        sorted_results = sorted(results.items(), key=lambda item: item[1])
        print("Maximum correct direction values are obtained for {} with a percentage of {}.".format(sorted_results[-1][0], sorted_results[-1][1]))
        for key, _ in model_labels:
            if key in best_by_model:
                print("Maximum correct direction values for {} Model are obtained for {} with a percentage of {}.".format(key, best_by_model[key][0], best_by_model[key][1]))
    else:
        print("No features were selected by any method; no model was fitted.")
    return final_df

In [36]:
%%time
for filename in os.listdir(os.path.join(path,"Data/Stock")):
    if (filename.startswith("gr")):
        df_linear = pd.read_csv(os.path.join(path,"Data\Stock\\" + filename))
        name = os.path.join(path, "Data\Stock\\" + filename).split("\\")[-1]
        stock = name[2 : 8]
#         orig_stdout = sys.stdout
#         sys.stdout = open("gr" + stock + "res.txt", "w")
        fd_df = pd.DataFrame(columns = final_columns)
        print("For stock : ", stock)
        print("#################################################################################################################")
        f_df = get_results_from_each_set(df_linear, name, fd_df)
        final_df = final_df.append(f_df, ignore_index = True)
        print("#################################################################################################################")
#         sys.stdout.close()
#         sys.stdout = orig_stdout
final_df = final_df.sort_values(by = ['Company', 'Percentage'], ascending = [True, False])
final_df.to_csv('C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Models_Results\\Final_Results_dataframe.csv') 

For stock :  500085
#################################################################################################################
Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
[]
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
-------------------------------------------------
[]
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
[]
Features obtained from p-values less than 0.1 : 
-------------------------------------------------
[]
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['Deliverable Quan

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 451
Values in Opposite direction <----- ----->  447
Percentage of correct direction :  0.5022271714922049
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 448
Values in Opposite direction <----- ----->  450
Percentage of correct direction :  0.49888641425389757
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 448
Values in Opposite direction <----- ----->  450
Percentage of correct direction :  0.49888641425389757
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
---------------------------------

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 429
Values in Opposite direction <----- ----->  469
Percentage of correct direction :  0.477728285077951
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 448
Values in Opposite direction <----- ----->  450
Percentage of correct direction :  0.49888641425389757
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  448
Percentage of correct direction :  0.5011135857461024
Features obtained from f-values greater than 10 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Spread High-Low GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direc

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 449
Values in Opposite direction <----- ----->  433
Percentage of correct direction :  0.5090702947845805
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 454
Values in Opposite direction <----- ----->  428
Percentage of correct direction :  0.5147392290249433
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 452
Values in Opposite direction <----- ----->  430
Percentage of correct direction :  0.5124716553287982
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['Open Price GR', 'High Price GR', 'Close Price GR', 'WAP GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from featur

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of co

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct direction :  0.47268673355629875
Features obtained from f-values greater than 100 : 
--------------------------------------
['Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 424
Values in Opposite direction <----- ----->  473
Percentage of correct dir

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 440
Values in Opposite direction <----- ----->  460
Percentage of correct direction :  0.4888888888888889
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 436
Values in Opposite direction <----- ----->  464
Percentage of correct direction :  0.48444444444444446
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 439
Values in Opposite direction <----- ----->  461
Percentage of correct direction :  0.48777777777777775
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
---------------------------------

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 420
Values in Opposite direction <----- ----->  479
Percentage of correct direction :  0.4671857619577308
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 418
Values in Opposite direction <----- ----->  481
Percentage of correct direction :  0.4649610678531702
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 418
Values in Opposite direction <----- ----->  481
Percentage of correct direction :  0.4649610678531702
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['No.of Shares GR', 'Total Turnover (Rs.) GR', 'Alpha GR', 'Beta GR', 'Net Profit GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 438
Valu

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 432
Values in Opposite direction <----- ----->  467
Percentage of correct direction :  0.48053392658509453
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 430
Values in Opposite direction <----- ----->  469
Percentage of correct direction :  0.4783092324805339
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  449
Percentage of correct direction :  0.5005561735261401
Features obtained from f-values greater than 100 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 4

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 431
Values in Opposite direction <----- ----->  468
Percentage of correct direction :  0.4794215795328142
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Alpha GR', 'Beta GR', 'High Price GR']
Linear Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 438
Values in Opposite direction <----- ----->  461
Percentage of correct direction :  0.4872080088987764
Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 442
Values in Opposite direction <----- 

Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 243
Values in Opposite direction <----- ----->  263
Percentage of correct direction :  0.48023715415019763
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 235
Values in Opposite direction <----- ----->  271
Percentage of correct direction :  0.4644268774703557
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
-------------------------------------------------
['Close Price GR', 'WAP GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 242
Values in Opposite direction <----- ----->  264
Percentage of 

For stock :  500680
#################################################################################################################
Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
['Low Price GR', 'WAP GR']
Linear Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 464
Values in Opposite direction <----- ----->  416
Percentage of correct direction :  0.5272727272727272
Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 460
Values in Opposite direction <----- ----->  420
Percentage of correct direction :  0.5227272727272727
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 464
Values in Opposit

Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 440
Values in Opposite direction <----- ----->  440
Percentage of correct direction :  0.5
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', 'Dividend Value GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 461
Values in Opposite direction <----- ----->  419
Percentage of correct direction :  0.5238636363636363
Ridge Model fitted using columns obtained from fe

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 421
Values in Opposite direction <----- ----->  409
Percentage of correct direction :  0.5072289156626506
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['High Price GR', 'Low Price GR', 'Spread High-Low GR', 'Alpha GR', 'Beta GR', 'Net Profit GR', 'EPS GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 414
Values in Opposite direction <----- ----->  416
Percentage of correct direction :  0.4987951807228916
Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 416
Values in Opposite direction <----- ----->  414
Percentage of correct direction :  0.5012048192771085
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction ----

Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
['WAP GR', 'Net Profit GR', 'EPS GR']
Linear Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  449
Percentage of correct direction :  0.5005561735261401
Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  449
Percentage of correct direction :  0.5005561735261401
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 440
Values in Opposite direction <----- ----->  459
Percentage of correct direction :  0.489432703003337
Elastic Model fitted using columns obta

Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 440
Values in Opposite direction <----- ----->  459
Percentage of correct direction :  0.489432703003337
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 440
Values in Opposite direction <----- ----->  459
Percentage of correct direction :  0.489432703003337
Features obtained from f-values greater than 10 : 
--------------------------------------
[]
Features obtained from f-values greater than 100 : 
--------------------------------------
[]
Features obtained from f-values greater than 1000 : 
--------------------------------------
[]
*****************************************************************************************
Maximum correct direction values are obtained for LinearFIFValue1 with a percentage of 0.5139043381535039.
Maximum correct direction values for Linear Model are obtained for 

Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 374
Values in Opposite direction <----- ----->  371
Percentage of correct direction :  0.5020134228187919
Lasso Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 382
Values in Opposite direction <----- ----->  363
Percentage of correct direction :  0.512751677852349
Elastic Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 382
Values in Opposite direction <----- ----->  363
Percentage of correct direction :  0.512751677852349
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
-

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 373
Values in Opposite direction <----- ----->  403
Percentage of correct direction :  0.4806701030927835
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 408
Values in Opposite direction <----- ----->  368
Percentage of correct direction :  0.5257731958762887
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 408
Values in Opposite direction <----- ----->  368
Percentage of correct direction :  0.5257731958762887
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
-----------------------------------

Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 408
Values in Opposite direction <----- ----->  368
Percentage of correct direction :  0.5257731958762887
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No. of Trades GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Alpha GR', 'Revenue GR', 'Dividend Value GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 379
Values in Opposite direction <----- ----->  397
Percentage of correct direction :  0.4884020618556701
Ridge Model fitted using co

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 436
Values in Opposite direction <----- ----->  460
Percentage of correct direction :  0.48660714285714285
Features obtained from p-values less than 0.1 : 
-------------------------------------------------
['Open Price GR', 'Close Price GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 470
Values in Opposite direction <----- ----->  426
Percentage of correct direction :  0.5245535714285714
Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  446
Percentage of correct direction :  0.5022321428571429
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction ----

Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 436
Values in Opposite direction <----- ----->  460
Percentage of correct direction :  0.48660714285714285
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 436
Values in Opposite direction <----- ----->  460
Percentage of correct direction :  0.48660714285714285
Features obtained from f-values greater than 1000 : 
--------------------------------------
[]
*****************************************************************************************
Maximum correct direction values are obtained for LinearFIFValue10 with a percentage of 0.5301339285714286.
Maximum correct direction values for Linear Model are obtained for LinearFIFValue10 with a percentage of 0.5301339285714286.
Maximum correct direction values for Ridge Model are obtained for RidgeFIFValue10 with a percentage of 0.5145089285714286.
M

Elastic Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 460
Values in Opposite direction <----- ----->  436
Percentage of correct direction :  0.5133928571428571
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Close Price GR', 'WAP GR', 'No. of Trades GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 445
Values in Opposite direction <----- ----->  451
Percentage of correct direction :  0.4966517857142857
Ridge Model fitted using columns obtained from feature importance using BackwardElimina

Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 456
Values in Opposite direction <----- ----->  410
Percentage of correct direction :  0.5265588914549654
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 453
Values in Opposite direction <----- ----->  413
Percentage of correct direction :  0.523094688221709
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['Open Price GR', 'Close Price GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 445
Values in Opposite direction <----- ----->  421
Percentage of correct direction :  0.5138568129330254
Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 444
Values in Opposite direction <---

Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  416
Percentage of correct direction :  0.5196304849884527
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 447
Values in Opposite direction <----- ----->  419
Percentage of correct direction :  0.5161662817551963
Features obtained from f-values greater than 100 : 
--------------------------------------
['Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 448
Values in Opposite direction <----- ----->  418
Percentage of correct direction :  0.5173210161662818
Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 446
Values in Opposite direction <----- ----->  420
Percentage of correct direct

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 459
Values in Opposite direction <----- ----->  436
Percentage of correct direction :  0.5128491620111731
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 463
Values in Opposite direction <----- ----->  432
Percentage of correct direction :  0.5173184357541899
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 463
Values in Opposite direction <----- ----->  432
Percentage of correct direction :  0.5173184357541899
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
-----------------------------------

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 432
Values in Opposite direction <----- ----->  438
Percentage of correct direction :  0.496551724137931
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 466
Values in Opposite direction <----- ----->  404
Percentage of correct direction :  0.535632183908046
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 466
Values in Opposite direction <----- ----->  404
Percentage of correct direction :  0.535632183908046
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
--------------------------------------

For stock :  532174
#################################################################################################################
Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
['High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 433
Values in Opposite direction <----- ----->  465
Percentage of correct direction :  0.4821826280623608
Ridge Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same direction -----> -----> 439
Values in Opposite direction <----- ----->  459
Percentage of correct direction :  0.4888641425389755
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
Values in Same

Lasso Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 462
Values in Opposite direction <----- ----->  436
Percentage of correct direction :  0.5144766146993318
Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 456
Values in Opposite direction <----- ----->  442
Percentage of correct direction :  0.5077951002227171
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'Spread High-Low GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR']
Linear

Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 415
Values in Opposite direction <----- ----->  453
Percentage of correct direction :  0.478110599078341
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 414
Values in Opposite direction <----- ----->  454
Percentage of correct direction :  0.4769585253456221
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['Open Price GR', 'High Price GR', 'No. of Trades GR', 'Alpha GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 420
Values in Opposite direction <----- ----->  448
Percentage of correct direction :  0.4838709677419355
Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 418
Values in Opposite directi

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 449
Values in Opposite direction <----- ----->  433
Percentage of correct direction :  0.5090702947845805
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 446
Values in Opposite direction <----- ----->  436
Percentage of correct direction :  0.5056689342403629
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 446
Values in Opposite direction <----- ----->  436
Percentage of correct direction :  0.5056689342403629
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['Open Price GR', 'Low Price GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 446
Values in Opposite direction <----- ----->  436
Percentag

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 453
Values in Opposite direction <----- ----->  429
Percentage of correct direction :  0.5136054421768708
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 443
Values in Opposite direction <----- ----->  439
Percentage of correct direction :  0.5022675736961452
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 450
Values in Opposite direction <----- ----->  432
Percentage of correct direction :  0.5102040816326531
Features obtained from f-values greater than 100 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 443
Values in Opposite di

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 483
Values in Opposite direction <----- ----->  416
Percentage of correct direction :  0.5372636262513905
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 487
Values in Opposite direction <----- ----->  412
Percentage of correct direction :  0.5417130144605117
Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 483
Values in Opposite direction <----- ----->  416
Perce

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 400
Values in Opposite direction <----- ----->  432
Percentage of correct direction :  0.4807692307692308
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 401
Values in Opposite direction <----- ----->  431
Percentage of correct direction :  0.48197115384615385
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 403
Values in Opposite direction <----- ----->  429
Percentage of correct direction :  0.484375
Features obtained from p-values less than 0.1 : 
-------------------------------------------------
['Open Price GR', 'High Price GR', 'Close Price GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same directi

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 388
Values in Opposite direction <----- ----->  444
Percentage of correct direction :  0.46634615384615385
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 409
Values in Opposite direction <----- ----->  423
Percentage of correct direction :  0.49158653846153844
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 407
Values in Opposite direction <----- ----->  425
Percentage of correct direction :  0.4891826923076923
Features obtained from f-values greater than 1000 : 
--------------------------------------
[]
*****************************************************************************************
Maximum correct direction values are obtained for LinearFIPValue0.2 with a percentage of 0.49399038461538464.
Maximum correct dire

Elastic Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 372
Values in Opposite direction <----- ----->  320
Percentage of correct direction :  0.5375722543352601
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Beta GR', 'Revenue GR']
Linear Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 375
Values in Opposite direction <----- ----->  317
Percentage of correct direction :  0.541907514450867
Ridge Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 378
Values in Opposite direction <

Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 386
Values in Opposite direction <----- ----->  438
Percentage of correct direction :  0.4684466019417476
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 387
Values in Opposite direction <----- ----->  437
Percentage of correct direction :  0.4696601941747573
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['Close Price GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Spread High-Low GR', 'Alpha GR', 'Beta GR', 'Revenue GR', 'Income GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 393
Values in Opposite direction <----- ----->  431
Percentage of correct direction :  0.47694174757281554
Ridge Model fitted using columns obtained from feature importance using PValue 

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 394
Values in Opposite direction <----- ----->  384
Percentage of correct direction :  0.506426735218509
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 399
Values in Opposite direction <----- ----->  379
Percentage of correct direction :  0.512853470437018
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 399
Values in Opposite direction <----- ----->  379
Percentage of correct direction :  0.512853470437018
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['Beta GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 370
Values in Opposite direction <----- ----->  408
Percentage of correct direction : 

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 394
Values in Opposite direction <----- ----->  384
Percentage of correct direction :  0.506426735218509
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 399
Values in Opposite direction <----- ----->  379
Percentage of correct direction :  0.512853470437018
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 399
Values in Opposite direction <----- ----->  379
Percentage of correct direction :  0.512853470437018
Features obtained from f-values greater than 100 : 
--------------------------------------
['Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Alpha GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 372
Values in Op

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 366
Values in Opposite direction <----- ----->  346
Percentage of correct direction :  0.5140449438202247
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['High Price GR', 'Beta GR', 'Open Price GR']
Linear Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 361
Values in Opposite direction <----- ----->  351
Percentage of correct direction :  0.5070224719101124
Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 355
Values in Opposite direction <-

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 326
Values in Opposite direction <----- ----->  317
Percentage of correct direction :  0.5069984447900466
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 352
Values in Opposite direction <----- ----->  291
Percentage of correct direction :  0.5474339035769828
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 350
Values in Opposite direction <----- ----->  293
Percentage of correct direction :  0.5443234836702955
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['High Price GR', 'Low Price GR', 'WAP GR', 'No. of Trades GR', 'Net Profit GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 312
Values in 

For stock :  533287
#################################################################################################################
Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
[]
*****************************************************************************************
Features Importance using p-value
*****************************************************************************************
Features obtained from p-values less than 0.02 : 
-------------------------------------------------
['No. of Trades GR']
Linear Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 297
Values in Opposite direction <----- ----->  329
Percentage of correct direction :  0.4744408945686901
Ridge Model fitted using columns obtained from feature importance using PVal

Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 324
Values in Opposite direction <----- ----->  302
Percentage of correct direction :  0.5175718849840255
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 308
Values in Opposite direction <----- ----->  318
Percentage of correct direction :  0.49201277955271566
Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 308
Values in Opposite direction <----- ----->  318
Percentage of correct direction :  0.49201277955271566
Features obtained from f-values greater than 10 : 
--------------------------------------
['Close Price GR', 'Alpha GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 316
Values in Opposite direction <----- ----->  310
Percentage of correc

Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 229
Values in Opposite direction <----- ----->  195
Percentage of correct direction :  0.5400943396226415
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['WAP GR', 'Income GR']
Linear Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 232
Values in Opposite direction <----- ----->  192
Percentage of correct direction :  0.5471698113207547
Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
Values in Same direction -----> -----> 234
Values in Opposite direction <----- ----->  190
Perce

Ridge Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Lasso Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Elastic Model fitted using columns obtained from feature importance using PValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['Open Price GR', 'High Price GR', 'Close Price GR', 'WAP GR', 'No. of Trades GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR']
Linear Model fitted using columns obtained from feature 

Elastic Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Features obtained from f-values greater than 10 : 
--------------------------------------
['Close Price GR', 'WAP GR', 'Deliverable Quantity GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Ridge Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Percentage of correct direction :  0.49572649572649574
Lasso Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 58
Values in Opposite direction <----- ----->  59
Pe

Lasso Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 64
Values in Opposite direction <----- ----->  66
Percentage of correct direction :  0.49230769230769234
Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
Values in Same direction -----> -----> 64
Values in Opposite direction <----- ----->  66
Percentage of correct direction :  0.49230769230769234
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Open Price GR', 'Low Price GR', '% Deli. Qty to Traded Qty GR']
Linear Model fitted using columns obtained from feature importance using FValue : 
Values in Same direction -----> -----> 69
Values in Opposite directi

In [37]:
# Print every results table held in `tables`, following the mapping's
# own iteration order (one printed table per key).
for name in tables:
    table = tables[name]
    print(table)

+-------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                Linear Regression                                                                |
+--------------------------------------------+----------------------+------------+----------+----------+-----------+---------------+--------------+
|                  Company                   |        Method        | Percentage |   RMSE   |   MAE    |    MSE    |   Confidence  | rsquared_adj |
+--------------------------------------------+----------------------+------------+----------+----------+-----------+---------------+--------------+
| 500085-CHAMBAL FERTILISERS & CHEMICALS LTD |      PValue 0.2      |  0.48044   | 0.030303 | 0.020423 |  0.000918 |   -0.001206   |  -0.000534   |
| 500085-CHAMBAL FERTILISERS & CHEMICALS LTD |  ForwardSelection    |  0.501222  | 0.029949 | 0.020288 |  0.0008

In [38]:
# Display `final_df` as the cell's last expression so the notebook renders it;
# the rendered output shows the columns Company, Model, Method and Percentage.
final_df

Unnamed: 0,Company,Model,Method,Percentage
1,500085-CHAMBAL FERTILISERS & CHEMICALS LTD,Ridge Regression,RidgeFIFValue1,0.506112
0,500085-CHAMBAL FERTILISERS & CHEMICALS LTD,Linear Regression,LinearFIBackwardElimination,0.501222
2,500085-CHAMBAL FERTILISERS & CHEMICALS LTD,Lasso Regression,LassoFIFValue10,0.497555
3,500085-CHAMBAL FERTILISERS & CHEMICALS LTD,Elastic Net Regression,ElasticFIFValue10,0.497555
4,500112-SBIN,Linear Regression,LinearFIPValue0.05,0.528953
...,...,...,...,...
122,539921-SHANTI EDUCATIONAL INITIATIVES LTD,Lasso Regression,LassoFIFValue10,0.495726
126,542602-EMBASSY OFFICE PARKS REIT,Lasso Regression,LassoFIPValue0.1,0.569231
127,542602-EMBASSY OFFICE PARKS REIT,Elastic Net Regression,ElasticFIPValue0.1,0.569231
124,542602-EMBASSY OFFICE PARKS REIT,Linear Regression,LinearFICoefficients0.1,0.538462


In [39]:
# orig_stdout = sys.stdout
# sys.stdout = open("500112res.txt", "w")

In [40]:
# path = "C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Stock\\gr500112.csv"
# df_lin = pd.read_csv(path)
# name = path.split("\\")[-1]
# stock = name[2 : 8]
# print("For stock : ", stock)
# print("#################################################################################################################")
# get_results_from_each_set(df_lin, name)
# print("#################################################################################################################")

In [41]:
# sys.stdout.close()
# sys.stdout=orig_stdout