In [1]:
import pandas as pd
import numpy as np 
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from math import sqrt
import statsmodels.api as sm
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from prettytable import PrettyTable
import time 
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
import matplotlib.patches as patches
import warnings; warnings.simplefilter('ignore')
import sys
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import f_regression
from prettytable import PrettyTable
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from datetime import datetime, timedelta

In [2]:
path = os.getcwd()
path

'C:\\Users\\venu\\Desktop\\Stock Market Analysis'

# PreProcessing Data

In [3]:
def pre_process_data(data,null_threshold):
    data.drop(columns=['Unixtime','Date'],axis=1,inplace=True)
    total = data.shape[0]
    for col in data.columns:
        if ((null_threshold * total / 100) < data[col].isnull().sum()):
            data.drop(columns=[col],axis=1,inplace=True)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(axis=0,inplace=True)
    return data

# Necessary Columns

In [4]:
def data_with_necessary_columns(data):
    necessary_columns = ['Dividend Value', '% Return of Company', '% Return of SP500', 'Next Day Open Price', 'Next Day High Price',
                     'Next Day Low Price', 'Next Day Close Price', 'Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR',
                     'Next Day Open Price GR', 'Next Day High Price GR', 'Next Day Low Price GR', 'Next Day Close Price GR',
                     'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR',
                     '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Alpha GR', 'Beta GR',
                     'Sequential Increase %', 'Sequential Decrease %', 'Max Inc % in 90 days', 'Max Dec % in 90 days',
                     'Min Inc % in 90 days', 'Min Dec % in 90 days', 'Avg Inc % in 90 days', 'Avg Dec % in 90 days',
                     'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Min Inc % in 180 days', 'Min Dec % in 180 days',
                     'Avg Inc % in 180 days', 'Avg Dec % in 180 days', 'Max Inc % in 365 days', 'Max Dec % in 365 days',
                     'Min Inc % in 365 days', 'Min Dec % in 365 days', 'Avg Inc % in 365 days', 'Avg Dec % in 365 days',
                     'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR',
                     'CP % LV 7 days', 'CP % HV 7 days', 'CP % BA 7 days', 'CP % LV 30 days', 'CP % HV 30 days', 'CP % BA 30 days',
                     'CP % LV 90 days', 'CP % HV 90 days', 'CP % BA 90 days', 'CP % LV 180 days', 'CP % HV 180 days',
                     'CP % BA 180 days', 'CP % LV 365 days', 'CP % HV 365 days', 'CP % BA 365 days', 'LowerBandInLast1Months',
                     'UpperBandInLast1Months', 'LowerBandInNext1Months', 'UpperBandInNext1Months', 'LowerBandInLast3Months',
                     'UpperBandInLast3Months', 'LowerBandInNext3Months', 'UpperBandInNext3Months', 'LowerBandInLast6Months',
                     'UpperBandInLast6Months', 'LowerBandInNext6Months', 'UpperBandInNext6Months', 'LowerBandInLast9Months',
                     'UpperBandInLast9Months', 'LowerBandInNext9Months', 'UpperBandInNext9Months', 'LowerBandInLast12Months',
                     'UpperBandInLast12Months', 'LowerBandInNext12Months', 'UpperBandInNext12Months', 'LowerBandInLast24Months',
                     'UpperBandInLast24Months', 'LowerBandInNext24Months', 'UpperBandInNext24Months']
    
    
    n_cols = []
    d_cols = list(data.columns)
    for col in necessary_columns:
        if (col in d_cols):
            n_cols.append(col)
            
    data = data[n_cols]
    return data

# Removing columns based on dependent column

In [5]:
def dependent_column(data,column):
    cols = [col for col in data.columns if ("next" not in col.lower() and col.lower().endswith(" gr"))]
    cols.append(column)
    data = data[cols]
    return (data,column)

# For predicting UB, LB percentage columns

In [6]:
def remove_ycolumn_columns(data, column):
    cols = [col for col in data.columns]
    cols.remove(column)
    cols.append(column)
    data = data[cols]
    return data

In [7]:
def remove_notgr_columns(data):
    print(len(data.columns))
    df = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Stock1\\500002.csv")
    to_be_removed_columns = set(df.columns)
    total_columns = set(data.columns)
    final_columns = total_columns - to_be_removed_columns
    f_cols = list(final_columns)
    for c in f_cols:
        if ((c + " GR") in f_cols):
            f_cols.remove(c)
    data = data[f_cols]
    print(f_cols)
    print("-------------------------------------------")
    print(data)
    print("-------------------------------------------")
    return data

In [8]:
def remove_columns_startswithnext_UBLB(data):
    cols = [col for col in data.columns if (not (col.lower().startswith("next")))]
    data = data[cols]
    return data

In [9]:
def remove_next_columns_UBLB(data):
    cols = [col for col in data.columns if ("next" not in col.lower())]
    data = data[cols]
    return data

In [10]:
def remove_CP_columns_UBLB(data):
    cols = [col for col in data.columns if ("cp" not in col.lower())]
    data = data[cols]
    return data

In [11]:
def remove_LowerBand_columns_UBLB(data, column):
    cols = [col for col in data.columns if ("lv" not in col.lower() and "low" not in col.lower())]
    cols.append(column)
    data = data[cols]
    return (data, column)

In [12]:
def remove_UpperBand_columns_UBLB(data, column):
    cols = [col for col in data.columns if ("hv" not in col.lower() and "upper" not in col.lower())]
    cols.append(column)
    data = data[cols]
    return (data, column)

In [13]:
# df1 = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Stock\\UBLBPASTgr500112.csv")

In [14]:
# df1 = remove_next_columns_UBLB(df1)
# list(df1.columns)

In [15]:
# df1 = remove_CP_columns_UBLB(df1)
# list(df1.columns)

In [16]:
# df1 = remove_LowerBand_columns_UBLB(df1, "LowerBandInLast3Months")
# list(df1.columns)

# OLS Regression

In [17]:
def OLS_Regression(X_train,Y_train):
    X_train = np.array(X_train, dtype=float)
    ols_model = sm.OLS(Y_train, X_train).fit()
#     print(list(zip(list(cols),ols_model.pvalues)))
    rsquared_adj = ols_model.rsquared_adj
    aic = ols_model.aic
    bic = ols_model.bic
    fvalue = ols_model.fvalue
    return {"rsquared_adj":rsquared_adj,"aic":aic,"bic":bic,"fvalue":fvalue}

# Linear Regression

In [18]:
def linear_regression(data, y):
    # print("------ Linear Regression ------")
    X = data[data.columns[:-1]]
    Y = data[y]
    
    print(X.shape, Y.shape)
    
    X_test = X[ : int(X.shape[0] * 0.3)]
    X_train = X[int(X.shape[0] * 0.3) : ]
    Y_test = Y[ : int(X.shape[0] * 0.3)]
    Y_train = Y[int(X.shape[0] * 0.3) :]
    
    print(X.shape)
    print(X_train.shape, X_test.shape)
    

#     X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state = 0)
    model = LinearRegression(fit_intercept = True)  
    model.fit(X_train, Y_train)
    pred = model.predict(X_test)
    confidence = model.score(X_test, Y_test)
    rmse = sqrt(metrics.mean_squared_error(Y_test, pred))
    mae = metrics.mean_absolute_error(Y_test, pred)
    mse = metrics.mean_squared_error(Y_test, pred)
    r2_sc = r2_score(Y_test, pred)
    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test, "r2_score" : r2_sc}

# linear regression with forward selection

In [19]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]]).astype(float)).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [20]:
def linear_regression_forward_selection(data,y):
    # print("------ Linear Regression Forward Selection ------")
    X = data[data.columns[:-1]]
    Y = data[y].values
    forward_features = forward_selection(X,Y)
    print("Features obtained from Forward Selection : ")
    print(forward_features)
    return linear_regression(data[forward_features+[y]],y)

# linear regression with backward elimination

In [21]:
def backward_elimination(data, target,significance_level = 0.05):
    features = data.columns.tolist()
    while(len(features)>0):
        features_with_constant = sm.add_constant(data[features]).astype(float)
        p_values = sm.OLS(target, features_with_constant).fit().pvalues[1:]
        max_p_value = p_values.max()
        if(max_p_value >= significance_level):
            excluded_feature = p_values.idxmax()
            features.remove(excluded_feature)
        else:
            break 
    return features

In [22]:
def linear_regression_backward_selection(data,y):
    # print("------ Linear Regression Backward Selection ------")
    X = data[data.columns[:-1]]
    Y = data[y].values
    backward_features = backward_elimination(X,Y)
    print("Features obtained from Backward Elimination : ")
    print(backward_features)
    return linear_regression(data[backward_features+[y]],y)

# Using Inbuilt Forward Selection Method

In [23]:
def forward_selection_inbuilt(X,Y,k,score):
    sfs = SFS(LinearRegression(),k_features=k,forward=True,floating=False,scoring = score,cv = 0)
    sfs.fit(X, Y)
    lst = list(sfs.k_feature_names_)
    return lst

In [24]:
def linear_regression_forward_selection_inbuit(data,y):
    # print("------ Linear Regression Forward Selection Inbuilt ------")

    X = data[data.columns[:-1]]
    Y = data[y].values
    scores = ['explained_variance','max_error','neg_mean_absolute_error','neg_mean_squared_error',
                  'neg_root_mean_squared_error','neg_median_absolute_error','r2']
    df = pd.DataFrame(columns=scores,index=range(1,data.shape[1]+1))
    for k in range(1,data.shape[1]+1):
        for score in scores:
            sfs = forward_selection_inbuilt(X,Y,k,score)
            df.loc[k,score] = sfs
    df.to_csv("forwardFeatures.csv",index=None)
    return df

# Using Inbuilt Backward Elimination Method

In [25]:
def backward_selection_inbuilt(X,Y,k,score):
    sfs = SFS(LinearRegression(),k_features=k,forward=False,floating=False,scoring = score,cv = 0)
    sfs.fit(X, Y)
    lst = list(sfs.k_feature_names_)
    return lst

In [26]:
def linear_regression_backward_selection_inbuit(data,y):
    # print("------ Linear Regression Backward Selection Inbuilt ------")
    X = data[data.columns[:-1]]
    Y = data[y].values
    scores = ['explained_variance','max_error','neg_mean_absolute_error','neg_mean_squared_error',
                  'neg_root_mean_squared_error','neg_median_absolute_error','r2']
    df = pd.DataFrame(columns=scores,index=range(1,data.shape[1]+1))
    for k in range(1,data.shape[1]+1):
        for score in scores:
            sfs = backward_selection_inbuilt(X,Y,k,score)
            df.loc[k,score] = sfs
    df.to_csv("backwardFeatures.csv",index=None)
    return df

# Ridge Regression

In [27]:
def bestparams_ridge(alpha,X_train,Y_train):
    
    ridge = Ridge(alpha=1).fit(X_train,Y_train)
    
    param_grid = dict(alpha=alpha)
    
    grid = GridSearchCV(estimator=ridge, param_grid=param_grid, scoring='r2')
    
    grid.fit(X_train,Y_train)
    
    alpha_val = grid.best_estimator_.alpha
    
    return alpha_val

In [28]:
def ridge_regression(data,y):
    
    # print("------ Ridge Regression ------")

    X = data[data.columns[:-1]]
    Y = data[y]
    
    print(X.shape, Y.shape)
    
    X_test = X[ : int(X.shape[0] * 0.3)]
    X_train = X[int(X.shape[0] * 0.3) : ]
    Y_test = Y[ : int(X.shape[0] * 0.3)]
    Y_train = Y[int(X.shape[0] * 0.3) :]
    
#     X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    
    # selection of alpha value from the respective array values
    alpha = np.array([1,0.1,0.01,0.001,0.0001,0])
    best = bestparams_ridge(alpha,X_train,Y_train)
    # print("Best Alpha:", best) # best alpha value
    
    # Re-selecting the alpha value based on the above selected alpha value
    alpha1 = np.arange(best-10,best+10)
    best_alpha = bestparams_ridge(alpha1,X_train,Y_train)
    # print("Best Alpha after tuning : ", best_alpha)
    # Ridge regression with the above best alpha value and the train datasets.
    clf = Ridge(alpha=best_alpha)
    clf.fit(X_train, Y_train)
    
    pred = clf.predict(X_test)
    
    confidence = clf.score(X_test, Y_test)
    rmse = sqrt(metrics.mean_squared_error(Y_test, pred))
    mae = metrics.mean_absolute_error(Y_test, pred)
    mse = metrics.mean_squared_error(Y_test, pred)
    r2_sc = r2_score(Y_test, pred)
#     ols_values = OLS_Regression(X_train,Y_train)

    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test, "r2_score" : r2_sc}

# Lasso Regression

In [29]:
def bestparams_lasso(alpha,X_train,Y_train):
    
    lasso = Lasso(alpha=1).fit(X_train,Y_train)
    
    param_grid = dict(alpha=alpha)
    
    grid = GridSearchCV(estimator=lasso, param_grid=param_grid, scoring='r2')
    
    grid.fit(X_train,Y_train)
    
    alpha_val = grid.best_estimator_.alpha
    
    return alpha_val

In [30]:
def lasso_regression(data,y):
    
    # print("------ Lasso Regression ------")

    X = data[data.columns[:-1]]
    Y = data[y]
    
    print(X.shape, Y.shape)
    
    
    X_test = X[ : int(X.shape[0] * 0.3)]
    X_train = X[int(X.shape[0] * 0.3) : ]
    Y_test = Y[ : int(X.shape[0] * 0.3)]
    Y_train = Y[int(X.shape[0] * 0.3) :]
    
#     X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    
    # selection of alpha value from the respective array values
    alpha = np.array([1,0.1,0.01,0.001,0.0001,0])
    best = bestparams_lasso(alpha,X_train,Y_train)
    # print("Best Alpha:", best) # best alpha value
    
    # Re-selecting the alpha value based on the above selected alpha value
    alpha1 = np.arange(best-10,best+10)
    best_alpha = bestparams_lasso(alpha1,X_train,Y_train)
    # print("Best Alpha after tuning : ", best_alpha)
    # Lasso regression with the above best alpha value and the train datasets.
    clf = Lasso(alpha=best_alpha)
    clf.fit(X_train, Y_train)
    
    pred = clf.predict(X_test)
    
    confidence = clf.score(X_test, Y_test)
    rmse = sqrt(metrics.mean_squared_error(Y_test, pred))
    mae = metrics.mean_absolute_error(Y_test, pred)
    mse = metrics.mean_squared_error(Y_test, pred)
    r2_sc = r2_score(Y_test, pred)
    
#     ols_values = OLS_Regression(X_train,Y_train)

    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test, "r2_score" : r2_sc}

# Elastic Regression

In [31]:
def bestparams_elastic(alphas,l1,X_train,Y_train):
    
    elastic_net = ElasticNet(alpha=1, l1_ratio=0.2).fit(X_train, Y_train)
    param_grid = dict(alpha=alphas, l1_ratio=l1)
    
    grid = GridSearchCV(estimator=elastic_net, param_grid=param_grid, scoring='r2')
    
    grid_result = grid.fit(X_train, Y_train)
    
    alpha_val = grid_result.best_estimator_.alpha
    l1_val = grid_result.best_estimator_.l1_ratio
    
    return (alpha_val,l1_val)

In [32]:
def elastic_net_regression(data,y):

    # print("------ Elastic Net Regression ------")
    
    X = data[data.columns[:-1]]
    Y = data[y]
    
    print(X.shape, Y.shape)
    
    X_test = X[ : int(X.shape[0] * 0.3)]
    X_train = X[int(X.shape[0] * 0.3) : ]
    Y_test = Y[ : int(X.shape[0] * 0.3)]
    Y_train = Y[int(X.shape[0] * 0.3) :]
    
#     X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    
    # selection of alpha value from the respective array values
    
    alpha = np.array([0,0.1,0.001,0.0001,1])
    l1_ratio = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
    
    best = bestparams_elastic(alpha,l1_ratio,X_train,Y_train)
#     print("Best Alpha:", best[0]) # best alpha value
    # print("Best l1 - value:", best[1])
    
    # Re-selecting the alpha value based on the above selected alpha value
    
    if (best[0] == 0):
        clf = ElasticNet(alpha=best[0],l1_ratio = best[1])
        clf.fit(X_train, Y_train)
    else:
        alpha1 = np.arange(best[0]/10,best[0]*10)
        best_alpha = bestparams_elastic(alpha1,l1_ratio,X_train,Y_train)
        # print("Best Alpha after tuning : ", best_alpha[0])
        # print("Best l1 after tuning : ", best_alpha[1])
        clf = ElasticNet(alpha=best_alpha[0],l1_ratio = best_alpha[1])
        clf.fit(X_train, Y_train)
    
    pred = clf.predict(X_test)
    
    confidence = clf.score(X_test, Y_test)
    
    rmse = sqrt(metrics.mean_squared_error(Y_test, pred))
    mae = metrics.mean_absolute_error(Y_test, pred)
    mse = metrics.mean_squared_error(Y_test, pred)
    r2_sc = r2_score(Y_test, pred)
    
#     ols_values = OLS_Regression(X_train,Y_train)
    
    # coeff_vs_Regularization(X_train,Y_train)

    return {"root_mean_squared_error":rmse,"mean_absolute_error":mae,"mean_squared_error":mse,"Confidence" : confidence, "Predicted" : pred, "Actual" : Y_test, "r2_score" : r2_sc}
    

In [33]:
def coeff_vs_Regularization(X_train,Y_train):
    coefs = []
    n_alphas = 200
    alphas = np.logspace(-10, -2, n_alphas)

    for a in alphas:
        elastic = ElasticNet(alpha=a)
        elastic.fit(X_train, Y_train)
        coefs.append(elastic.coef_)
    
    ax = plt.gca()

    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
    plt.xlabel('alpha(log scale)')
    plt.ylabel('Coefficients')
    plt.title('ElasticNet - Coefficients Vs Regularization')
    plt.axis('tight')
    plt.show()

# Finding results from each set of important features

In [34]:
columns = ['Company','Y-Column','Method','RMSE', 'MAE', 'MSE','Confidence', 'r2_score']

In [35]:
df_equity = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Equity.csv")
security_numbers = df_equity["Security Code"].tolist()
security_names = df_equity["Security Name"].tolist()
companies = {str(k) : v for (k, v) in list(zip(security_numbers, security_names))}
companies["542602"] = "Embassy Office Parks REIT"
companies["500112"] = "State Bank of India"
# companies["542602"]

In [36]:
models = ["Linear Regression","Lasso Regression","Ridge Regression","Elastic Regression"]
tables = {model:PrettyTable() for model in models}
for name,table in tables.items():
    table.field_names = columns

In [37]:
# final_columns = ['Company', 'Model', 'Method', 'r2_score']
final_columns = ['Company', 'Y-Column', 'Model', 'Method', 'RMSE', 'MAE','MSE','r2_score']

In [38]:
final_df = pd.DataFrame(columns = final_columns)
# final_df = pd.read_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\NextUBLB_Results\\Results_Linear2.csv")
final_df

Unnamed: 0,Company,Y-Column,Model,Method,RMSE,MAE,MSE,r2_score


In [39]:
def create_pretty_table(name,model,result, method, column):
    values = [name[14 : 20 ] + "-" + companies[name[14 : 20]], column, method] + [round(v, 6) for k,v in result.items() if not isinstance(v,dict)]
    tables[model].add_row(values)
    tables[model].title = model

In [40]:
def fit_model(models, df, column, method, value, name, results):
    for model in models:
        if (model == "Linear"):
            model_result = linear_regression(df, column)
        elif (model == "Ridge"):
            model_result = ridge_regression(df, column)
        elif (model == "Lasso"):
            model_result = lasso_regression(df, column)
        else:
            model_result = elastic_net_regression(df, column)
    
        print(model + " Model fitted using columns obtained from feature importance using " + method + " : ")
        pred = model_result['Predicted']
        actual = model_result['Actual']
        pred_actual = pd.DataFrame(list(zip(actual, pred)), 
                   columns =['Actual Values', 'Predicted Values'])
        pred_actual.to_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\NextUBLB_Results\\" + name[14 : 20] + "_" + column + "_" + model + "FI" + method + str(value) + ".csv" , index=False) 
        
#         print(model_result)
        results[model + "FI" + method + str(value)] = {}
        results[model + "FI" + method + str(value)]['r2_score'] = model_result['r2_score']
        results[model + "FI" + method + str(value)]['RMSE'] = model_result['root_mean_squared_error']
        results[model + "FI" + method + str(value)]['MAE'] = model_result['mean_absolute_error']
        results[model + "FI" + method + str(value)]['MSE'] = model_result['mean_squared_error']
        
        del model_result['Predicted']
        del model_result['Actual']
        
        print(model_result)
        print("R2_Score is ---> ", model_result['r2_score'])
        
        create_pretty_table(name ,model + " Regression" ,model_result, method + " " + value, column)

In [41]:
def get_results_from_AllFeaturesConsideration(df1, name, column, results):
    print("All Features are considered : ")
    print("*****************************************************************************************")
    method = "AllFeatureConsideration"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    print("Features are : ") 
    print("--------------------------------------")
    print(list(df1.columns))
    df_all = df1[df1.columns]
    fit_model(models, df_all, column, method, '',name, results)

    print("*****************************************************************************************")

In [42]:
def get_results_from_FI_Coeffiecients(df, name, column, results):
    print("Features Importance using Coefficients")
    print("*****************************************************************************************")
    X = df[df.columns[:-1]]
    Y = df[column].values
    model_linear = LinearRegression(fit_intercept=True)
    model_linear.fit(X, Y)
    col_coef = list(df.columns)
#     print(list(model_linear.coef_))
    res_coef = [round(i,6) for i in list(model_linear.coef_)]
    rc_coef = list(zip(col_coef, res_coef))
    coef_features = []
    coef = [0.1]
    method = "Coefficients"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for cf in coef:
        for i in range(len(rc_coef)):
            if ((abs(rc_coef[i][1])) > cf):
                coef_features.append(rc_coef[i][0])
        print("Features obtained from coefficients greater than " + str(cf) + " : ")
        print("--------------------------------------")
        print(coef_features)
        if (len(coef_features) == 0):
            continue
        coef_features.append(column)
        df_fic = df[coef_features]
        fit_model(models, df_fic, column, method, str(cf), name, results)
    print("*****************************************************************************************")

In [43]:
def get_results_from_FI_PValue(df, name, column, results):
    print("Features Importance using p-value")
    print("*****************************************************************************************")
    X = df[df.columns[:-1]]
    Y = df[column].values
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
    X_train = np.array(X_train, dtype=float)
    ols_model = sm.OLS(Y_train, X_train).fit()
    col_pval = list(df.columns)
    pvals = list(ols_model.pvalues)
    pvals_cols = list(zip(col_pval, pvals))
    p = [0.02, 0.05, 0.1, 0.2]
    method = "PValue"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for pv in p:
        pval_features = []
        for i in range(len(pvals_cols)):
            if (pvals_cols[i][1] < pv):
                pval_features.append(pvals_cols[i][0])
        print("Features obtained from p-values less than " + str(pv) + " : ")
        print("-------------------------------------------------")
        print(pval_features)
        if (len(pval_features) == 0):
            continue
        pval_features.append(column)
        df_fip = df[pval_features]
        fit_model(models, df_fip, column, method, str(pv), name, results)
    print("*****************************************************************************************")
    

In [44]:
def get_results_from_FI_FValues(df, name, column, results):
    print("Features Importance using f-value")
    print("*****************************************************************************************")
    X = df[df.columns[:-1]]
    Y = df[column].values
    fval_cols = X.columns
    freg_res = f_regression(X, Y)
#     print(freg_res[0])
    fvals = freg_res[0]
    fc = list(zip(fval_cols, fvals))
    f = [1, 10, 100, 1000]
    method = "FValue"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    for fv in f :
        fval_features = []
        for i in range(len(fc)):
            if ((abs(fc[i][1])) > fv):
                fval_features.append(fc[i][0])
        print("Features obtained from f-values greater than " + str(fv) + " : ")
        print("--------------------------------------")
        print(fval_features)
        if (len(fval_features) == 0):
            continue
        fval_features.append(column)
        df_fif = df[fval_features]
        fit_model(models, df_fif, column, method, str(fv), name, results)
    print("*****************************************************************************************")


In [45]:
def get_results_from_FI_ForwardSelection(df1, name, column, results):
    print("Features Importance using Forward Selection Method")
    print("*****************************************************************************************")
    method = "ForwardSelection"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    forward_features = forward_selection(X,Y)
    print("Features obtained from Forward Selection method : ") 
    print("--------------------------------------")
    print(forward_features)
    if (len(forward_features) != 0):
        forward_features.append(column)
        df_fs = df1[forward_features]
        fit_model(models, df_fs, column, method, '', name, results)
    print("*****************************************************************************************")

In [46]:
def get_results_from_FI_BackwardElimination(df1, name, column, results):
    print("Features Importance using Backward Elimination Method")
    print("*****************************************************************************************")
    method = "BackwardElimination"
    models = ["Linear", "Ridge", "Lasso", "Elastic"]
    X = df1[df1.columns[:-1]]
    Y = df1[column].values
    backward_features = backward_elimination(X,Y)
    print("Features obtained from Backward Elimination method : ") 
    print("--------------------------------------")
    print(backward_features)
    if (len(backward_features) != 0):
        backward_features.append(column)
        df_be = df1[backward_features]
        fit_model(models, df_be, column, method, '', name, results)

    print("*****************************************************************************************")

In [47]:
def get_results_from_each_set(data, name, final_df, column):
    df1 = pre_process_data(data, 60)
    df1 = data_with_necessary_columns(df1)
#     print("1 ", list(df1.columns), len(df1.columns))
#     print("-------------------------------------------")
    df1 = remove_columns_startswithnext_UBLB(df1)
#     print("2 ", list(df1.columns), len(df1.columns))
#     print("-------------------------------------------")
    df1 = remove_ycolumn_columns(df1, column)
#     print("3 ", list(df1.columns), len(df1.columns))
#     print("-------------------------------------------")
    
    results = {}
#     df1_columns = df1.columns
#     scaler = StandardScaler()
#     df1 = scaler.fit_transform(df1)
#     print(df1)
#     df1 = pd.DataFrame(df1, columns = df1_columns)

    get_results_from_AllFeaturesConsideration(df1, name, column, results)
    get_results_from_FI_Coeffiecients(df1, name, column, results)
    get_results_from_FI_PValue(df1, name, column, results)
    get_results_from_FI_ForwardSelection(df1, name, column, results)
    get_results_from_FI_BackwardElimination(df1, name, column, results)
    get_results_from_FI_FValues(df1, name, column, results)
#     print(results)
#     print(len(results))
#     print(results.items())
    linear = {k : v for (k, v) in results.items() if ("Linear" in k)}
    ridge = {k : v for (k, v) in results.items() if ("Ridge" in k)}
    lasso = {k : v for (k, v) in results.items() if ("Lasso" in k)}
    elastic = {k : v for (k, v) in results.items() if ("Elastic" in k)}
    sorted_results = sorted(results.items(), key=lambda item: item[1]['r2_score'])
    sorted_linear = sorted(linear.items(), key=lambda item: item[1]['r2_score'])
    sorted_ridge = sorted(ridge.items(), key=lambda item: item[1]['r2_score'])
    sorted_lasso = sorted(lasso.items(), key=lambda item: item[1]['r2_score'])
    sorted_elastic = sorted(elastic.items(), key=lambda item: item[1]['r2_score'])
    linear_row = {'Company' : name[14 : 20] + "-" + companies[name[14 : 20]], 'Y-Column' : column, 'Model' : 'Linear Regression', 'Method' : sorted_linear[-1][0], 'RMSE' : sorted_linear[-1][1]['RMSE'], 'MAE' : sorted_linear[-1][1]['MAE'], 'MSE' : sorted_linear[-1][1]['MSE'], 'r2_score' : sorted_linear[-1][1]['r2_score']}
    ridge_row = {'Company' : name[14 : 20] + "-" + companies[name[14 : 20]], 'Y-Column' : column, 'Model' : 'Ridge Regression', 'Method' : sorted_ridge[-1][0], 'RMSE' : sorted_ridge[-1][1]['RMSE'], 'MAE' : sorted_ridge[-1][1]['MAE'], 'MSE' : sorted_ridge[-1][1]['MSE'], 'r2_score' : sorted_ridge[-1][1]['r2_score']}
    lasso_row = {'Company' : name[14 : 20] + "-" + companies[name[14 : 20]], 'Y-Column' : column, 'Model' : 'Lasso Regression', 'Method' : sorted_lasso[-1][0], 'RMSE' : sorted_lasso[-1][1]['RMSE'], 'MAE' : sorted_lasso[-1][1]['MAE'], 'MSE' : sorted_lasso[-1][1]['MSE'], 'r2_score' : sorted_lasso[-1][1]['r2_score']}
    elastic_row = {'Company' : name[14 : 20] + "-" + companies[name[14 : 20]], 'Y-Column' : column, 'Model' : 'Elastic Net Regression', 'Method' : sorted_elastic[-1][0], 'RMSE' : sorted_elastic[-1][1]['RMSE'], 'MAE' : sorted_elastic[-1][1]['MAE'], 'MSE' : sorted_elastic[-1][1]['MSE'], 'r2_score' : sorted_elastic[-1][1]['r2_score']}
    final_df = final_df.append(linear_row, ignore_index = True)
    final_df = final_df.append(ridge_row, ignore_index = True)
    final_df = final_df.append(lasso_row, ignore_index = True)
    final_df = final_df.append(elastic_row, ignore_index = True)
    print("Maximum correct direction values are obtained for {} with r2_score of {}.".format(sorted_results[-1][0], sorted_results[-1][1]['r2_score']))
    print("Maximum correct direction values for Linear Model are obtained for {} with r2_score of {}.".format(sorted_linear[-1][0], sorted_linear[-1][1]['r2_score']))
    print("Maximum correct direction values for Ridge Model are obtained for {} with r2_score of {}.".format(sorted_ridge[-1][0], sorted_ridge[-1][1]['r2_score']))
    print("Maximum correct direction values for Lasso Model are obtained for {} with r2_score of {}.".format(sorted_lasso[-1][0], sorted_lasso[-1][1]['r2_score']))
    print("Maximum correct direction values for Elastic Model are obtained for {} with r2_score of {}.".format(sorted_elastic[-1][0], sorted_elastic[-1][1]['r2_score']))
    return final_df

In [48]:
# %%time

# # c = 0
# for filename in os.listdir(os.path.join(path,"Data/Stock1")):
#     if (filename.startswith("UBLBPASTNEXTgr")):
#         df_linear = pd.read_csv(os.path.join(path,"Data\Stock1\\" + filename))
#         df_linear["Date"] = pd.to_datetime(df_linear["Date"])
#         df_linear = df_linear.sort_values(by = ['Date'], ascending = [False])
#         df_linear.to_csv("C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\Stock1\\" + filename, index = False)


In [49]:
%%time


comp = ["500013"]

# c = 0
for filename in os.listdir(os.path.join(path,"Data/Stock1")):
    if (filename.startswith("UBLBPASTNEXTgr")):
        print(filename)
#         c += 1
#         print(c)
#         if (c == 11):
#             break
        name = os.path.join(path, "Data\Stock1\\" + filename).split("\\")[-1]
        stock = name[14 : 20]
        if(stock not in comp):
            print("continue")
            continue
        
        fd_df = pd.DataFrame(columns = final_columns)
        y_columns = ["LowerBandInNext1Months", "LowerBandInNext3Months", "LowerBandInNext6Months",
                    "LowerBandInNext9Months", "LowerBandInNext12Months", "LowerBandInNext24Months", 
                     "UpperBandInNext1Months", "UpperBandInNext3Months", "UpperBandInNext6Months",
                     "UpperBandInNext9Months", "UpperBandInNext12Months", "UpperBandInNext24Months"]
        for i in range(0, len(y_columns)):
            col = y_columns[i]
            df_linear = pd.read_csv(os.path.join(path,"Data\Stock1\\" + filename))
        
            value = 0
            if ("24" in col):
                value = 24
            elif ("12" in col):
                value = 12
            elif ("9" in col):
                value = 9
            elif ("6" in col):
                value = 6
            elif ("3" in col):
                value = 3
            else:
                value = 1
            
            
            print(len(df_linear.columns))
            df_linear["Date"] = pd.to_datetime(df_linear["Date"])
            head_date = df_linear.loc[0, "Date"]
            end_date = head_date - timedelta(days = value * 30)
            tail = df_linear.shape[0] - 1
            tail_date = df_linear.loc[tail, "Date"]
            start_date = tail_date + timedelta(days = value * 30)
            mask = (df_linear["Date"] >= start_date) & (df_linear["Date"] <= end_date)
            df_lin = df_linear.loc[mask]
            
            
            print("For stock : ", stock, "and Y-Column is : ", col)
            print(value)
            print("#################################################################################################################")
            f_df = get_results_from_each_set(df_lin, name, fd_df, col)
            final_df = final_df.append(f_df, ignore_index = True)
            print("#################################################################################################################")

        
final_df = final_df.sort_values(by = ['Company', 'Y-Column', 'r2_score'], ascending = [True, True, False])
final_df.to_csv('C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\NextUBLB_Results\\Results_Linear_500013.csv', index = False) 

UBLBPASTNEXTgr500002.csv
continue
UBLBPASTNEXTgr500003.csv
continue
UBLBPASTNEXTgr500008.csv
continue
UBLBPASTNEXTgr500009.csv
continue
UBLBPASTNEXTgr500010.csv
continue
UBLBPASTNEXTgr500012.csv
continue
UBLBPASTNEXTgr500013.csv
137
For stock :  500013 and Y-Column is :  LowerBandInNext1Months
1
#################################################################################################################
All Features are considered : 
*****************************************************************************************
Features are : 
--------------------------------------
['Dividend Value', '% Return of Company', 'Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Sequential Increase %', 'Sequential Decrease %', 'Max Inc % in 90 days', 'Max Dec % in 90 days', 'Min Inc % in 90 days', 

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 46.41126151833444, 'mean_absolute_error': 22.74817484106972, 'mean_squared_error': 2154.0051957232313, 'Confidence': -183704.9632860716, 'r2_score': -183704.9632860716}
R2_Score is --->  -183704.9632860716
(2811, 31) (2811,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 23.939764598410207, 'mean_absolute_error': 12.285924244643823, 'mean_squared_error': 573.1123290272945, 'Confidence': -48877.31871720849, 'r2_score': -48877.31871720849}
R2_Score is --->  -48877.31871720849
(2811, 31) (2811,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 44.97059319554747, 'mean_absolute_error': 21.01116594226333, 'mean_squared_error': 2022.3542523594208, 'Confidence': -172477.01294677425, 'r2_score': -172477.01294677425}
R2_Score is --->  -172477.01294677425
Features

Ridge Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 2.5095690042017864, 'mean_absolute_error': 1.010825720152329, 'mean_squared_error': 6.297936586850347, 'Confidence': -536.1242881396448, 'r2_score': -536.1242881396448}
R2_Score is --->  -536.1242881396448
(2811, 30) (2811,)
Lasso Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 2.474358526694839, 'mean_absolute_error': 0.990140404737247, 'mean_squared_error': 6.122450118627454, 'Confidence': -521.1577918876555, 'r2_score': -521.1577918876555}
R2_Score is --->  -521.1577918876555
(2811, 30) (2811,)
Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 2.7252752317013047, 'mean_absolute_error': 1.0891737234810273, 'mean_squared_error': 7.4271250885246, 'Confidence': -632.4279840840618, 'r2_score': -632.4279840840618}
R2_Score is 

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08521246435878689, 'mean_absolute_error': 0.0656014396893207, 'mean_squared_error': 0.007261164082097526, 'Confidence': 0.3807261259497233, 'r2_score': 0.3807261259497233}
R2_Score is --->  0.3807261259497233
(2811, 1) (2811,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08524988807917347, 'mean_absolute_error': 0.06568532932607239, 'mean_squared_error': 0.007267543417511604, 'Confidence': 0.38018205950100026, 'r2_score': 0.38018205950100026}
R2_Score is --->  0.38018205950100026
(2811, 1) (2811,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08524988807917347, 'mean_absolute_error': 0.06568532932607239, 'mean_squared_error': 0.007267543417511604, 'Confidence': 0.38018205950100026, 'r2_score': 0.38018205950100026}
R2_Score is --->  0.380182059

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 53.40103673455455, 'mean_absolute_error': 28.18158407446233, 'mean_squared_error': 2851.6707243252445, 'Confidence': -127109.21763366146, 'r2_score': -127109.21763366146}
R2_Score is --->  -127109.21763366146
(2772, 37) (2772,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 56.3567595909047, 'mean_absolute_error': 18.477372290836374, 'mean_squared_error': 3176.0843515870292, 'Confidence': -141569.61322310253, 'r2_score': -141569.61322310253}
R2_Score is --->  -141569.61322310253
(2772, 37) (2772,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 54.7589200526666, 'mean_absolute_error': 26.794625345781167, 'mean_squared_error': 2998.5393253343327, 'Confidence': -133655.7307631652, 'r2_score': -133655.7307631652}
R2_Score is --->  -133655.7307631652
Featur

Features obtained from Forward Selection method : 
--------------------------------------
['LowerBandInNext9Months', 'LowerBandInNext6Months', 'LowerBandInNext1Months', 'Revenue GR', 'Income GR', 'UpperBandInNext9Months', 'UpperBandInNext12Months', 'Min Dec % in 90 days', 'UpperBandInNext6Months', 'CP % LV 90 days', 'CP % LV 7 days', 'Dividend Value', 'CP % HV 90 days', 'CP % LV 30 days', 'CP % BA 90 days', 'UpperBandInLast24Months', 'Low Price GR', 'CP % HV 30 days', 'CP % BA 30 days', 'UpperBandInNext3Months', 'WAP GR', 'UpperBandInNext1Months', 'UpperBandInLast9Months', 'Avg Dec % in 180 days', 'Min Dec % in 180 days', 'LowerBandInLast24Months', 'Min Inc % in 90 days', 'Avg Inc % in 90 days', 'LowerBandInNext12Months', 'Avg Dec % in 90 days', 'LowerBandInNext24Months', 'CP % HV 365 days', 'CP % BA 365 days', 'UpperBandInNext24Months', 'CP % LV 180 days', 'High Price GR', 'CP % BA 180 days', 'CP % HV 180 days', 'LowerBandInLast1Months', 'LowerBandInLast3Months', 'LowerBandInLast12Mon

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 36.259676555142086, 'mean_absolute_error': 17.65916725910569, 'mean_squared_error': 1314.7641438835205, 'Confidence': -58603.22630158761, 'r2_score': -58603.22630158761}
R2_Score is --->  -58603.22630158761
(2772, 46) (2772,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 29.36528824883295, 'mean_absolute_error': 14.293294729412066, 'mean_squared_error': 862.3201539370465, 'Confidence': -38436.01220545581, 'r2_score': -38436.01220545581}
R2_Score is --->  -38436.01220545581
(2772, 46) (2772,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 30.74486681728922, 'mean_absolute_error': 13.222720603324579, 'mean_squared_error': 945.2468356128519, 'Confidence': -42132.38165846941, 'r2_score': -42132.38165846941}
R2_Score is --->  -42132.38165846941
Features ob

Elastic Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 58.77261236870917, 'mean_absolute_error': 46.351571203373695, 'mean_squared_error': 3454.219964642546, 'Confidence': -105611.21979770008, 'r2_score': -105611.21979770008}
R2_Score is --->  -105611.21979770008
*****************************************************************************************
Features Importance using Coefficients
*****************************************************************************************
Features obtained from coefficients greater than 0.1 : 
--------------------------------------
['Low Price GR', 'WAP GR', 'Avg Inc % in 90 days', 'Avg Dec % in 90 days', 'Avg Inc % in 365 days', 'Revenue GR', 'Income GR', 'CP % HV 30 days', 'CP % HV 90 days', 'CP % HV 365 days', 'LowerBandInLast9Months', 'LowerBandInNext9Months', 'LowerBandInLast12Months']
(2694, 13) (2694,)
(2694, 13)
(1886, 13) (808, 13)
Linear Model fitted using columns

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 81.59517475860861, 'mean_absolute_error': 63.76968360269541, 'mean_squared_error': 6657.772543887879, 'Confidence': -203559.32460745238, 'r2_score': -203559.32460745238}
R2_Score is --->  -203559.32460745238
(2694, 40) (2694,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 50.168619534272004, 'mean_absolute_error': 35.65344001378412, 'mean_squared_error': 2516.8903859745387, 'Confidence': -76952.5187020924, 'r2_score': -76952.5187020924}
R2_Score is --->  -76952.5187020924
(2694, 40) (2694,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 74.34747717007056, 'mean_absolute_error': 57.695345263713946, 'mean_squared_error': 5527.547361554164, 'Confidence': -169002.87145758004, 'r2_score': -169002.87145758004}
R2_Score is --->  -169002.87145758004
Features 

Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 77.42368188748247, 'mean_absolute_error': 64.22508310909501, 'mean_squared_error': 5994.426517014081, 'Confidence': -183277.626536908, 'r2_score': -183277.626536908}
R2_Score is --->  -183277.626536908
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Dividend Value', '% Return of Company', 'Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', 'Spread Close-Open GR', 'Max Dec % in 90 days', 'Min Inc % in 90 days', 'Min Dec % in 90 days', 'Avg Inc % in 90 days', 'Avg Dec % in 90 days', 'Max Dec % in 180 days', 'Min Inc

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08485684762579102, 'mean_absolute_error': 0.05867465708993897, 'mean_squared_error': 0.007200684588986713, 'Confidence': 0.7798402269426193, 'r2_score': 0.7798402269426193}
R2_Score is --->  0.7798402269426193
(2694, 7) (2694,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08562353905348373, 'mean_absolute_error': 0.05733930789693468, 'mean_squared_error': 0.007331390440043452, 'Confidence': 0.7758439165709727, 'r2_score': 0.7758439165709727}
R2_Score is --->  0.7758439165709727
(2694, 7) (2694,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08474750774555889, 'mean_absolute_error': 0.05810638372945289, 'mean_squared_error': 0.007182140069083564, 'Confidence': 0.7804072226557186, 'r2_score': 0.7804072226557186}
R2_Score is --->  0.7804072226557

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 64.78317869927167, 'mean_absolute_error': 41.339927727302786, 'mean_squared_error': 4196.860242381767, 'Confidence': -105783.85281466682, 'r2_score': -105783.85281466682}
R2_Score is --->  -105783.85281466682
(2620, 39) (2620,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 33.02021335002462, 'mean_absolute_error': 21.40475682543802, 'mean_squared_error': 1090.3344896811443, 'Confidence': -27481.657712761345, 'r2_score': -27481.657712761345}
R2_Score is --->  -27481.657712761345
(2620, 39) (2620,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 67.81572427780819, 'mean_absolute_error': 45.7351834811589, 'mean_squared_error': 4598.972459323702, 'Confidence': -115919.37776129729, 'r2_score': -115919.37776129729}
R2_Score is --->  -115919.37776129729
Featu

Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 69.43972559137556, 'mean_absolute_error': 33.98731028612235, 'mean_squared_error': 4821.875490205539, 'Confidence': -121537.80748065255, 'r2_score': -121537.80748065255}
R2_Score is --->  -121537.80748065255
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['CP % HV 365 days', 'LowerBandInNext12Months', 'LowerBandInNext6Months', 'CP % BA 180 days', 'CP % HV 180 days', 'CP % BA 90 days', 'CP % BA 365 days', 'LowerBandInLast3Months', 'Dividend Value GR', 'CP % HV 90 days', 'Dividend Value', 'Avg Inc % in 180 days', 'Revenue GR', 'Sequential Decrease %', 'LowerBandInLast1Months', 'UpperBandInNext3Months', 'Upper

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 130.97400057357916, 'mean_absolute_error': 65.68084332424372, 'mean_squared_error': 17154.188826247915, 'Confidence': -432382.55230761773, 'r2_score': -432382.55230761773}
R2_Score is --->  -432382.55230761773
(2620, 67) (2620,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 127.48486816320418, 'mean_absolute_error': 64.31607657161925, 'mean_squared_error': 16252.391610589553, 'Confidence': -409652.11092581047, 'r2_score': -409652.11092581047}
R2_Score is --->  -409652.11092581047
(2620, 67) (2620,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 127.17101891156557, 'mean_absolute_error': 65.32609529301942, 'mean_squared_error': 16172.468051005766, 'Confidence': -407637.58065825095, 'r2_score': -407637.58065825095}
R2_Score is --->  -407637.58065825095


Ridge Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 275.2108848778866, 'mean_absolute_error': 151.98138077589397, 'mean_squared_error': 75741.03115526936, 'Confidence': -1493913.1077129666, 'r2_score': -1493913.1077129666}
R2_Score is --->  -1493913.1077129666
(2502, 78) (2502,)
Lasso Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 75.34325206911059, 'mean_absolute_error': 42.30382873810625, 'mean_squared_error': 5676.605632349537, 'Confidence': -111964.22028733793, 'r2_score': -111964.22028733793}
R2_Score is --->  -111964.22028733793
(2502, 78) (2502,)
Elastic Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 91.18227122893295, 'mean_absolute_error': 43.18482947776884, 'mean_squared_error': 8314.206586466693, 'Confidence': -163988.1921790735, 'r2_score': -163988.192179

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 94.07884606080549, 'mean_absolute_error': 54.23014873383494, 'mean_squared_error': 8850.829276132738, 'Confidence': -174572.52400534027, 'r2_score': -174572.52400534027}
R2_Score is --->  -174572.52400534027
(2502, 43) (2502,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 72.29471432907047, 'mean_absolute_error': 37.8503385849006, 'mean_squared_error': 5226.525719921908, 'Confidence': -103086.85592461268, 'r2_score': -103086.85592461268}
R2_Score is --->  -103086.85592461268
(2502, 43) (2502,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 109.94162662578002, 'mean_absolute_error': 65.97291582638728, 'mean_squared_error': 12087.16126512242, 'Confidence': -238405.85109172616, 'r2_score': -238405.85109172616}
R2_Score is --->  -238405.85109172616
Featur

Lasso Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 66.22686959148767, 'mean_absolute_error': 36.49835231097635, 'mean_squared_error': 4385.998255887915, 'Confidence': -86508.31431661098, 'r2_score': -86508.31431661098}
R2_Score is --->  -86508.31431661098
(2502, 44) (2502,)
Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 88.94846690036931, 'mean_absolute_error': 45.295710506140985, 'mean_squared_error': 7911.829763926095, 'Confidence': -156051.72230744176, 'r2_score': -156051.72230744176}
R2_Score is --->  -156051.72230744176
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Dividend Value

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.07986220859487232, 'mean_absolute_error': 0.05089285504736782, 'mean_squared_error': 0.006377972361650898, 'Confidence': 0.8742010407787961, 'r2_score': 0.8742010407787961}
R2_Score is --->  0.8742010407787961
(2502, 7) (2502,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.08067314420549403, 'mean_absolute_error': 0.05001179981340533, 'mean_squared_error': 0.006508156196000434, 'Confidence': 0.8716332982518658, 'r2_score': 0.8716332982518658}
R2_Score is --->  0.8716332982518658
(2502, 7) (2502,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 0.07984970007121221, 'mean_absolute_error': 0.05066947250747037, 'mean_squared_error': 0.006375974601462549, 'Confidence': 0.8742404445482416, 'r2_score': 0.8742404445482416}
R2_Score is --->  0.8742404445482

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 2.2296444093156467, 'mean_absolute_error': 1.2445492954233996, 'mean_squared_error': 4.971314191992518, 'Confidence': -132.41416463309824, 'r2_score': -132.41416463309824}
R2_Score is --->  -132.41416463309824
(2127, 38) (2127,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 0.26674015081047986, 'mean_absolute_error': 0.2458939196934922, 'mean_squared_error': 0.07115030805439755, 'Confidence': -0.9094465861270467, 'r2_score': -0.9094465861270467}
R2_Score is --->  -0.9094465861270467
(2127, 38) (2127,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 85.54171615405511, 'mean_absolute_error': 33.53397459459099, 'mean_squared_error': 7317.385202580932, 'Confidence': -196374.2030948678, 'r2_score': -196374.2030948678}
R2_Score is --->  -196374.2030948678
Fe

Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 304.3090089338587, 'mean_absolute_error': 208.08548285195445, 'mean_squared_error': 92603.9729183073, 'Confidence': -2485193.2990250285, 'r2_score': -2485193.2990250285}
R2_Score is --->  -2485193.2990250285
*****************************************************************************************
Features Importance using Forward Selection Method
*****************************************************************************************
Features obtained from Forward Selection method : 
--------------------------------------
['CP % LV 365 days', 'CP % HV 365 days', 'Expenditure GR', 'CP % BA 365 days', 'Income GR', 'UpperBandInLast24Months', 'EPS GR', 'LowerBandInNext12Months', 'UpperBandInNext12Months', 'CP % LV 180 days', 'Dividend Value', 'Dividend Value GR', 'Net Profit GR', 'Revenue GR', 'UpperBandInLast9Months', 'UpperBandInLast12Months', 'UpperBandInLast6Months', 'Sequen

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 156.09374081795409, 'mean_absolute_error': 105.08950928821567, 'mean_squared_error': 24365.255922542627, 'Confidence': -653884.4997766285, 'r2_score': -653884.4997766285}
R2_Score is --->  -653884.4997766285
(2127, 70) (2127,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 129.74808704791292, 'mean_absolute_error': 87.44187504781381, 'mean_squared_error': 16834.56609259279, 'Confidence': -451784.88306118635, 'r2_score': -451784.88306118635}
R2_Score is --->  -451784.88306118635
(2127, 70) (2127,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 270.4719576462172, 'mean_absolute_error': 184.88489008300664, 'mean_squared_error': 73155.07987297712, 'Confidence': -1963247.2464378322, 'r2_score': -1963247.2464378322}
R2_Score is --->  -1963247.2464378322
Feat

All Features are considered : 
*****************************************************************************************
Features are : 
--------------------------------------
['Dividend Value', '% Return of Company', 'Open Price GR', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'No. of Trades GR', 'Total Turnover (Rs.) GR', 'Deliverable Quantity GR', '% Deli. Qty to Traded Qty GR', 'Spread High-Low GR', 'Spread Close-Open GR', 'Sequential Increase %', 'Sequential Decrease %', 'Max Inc % in 90 days', 'Max Dec % in 90 days', 'Min Inc % in 90 days', 'Min Dec % in 90 days', 'Avg Inc % in 90 days', 'Avg Dec % in 90 days', 'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Min Inc % in 180 days', 'Min Dec % in 180 days', 'Avg Inc % in 180 days', 'Avg Dec % in 180 days', 'Max Inc % in 365 days', 'Max Dec % in 365 days', 'Min Inc % in 365 days', 'Min Dec % in 365 days', 'Avg Inc % in 365 days', 'Avg Dec % in 365 days', 'Revenue GR', 'Dividend Value GR', 'Inc

Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 415.8283062285069, 'mean_absolute_error': 133.76285496909372, 'mean_squared_error': 172913.18026086892, 'Confidence': 0.2805256635083099, 'r2_score': 0.2805256635083099}
R2_Score is --->  0.2805256635083099
(2811, 20) (2811,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 419.20272821706067, 'mean_absolute_error': 138.12518253713313, 'mean_squared_error': 175730.92734462683, 'Confidence': 0.2688013015456787, 'r2_score': 0.2688013015456787}
R2_Score is --->  0.2688013015456787
Features obtained from p-values less than 0.05 : 
-------------------------------------------------
['High Price GR', 'Low Price GR', 'WAP GR', 'No. of Trades GR', 'Spread High-Low GR', 'Avg Dec % in 365 days', 'Revenue GR', 'Income GR', 'Expenditure GR', 'CP % LV 180 days', 'CP % HV 180 days', 'CP % HV 365 days', 'UpperBandInLast1Months', 'L

Ridge Model fitted using columns obtained from feature importance using ForwardSelection : 
{'root_mean_squared_error': 428.9687570228501, 'mean_absolute_error': 142.49520918691354, 'mean_squared_error': 184014.194501729, 'Confidence': 0.234335460752927, 'r2_score': 0.234335460752927}
R2_Score is --->  0.234335460752927
(2811, 23) (2811,)
Lasso Model fitted using columns obtained from feature importance using ForwardSelection : 
{'root_mean_squared_error': 422.210928888694, 'mean_absolute_error': 128.23441962173075, 'mean_squared_error': 178262.06847305383, 'Confidence': 0.258269478111546, 'r2_score': 0.258269478111546}
R2_Score is --->  0.258269478111546
(2811, 23) (2811,)
Elastic Model fitted using columns obtained from feature importance using ForwardSelection : 
{'root_mean_squared_error': 425.18491150222195, 'mean_absolute_error': 147.75263568990528, 'mean_squared_error': 180782.20896915233, 'Confidence': 0.24778342720113633, 'r2_score': 0.24778342720113633}
R2_Score is --->  0.24

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 422.5727298084294, 'mean_absolute_error': 152.04570584876177, 'mean_squared_error': 178567.71197774788, 'Confidence': 0.2569977262565921, 'r2_score': 0.2569977262565921}
R2_Score is --->  0.2569977262565921
(2811, 7) (2811,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 421.4791567152652, 'mean_absolute_error': 124.69300438555862, 'mean_squared_error': 177644.6795454111, 'Confidence': 0.2608383712890523, 'r2_score': 0.2608383712890523}
R2_Score is --->  0.2608383712890523
(2811, 7) (2811,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 425.3577086899975, 'mean_absolute_error': 152.16127593694392, 'mean_squared_error': 180929.18034200475, 'Confidence': 0.24717189411379892, 'r2_score': 0.24717189411379892}
R2_Score is --->  0.24717189411379892
Features 

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 693.5362717016274, 'mean_absolute_error': 332.88999080950737, 'mean_squared_error': 480992.56016579346, 'Confidence': 0.34825713452110574, 'r2_score': 0.34825713452110574}
R2_Score is --->  0.34825713452110574
(2772, 71) (2772,)
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 782.4523090077291, 'mean_absolute_error': 301.7754874158606, 'mean_squared_error': 612231.6158715268, 'Confidence': 0.17042877435080261, 'r2_score': 0.17042877435080261}
R2_Score is --->  0.17042877435080261
(2772, 71) (2772,)
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 701.2392905317081, 'mean_absolute_error': 326.2651029704081, 'mean_squared_error': 491736.5425854134, 'Confidence': 0.3336990842127926, 'r2_score': 0.3336990842127926}
R2_Score is --->  0.333699

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 661.0973282262255, 'mean_absolute_error': 295.41172732009454, 'mean_squared_error': 437049.6773878538, 'Confidence': 0.4077995530758247, 'r2_score': 0.4077995530758247}
R2_Score is --->  0.4077995530758247
(2772, 35) (2772,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 799.2989362482666, 'mean_absolute_error': 303.9777180454912, 'mean_squared_error': 638878.7894876106, 'Confidence': 0.13432196786823714, 'r2_score': 0.13432196786823714}
R2_Score is --->  0.13432196786823714
(2772, 35) (2772,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 719.6978283513572, 'mean_absolute_error': 330.4155226691443, 'mean_squared_error': 517964.96413365967, 'Confidence': 0.2981596849943302, 'r2_score': 0.2981596849943302}
R2_Score is --->  0.2981596849943302
**********

Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 738.0266335107681, 'mean_absolute_error': 363.58623216020743, 'mean_squared_error': 544683.3117712375, 'Confidence': 0.26195643801651003, 'r2_score': 0.26195643801651003}
R2_Score is --->  0.26195643801651003
Features obtained from f-values greater than 10 : 
--------------------------------------
['No. of Trades GR', 'Spread High-Low GR', 'Min Inc % in 365 days', 'Avg Dec % in 365 days', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'CP % HV 30 days', 'CP % BA 30 days', 'CP % HV 90 days', 'CP % BA 90 days', 'CP % HV 365 days', 'CP % BA 365 days', 'UpperBandInNext1Months', 'UpperBandInLast3Months', 'UpperBandInLast6Months', 'UpperBandInNext6Months', 'UpperBandInLast9Months', 'LowerBandInNext9Months', 'UpperBandInNext9Months', 'UpperBandInLast12Months', 'LowerBandInNext12Months', 'UpperBandInNext12Months', 'LowerBandInLast24Months', 'UpperBandInLast24Months', 'LowerBandInNex

Ridge Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 468.79029784723554, 'mean_absolute_error': 315.8268418709404, 'mean_squared_error': 219764.34335569982, 'Confidence': 0.8409137679015178, 'r2_score': 0.8409137679015178}
R2_Score is --->  0.8409137679015178
(2694, 78) (2694,)
Lasso Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 736.3311605813349, 'mean_absolute_error': 384.9893905122326, 'mean_squared_error': 542183.5780430556, 'Confidence': 0.6075162093200139, 'r2_score': 0.6075162093200139}
R2_Score is --->  0.6075162093200139
(2694, 78) (2694,)
Elastic Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 480.48926256928183, 'mean_absolute_error': 331.6554934586433, 'mean_squared_error': 230869.93144437225, 'Confidence': 0.8328744921150673, 'r2_score': 0.832874492115067

Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 568.5011570161842, 'mean_absolute_error': 286.75743689687823, 'mean_squared_error': 323193.5655287401, 'Confidence': 0.7660419074662066, 'r2_score': 0.7660419074662066}
R2_Score is --->  0.7660419074662066
(2694, 38) (2694,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 470.25497752934865, 'mean_absolute_error': 250.27923015510393, 'mean_squared_error': 221139.7438911282, 'Confidence': 0.839918122814301, 'r2_score': 0.839918122814301}
R2_Score is --->  0.839918122814301
Features obtained from p-values less than 0.1 : 
-------------------------------------------------
['Dividend Value', '% Return of Company', 'High Price GR', 'Close Price GR', 'WAP GR', 'No.of Shares GR', 'Total Turnover (Rs.) GR', 'Sequential Increase %', 'Sequential Decrease %', 'Max Inc % in 90 days', 'Max Dec % in 90 days', 'Min Inc % in 90 da

Elastic Model fitted using columns obtained from feature importance using ForwardSelection : 
{'root_mean_squared_error': 1100.3678085479737, 'mean_absolute_error': 528.2030959315392, 'mean_squared_error': 1210809.31408867, 'Confidence': 0.12350161711017948, 'r2_score': 0.12350161711017948}
R2_Score is --->  0.12350161711017948
*****************************************************************************************
Features Importance using Backward Elimination Method
*****************************************************************************************
Features obtained from Backward Elimination method : 
--------------------------------------
['Dividend Value', '% Return of Company', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Max Inc % in 90 days', 'Max Dec % in 90 days', 'Min Inc % in 90 days', 'Avg Inc % in 90 days', 'Avg Dec % in 90 days', 'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Min Inc % in 180 days', 'Min Dec % in 180 days', 'Avg Dec % in 180 da

Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 526.1255789451594, 'mean_absolute_error': 272.6109154351562, 'mean_squared_error': 276808.12482037913, 'Confidence': 0.7996200797658728, 'r2_score': 0.7996200797658728}
R2_Score is --->  0.7996200797658728
Features obtained from f-values greater than 100 : 
--------------------------------------
['Min Inc % in 365 days', 'Avg Inc % in 365 days', 'Revenue GR', 'Income GR', 'CP % HV 365 days', 'CP % BA 365 days', 'UpperBandInNext1Months', 'UpperBandInLast3Months', 'UpperBandInNext3Months', 'UpperBandInLast6Months', 'UpperBandInLast9Months', 'UpperBandInNext9Months', 'UpperBandInNext12Months', 'LowerBandInLast24Months', 'LowerBandInNext24Months', 'UpperBandInNext24Months']
(2694, 16) (2694,)
(2694, 16)
(1886, 16) (808, 16)
Linear Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 502.6090848904726, 'mean_absolute_error': 348.6

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 725.2961923873878, 'mean_absolute_error': 399.02151645114895, 'mean_squared_error': 526054.5666916427, 'Confidence': 0.6342807006135154, 'r2_score': 0.6342807006135154}
R2_Score is --->  0.6342807006135154
(2620, 69) (2620,)
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 718.6289713988842, 'mean_absolute_error': 395.1277802419509, 'mean_squared_error': 516427.59853381844, 'Confidence': 0.640973481691413, 'r2_score': 0.640973481691413}
R2_Score is --->  0.640973481691413
(2620, 69) (2620,)
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 699.8820358062782, 'mean_absolute_error': 384.8791014146592, 'mean_squared_error': 489834.86404434044, 'Confidence': 0.6594610623380867, 'r2_score': 0.6594610623380867}
R2_Score is --->  0.65946106233808

Ridge Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 645.4945692488265, 'mean_absolute_error': 387.44287649741494, 'mean_squared_error': 416663.238929728, 'Confidence': 0.7103308335866891, 'r2_score': 0.7103308335866891}
R2_Score is --->  0.7103308335866891
(2620, 49) (2620,)
Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 594.7676356464915, 'mean_absolute_error': 338.69991012355297, 'mean_squared_error': 353748.54041251773, 'Confidence': 0.7540698692679692, 'r2_score': 0.7540698692679692}
R2_Score is --->  0.7540698692679692
(2620, 49) (2620,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 591.4488518871392, 'mean_absolute_error': 336.7317302537824, 'mean_squared_error': 349811.74439861515, 'Confidence': 0.7568067759905676, 'r2_score': 0.7568067759905676}
R2_Score is --->  0.7568067759905676
************

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 664.4743384754162, 'mean_absolute_error': 397.72676292576506, 'mean_squared_error': 441526.146492342, 'Confidence': 0.6930458488907192, 'r2_score': 0.6930458488907192}
R2_Score is --->  0.6930458488907192
(2620, 62) (2620,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 558.7288240813351, 'mean_absolute_error': 359.30654436796584, 'mean_squared_error': 312177.8988593115, 'Confidence': 0.7829702664254315, 'r2_score': 0.7829702664254315}
R2_Score is --->  0.7829702664254315
(2620, 62) (2620,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 558.7288240813452, 'mean_absolute_error': 359.30654436796715, 'mean_squared_error': 312177.8988593228, 'Confidence': 0.7829702664254237, 'r2_score': 0.7829702664254237}
R2_Score is --->  0.7829702664254237
Features obta

Ridge Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 376.8983243302559, 'mean_absolute_error': 287.16902708837614, 'mean_squared_error': 142052.34688295476, 'Confidence': 0.9028627877760108, 'r2_score': 0.9028627877760108}
R2_Score is --->  0.9028627877760108
(2502, 78) (2502,)
Lasso Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 342.6469064764971, 'mean_absolute_error': 239.80325066397242, 'mean_squared_error': 117406.90251791338, 'Confidence': 0.919715657947977, 'r2_score': 0.919715657947977}
R2_Score is --->  0.919715657947977
(2502, 78) (2502,)
Elastic Model fitted using columns obtained from feature importance using AllFeatureConsideration : 
{'root_mean_squared_error': 341.87284634107755, 'mean_absolute_error': 239.96064646757918, 'mean_squared_error': 116877.04306535002, 'Confidence': 0.9200779826206904, 'r2_score': 0.920077982620690

Lasso Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 342.77258487535545, 'mean_absolute_error': 240.4024642171917, 'mean_squared_error': 117493.04494213274, 'Confidence': 0.919656752656185, 'r2_score': 0.919656752656185}
R2_Score is --->  0.919656752656185
(2502, 37) (2502,)
Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 342.0659439127088, 'mean_absolute_error': 240.5348202855546, 'mean_squared_error': 117009.10998489245, 'Confidence': 0.919987673571436, 'r2_score': 0.919987673571436}
R2_Score is --->  0.919987673571436
Features obtained from p-values less than 0.1 : 
-------------------------------------------------
['Dividend Value', '% Return of Company', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Avg Dec % in 90 days', 'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Min Dec % in 180 days', 'Avg Inc % in 180 days', 'Revenue GR', 'Dividend Va

Features obtained from Backward Elimination method : 
--------------------------------------
['Dividend Value', '% Return of Company', 'High Price GR', 'Low Price GR', 'Close Price GR', 'WAP GR', 'Min Dec % in 90 days', 'Avg Dec % in 90 days', 'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Min Dec % in 180 days', 'Avg Inc % in 180 days', 'Avg Dec % in 180 days', 'Min Dec % in 365 days', 'Avg Inc % in 365 days', 'Avg Dec % in 365 days', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR', 'CP % HV 7 days', 'CP % LV 30 days', 'CP % HV 90 days', 'CP % BA 90 days', 'CP % LV 180 days', 'CP % HV 180 days', 'CP % BA 180 days', 'CP % LV 365 days', 'CP % HV 365 days', 'CP % BA 365 days', 'UpperBandInLast1Months', 'UpperBandInNext1Months', 'LowerBandInLast3Months', 'LowerBandInNext3Months', 'LowerBandInLast6Months', 'UpperBandInLast6Months', 'LowerBandInNext6Months', 'UpperBandInNext6Months', 'LowerBandInLast9Months', 'UpperBandInLast9Months', 'LowerBa

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 349.4552359181712, 'mean_absolute_error': 251.47322851694616, 'mean_squared_error': 122118.96191062467, 'Confidence': 0.9164934914488978, 'r2_score': 0.9164934914488978}
R2_Score is --->  0.9164934914488978
(2502, 19) (2502,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 328.4360342982144, 'mean_absolute_error': 214.84753503005317, 'mean_squared_error': 107870.22862553787, 'Confidence': 0.926236957568306, 'r2_score': 0.926236957568306}
R2_Score is --->  0.926236957568306
(2502, 19) (2502,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 326.7678845097243, 'mean_absolute_error': 210.41009281899005, 'mean_squared_error': 106777.25034696053, 'Confidence': 0.9269843500988205, 'r2_score': 0.9269843500988205}
R2_Score is --->  0.9269843500988205
Features obt

Ridge Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 665.5179474201979, 'mean_absolute_error': 452.85544058711736, 'mean_squared_error': 442914.1383383933, 'Confidence': -0.34713600123938915, 'r2_score': -0.34713600123938915}
R2_Score is --->  -0.34713600123938915
(2127, 77) (2127,)
Lasso Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 678.8456361817038, 'mean_absolute_error': 462.10230581866176, 'mean_squared_error': 460831.39776294207, 'Confidence': -0.4016318574902329, 'r2_score': -0.4016318574902329}
R2_Score is --->  -0.4016318574902329
(2127, 77) (2127,)
Elastic Model fitted using columns obtained from feature importance using Coefficients : 
{'root_mean_squared_error': 676.5546311112507, 'mean_absolute_error': 460.6953298711101, 'mean_squared_error': 457726.1688780806, 'Confidence': -0.3921872151526038, 'r2_score': -0.3921872151526038}
R2_Score is --->  -0

Elastic Model fitted using columns obtained from feature importance using PValue : 
{'root_mean_squared_error': 582.8221493764161, 'mean_absolute_error': 367.2515556793888, 'mean_squared_error': 339681.6578037455, 'Confidence': -0.033151463407322224, 'r2_score': -0.033151463407322224}
R2_Score is --->  -0.033151463407322224
Features obtained from p-values less than 0.2 : 
-------------------------------------------------
['Dividend Value', 'WAP GR', 'Spread Close-Open GR', 'Sequential Increase %', 'Max Dec % in 90 days', 'Min Dec % in 90 days', 'Max Dec % in 180 days', 'Min Inc % in 180 days', 'Avg Inc % in 180 days', 'Avg Dec % in 180 days', 'Max Inc % in 365 days', 'Max Dec % in 365 days', 'Min Inc % in 365 days', 'Avg Inc % in 365 days', 'Revenue GR', 'Dividend Value GR', 'Income GR', 'Expenditure GR', 'Net Profit GR', 'EPS GR', 'CP % HV 7 days', 'CP % BA 7 days', 'CP % LV 30 days', 'CP % HV 30 days', 'CP % BA 30 days', 'CP % LV 90 days', 'CP % HV 90 days', 'CP % LV 180 days', 'CP %

Elastic Model fitted using columns obtained from feature importance using BackwardElimination : 
{'root_mean_squared_error': 675.6435956191432, 'mean_absolute_error': 460.1048701785258, 'mean_squared_error': 456494.2683011642, 'Confidence': -0.38844035436523416, 'r2_score': -0.38844035436523416}
R2_Score is --->  -0.38844035436523416
*****************************************************************************************
Features Importance using f-value
*****************************************************************************************
Features obtained from f-values greater than 1 : 
--------------------------------------
['Dividend Value', 'High Price GR', 'WAP GR', 'No. of Trades GR', 'Spread Close-Open GR', 'Sequential Increase %', 'Sequential Decrease %', 'Max Inc % in 90 days', 'Max Dec % in 90 days', 'Min Inc % in 90 days', 'Avg Dec % in 90 days', 'Max Inc % in 180 days', 'Max Dec % in 180 days', 'Avg Inc % in 180 days', 'Avg Dec % in 180 days', 'Max Inc % in 365 days', 

Ridge Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 571.7101360933684, 'mean_absolute_error': 337.30436963026034, 'mean_squared_error': 326852.4797118979, 'Confidence': 0.005868848156169393, 'r2_score': 0.005868848156169393}
R2_Score is --->  0.005868848156169393
(2127, 2) (2127,)
Lasso Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 572.7258637086763, 'mean_absolute_error': 341.8923360428361, 'mean_squared_error': 328014.9149608493, 'Confidence': 0.0023332681477126194, 'r2_score': 0.0023332681477126194}
R2_Score is --->  0.0023332681477126194
(2127, 2) (2127,)
Elastic Model fitted using columns obtained from feature importance using FValue : 
{'root_mean_squared_error': 572.5657905271138, 'mean_absolute_error': 341.23458328748956, 'mean_squared_error': 327831.58448193874, 'Confidence': 0.0028908730351775436, 'r2_score': 0.0028908730351775436}
R2_Score is --->  0.0028908730

In [50]:
for name,table in tables.items():
    print(table)

+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                                Linear Regression                                                                                 |
+-----------------------------------------------+-------------------------+--------------------------+------------+------------+---------------+-----------------+-----------------+
|                    Company                    |         Y-Column        |          Method          |    RMSE    |    MAE     |      MSE      |    Confidence   |     r2_score    |
+-----------------------------------------------+-------------------------+--------------------------+------------+------------+---------------+-----------------+-----------------+
| 500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD. |  LowerBandInNext1Months | AllFeatureConsidera

+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                                                                                 Ridge Regression                                                                                 |
+-----------------------------------------------+-------------------------+--------------------------+------------+------------+---------------+-----------------+-----------------+
|                    Company                    |         Y-Column        |          Method          |    RMSE    |    MAE     |      MSE      |    Confidence   |     r2_score    |
+-----------------------------------------------+-------------------------+--------------------------+------------+------------+---------------+-----------------+-----------------+
| 500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD. |  LowerBandInNext1Months | AllFeatureConsidera

In [51]:
final_df

Unnamed: 0,Company,Y-Column,Model,Method,RMSE,MAE,MSE,r2_score
18,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext12Months,Lasso Regression,LassoFICoefficients0.1,0.079842,0.059202,0.006375,0.874266
19,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext12Months,Elastic Net Regression,ElasticFIFValue1000,0.07985,0.050669,0.006376,0.87424
17,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext12Months,Ridge Regression,RidgeFIFValue1000,0.079862,0.050893,0.006378,0.874201
16,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext12Months,Linear Regression,LinearFIFValue1000,0.08009,0.050967,0.006414,0.873482
1,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext1Months,Ridge Regression,RidgeFIFValue1000,0.085212,0.065601,0.007261,0.380726
0,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext1Months,Linear Regression,LinearFIFValue1000,0.08525,0.065685,0.007268,0.380182
2,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext1Months,Lasso Regression,LassoFIFValue1000,0.08525,0.065685,0.007268,0.380182
3,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext1Months,Elastic Net Regression,ElasticFIFValue1000,0.08525,0.065685,0.007268,0.380182
22,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext24Months,Lasso Regression,LassoFIFValue1000,0.1896,0.1523,0.035948,0.035263
23,500013-ANSAL PROPERTIES & INFRASTRUCTURE LTD.,LowerBandInNext24Months,Elastic Net Regression,ElasticFIFValue1000,0.197456,0.162128,0.038989,-0.04634


In [52]:
# final_df = final_df.sort_values(by = ['Company', 'Y-Column', 'r2_score'], ascending = [True, True, False])
# final_df.to_csv('C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\NextUBLB_Results\\Results_AFC_Linear_500112.csv', index = False) 

In [53]:
# final_df = final_df.sort_values(by = ['Company', 'Y-Column', 'r2_score'], ascending = [True, True, False])
# final_df.to_csv('C:\\Users\\venu\\Desktop\\Stock Market Analysis\\Data\\NextUBLB_Results\\Results_Linear3.csv', index = False) 