In [22]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
import statsmodels.formula.api as sm 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge,Lasso,RidgeCV,LassoCV,ElasticNet,ElasticNetCV,LinearRegression
from pandas_profiling import ProfileReport

In [None]:

class Eda_vinu:
    """Helpers for exploratory data analysis and regularised linear regression.

    Every public method wraps its work in ``try``/``except`` and re-raises any
    failure as a plain ``Exception`` whose message starts with the method name,
    so callers only ever need to catch ``Exception``.

    The regression helpers share one pipeline: split ``df`` into features and
    target, standardise the features with ``StandardScaler``, then call
    ``train_test_split`` with ``random_state=100``.

    NOTE: the scaler is fitted on the whole dataset *before* the train/test
    split, so test-set statistics influence the scaling of the training data.
    """

    def make_frame(self, file_location):
        """Read a CSV file into a DataFrame.
        ====================================================
        Input :
            file_location --> path of the CSV file
        Output :
            df --> DataFrame returned by ``pd.read_csv``
        Raises :
            Exception if the file cannot be read or parsed
        """
        try:
            return pd.read_csv(file_location)
        except Exception as e:
            raise Exception("(make_frame)- failed to make a frame.\n" + str(e)) from e

    def null_val_summary(self, df):
        """Count the null values in every column.
        =============================================
        Input :
            df --> Data
        Output :
            summary --> Series indexed by column name holding the null count
        """
        try:
            return df.isnull().sum()
        except Exception as e:
            raise Exception("(null_val_summary) - failed to create summary of null values.\n" + str(e)) from e

    def profile_report_to_widgets(self, df):
        """Build a pandas-profiling report and render it as notebook widgets.
        ================================================
        Input :
            df --> Data
        Output :
            whatever ``ProfileReport.to_widgets()`` returns
        """
        try:
            report = ProfileReport(df, title="Pandas Profile Report")
            return report.to_widgets()
        except Exception as e:
            raise Exception("(profile_report_to_widgets) - failed to craete pandas profile report.\n" + str(e)) from e

    def profile_report_to_notebook_iframe(self, df):
        """Build a pandas-profiling report and render it as a notebook iframe.
        =======================================================
        Input :
            df --> Data
        Output :
            whatever ``ProfileReport.to_notebook_iframe()`` returns
        """
        try:
            report = ProfileReport(df)
            return report.to_notebook_iframe()
        except Exception as e:
            raise Exception("(profile_report_to_notebook_iframe) - failed to create notebook iframe.\n" + str(e)) from e

    def unwanted_index_col(self, df):
        """Drop serial-number columns (values exactly 1, 2, ..., n) in place.
        ===========================================================================================
        Input :
            df --> Data; MODIFIED IN PLACE
        Output :
            True once the scan has finished (whether or not anything was dropped)

        An empty frame is left untouched: with zero rows there is nothing to
        compare, so no column can be identified as a serial number.
        """
        try:
            n_rows = df.shape[0]
            if n_rows == 0:
                return True

            expected = list(range(1, n_rows + 1))
            serial_cols = [
                df.columns[pos]
                for pos in range(df.shape[1])
                if list(df.iloc[:, pos]) == expected
            ]
            if serial_cols:
                # dict.fromkeys de-duplicates while keeping order, so a repeated
                # column label is only handed to drop() once.
                df.drop(columns=list(dict.fromkeys(serial_cols)), inplace=True)
            return True
        except Exception as e:
            raise Exception("(unwanted_index_col) - failed to remove columns.\n" + str(e)) from e

    def float_int_cols(self, df):
        """Return only the numeric (int / float) columns of ``df``.
        ======================================================================================
        Input :
            df --> Data
        Output :
            df1 --> new DataFrame restricted to numeric columns; strings,
                    booleans, datetimes and categoricals are excluded
        """
        try:
            return df.select_dtypes(include="number")
        except Exception as e:
            raise Exception("(float_int_cols) - failed to return required dataframe.\n" + str(e)) from e

    def remove_null_val(self, cd, df):
        """Fill the nulls of every column with that column's central tendency.
        ===========================================================================================
        Input :
            cd --> central tendency as a string: "mean", "mode" or "median"
                   (case-insensitive)
            df --> original Data; MODIFIED IN PLACE
        Output :
            df --> the same DataFrame, returned even when nothing needed filling
        Raises :
            Exception if ``cd`` is not one of the three names, or if the chosen
            statistic cannot be computed for a column that contains nulls
            (e.g. the mean of a text column)
        """
        try:
            choice = str(cd).strip().lower()
            if choice not in ("mean", "mode", "median"):
                raise ValueError(
                    f"unknown central tendency {cd!r}; expected 'mean', 'mode' or 'median'"
                )

            null_counts = df.isnull().sum()
            for col in null_counts[null_counts > 0].index:
                if choice == "mean":
                    fill_value = df[col].mean()
                elif choice == "median":
                    fill_value = df[col].median()
                else:
                    modes = df[col].mode()
                    if modes.empty:
                        # column is entirely null: there is no mode to fill with
                        continue
                    # mode() returns a Series (ties are possible); fillna needs
                    # a scalar, so take the first (smallest) mode.
                    fill_value = modes.iloc[0]
                df[col] = df[col].fillna(fill_value)
            return df
        except Exception as e:
            raise Exception("(remove_null_val) - failed to remove null values.\n" + str(e)) from e

    ###### Now Linear Regression #######

    def predic_feature_col_split(self, df, pred_col):
        """Split ``df`` into feature columns and the prediction column.
        ===============================================
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
        Output :
            x --> feature columns (``df`` without ``pred_col``)
            y --> prediction column
        """
        try:
            y = df[pred_col]
            x = df.drop(columns=[pred_col])
            return x, y
        except Exception as e:
            raise Exception("(predic_feature_col_split) - failed to split data into prediction and feature columns.\n" + str(e)) from e

    def Standardizing_dataset(self, df, pred_col):
        """Return the feature columns standardised with ``StandardScaler``.
        ==============================================================================
        Input :
            df --> Data with prediction column
            pred_col --> prediction column name
        Output :
            df2 --> standardised feature columns only (no prediction column),
                    with a fresh default integer index
        """
        try:
            x, _ = self.predic_feature_col_split(df, pred_col)
            scaled = StandardScaler().fit_transform(x)
            return pd.DataFrame(scaled, columns=x.columns)
        except Exception as e:
            raise Exception("(Standardizing_dataset) - failed to standardize dataset.\n" + str(e)) from e

    def VIF(self, df, pred_col):
        """Compute the variance inflation factor of every standardised feature.
        ============================================================================
        Input :
            df --> Data with prediction column
            pred_col --> prediction column name
        Output :
            df_vif --> DataFrame with columns "Features" and "VIF"
        """
        try:
            from statsmodels.stats.outliers_influence import variance_inflation_factor

            scaled = self.Standardizing_dataset(df, pred_col)
            # Pass a plain ndarray: statsmodels indexes the design matrix
            # positionally, which a DataFrame does not support in every release.
            values = scaled.values
            return pd.DataFrame({
                "Features": list(scaled.columns),
                "VIF": [variance_inflation_factor(values, i) for i in range(values.shape[1])],
            })
        except Exception as e:
            raise Exception("(VIF)- failed to find VIF of data.\n" + str(e)) from e

    def give_high_VIF_col(self, df, pred_col):
        """List the features whose variance inflation factor exceeds 10.
        ============================================================================
        Input :
            df --> Data with prediction column
            pred_col --> prediction column name
        Output :
            l --> list of column names with VIF > 10
        """
        try:
            df_vif = self.VIF(df, pred_col)
            return df_vif.loc[df_vif["VIF"] > 10, "Features"].tolist()
        except Exception as e:
            raise Exception("(give_high_VIF_col)-failed to provide high VIF column names.\n" + str(e)) from e

    def _split_scaled(self, df, pred_col, test_size):
        """Standardise the features, then split; returns x_train, x_test, y_train, y_test."""
        _, y = self.predic_feature_col_split(df, pred_col)
        scaled = self.Standardizing_dataset(df, pred_col)
        return train_test_split(scaled, y, test_size=test_size, random_state=100)

    def training_dataset(self, df, pred_col, test_size):
        """Return the training part of the standardised train/test split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> passed to ``train_test_split`` (random_state = 100)
        Output :
            x_train --> standardised training features
            y_train --> training target
        """
        try:
            x_train, _, y_train, _ = self._split_scaled(df, pred_col, test_size)
            return x_train, y_train
        except Exception as e:
            raise Exception("(training_dataset)-failed to find training dataset.\n" + str(e)) from e

    def testing_dataset(self, df, pred_col, test_size):
        """Return the testing part of the standardised train/test split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> passed to ``train_test_split`` (random_state = 100)
        Output :
            x_test --> standardised testing features
            y_test --> testing target
        """
        try:
            _, x_test, _, y_test = self._split_scaled(df, pred_col, test_size)
            return x_test, y_test
        except Exception as e:
            raise Exception("(testing_dataset)-failed to find testing dataset.\n" + str(e)) from e

    def fit_LinearRegression(self, df, pred_col, test_size):
        """Fit an ordinary least-squares model on the training split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
        Output :
            linear --> fitted ``LinearRegression`` model
        """
        try:
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            linear = LinearRegression()
            linear.fit(x_train, y_train)
            return linear
        except Exception as e:
            raise Exception("(fit_LinearRegression) - failed to fit the model.\n" + str(e)) from e

    def r2_linear(self, df, pred_col, test_size):
        """Return R^2 of the linear model on the testing split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
        Output :
            r2_lin --> R^2 of the linear model on the test data
        """
        try:
            linear = self.fit_LinearRegression(df, pred_col=pred_col, test_size=test_size)
            x_test, y_test = self.testing_dataset(df, pred_col, test_size)
            return linear.score(x_test, y_test)
        except Exception as e:
            raise Exception("(r2_linear) - failed to find r2 value of linear model.\n" + str(e)) from e

    def adj_r2(self, df, pred_col, test_size):
        """Return adjusted R^2 of the linear model over the whole dataset.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size used to fit the model
        Output :
            adjusted_r2 --> 1 - (1 - r2) * (n - 1) / (n - p - 1), where n is the
                            number of rows and p the number of features
        Raises :
            Exception when n - p - 1 <= 0 (adjusted R^2 is undefined)
        """
        try:
            x, y = self.predic_feature_col_split(df, pred_col)
            n, p = x.shape
            if n - p - 1 <= 0:
                raise ValueError(
                    f"adjusted r2 needs more rows than features + 1 (rows={n}, features={p})"
                )
            linear = self.fit_LinearRegression(df, pred_col, test_size)
            # The model was trained on standardised features, so it must be
            # scored on standardised features as well.
            r2 = linear.score(self.Standardizing_dataset(df, pred_col), y)
            return 1 - (1 - r2) * (n - 1) / (n - p - 1)
        except Exception as e:
            raise Exception("(adj_r2) -- failed to find adjusted r2.\n" + str(e)) from e

    def lasso_alpha(self, df, pred_col, test_size):
        """Pick the Lasso regularisation strength by cross-validation.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
            max_iter = 2000000
        Output :
            alpha --> regularising coefficient chosen by ``LassoCV``
        """
        try:
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            # No ``normalize`` argument: the features are already standardised,
            # the option no longer exists in current scikit-learn, and the alpha
            # must be on the same scale as the plain ``Lasso`` that consumes it.
            lassocv = LassoCV(max_iter=2000000)
            lassocv.fit(x_train, y_train)
            return lassocv.alpha_
        except Exception as e:
            raise Exception("(lasso_alpha) - falied to find out alpha value.\n" + str(e)) from e

    def r2_lasso(self, df, pred_col, test_size):
        """Return R^2 of a Lasso model (CV-selected alpha) on the testing split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
        Output :
            r2_lass --> lasso R^2 value on the test data
        """
        try:
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            x_test, y_test = self.testing_dataset(df, pred_col, test_size)
            lasso = Lasso(alpha=self.lasso_alpha(df, pred_col, test_size))
            lasso.fit(x_train, y_train)
            return lasso.score(x_test, y_test)
        except Exception as e:
            raise Exception("(r2-lasso) - failed to find out lasso r2 value.\n" + str(e)) from e

    def ridge_alpha(self, df, pred_col, test_size):
        """Pick the Ridge regularisation strength by 10-fold cross-validation.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
            cv = 10
        Output :
            alpha --> Ridge alpha chosen by ``RidgeCV`` from 50 candidates
                      drawn uniformly from [1, 10)
        """
        try:
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            # Seeded generator: the candidate grid, and therefore the chosen
            # alpha, is identical on every call.
            candidates = np.random.RandomState(100).uniform(1, 10, 50)
            ridgecv = RidgeCV(alphas=candidates, cv=10)
            ridgecv.fit(x_train, y_train)
            return ridgecv.alpha_
        except Exception as e:
            raise Exception("(ridge_alpha) - failed to find out ridge alpha value.\n" + str(e)) from e

    def r2_ridge(self, df, pred_col, test_size):
        """Return R^2 of a Ridge model (CV-selected alpha) on the testing split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
        Output :
            r2_rid --> ridge R^2 value on the test data
        """
        try:
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            x_test, y_test = self.testing_dataset(df, pred_col, test_size)
            ridge = Ridge(alpha=self.ridge_alpha(df, pred_col, test_size))
            ridge.fit(x_train, y_train)
            return ridge.score(x_test, y_test)
        except Exception as e:
            raise Exception("(r2_ridge) - failed to find out ridge r2 value.\n" + str(e)) from e

    def _fit_elastic_net_cv(self, df, pred_col, test_size):
        """Fit a 10-fold ``ElasticNetCV`` on the training split and return it."""
        x_train, y_train = self.training_dataset(df, pred_col, test_size)
        elasticnetcv = ElasticNetCV(cv=10)
        elasticnetcv.fit(x_train, y_train)
        return elasticnetcv

    def elastic_net_alpha(self, df, pred_col, test_size):
        """Pick the elastic-net alpha by 10-fold cross-validation.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
            cv = 10
        Output :
            alpha --> elastic net alpha value
        """
        try:
            return self._fit_elastic_net_cv(df, pred_col, test_size).alpha_
        except Exception as e:
            raise Exception("(elastic_net_alpha) - failed to find elastic net alpha value.\n" + str(e)) from e

    def elastic_net_l1_ratio(self, df, pred_col, test_size):
        """Return the l1_ratio selected by the 10-fold ``ElasticNetCV`` fit.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
            cv = 10
        Output :
            l1_ratio --> elastic net l1_ratio
        """
        try:
            return self._fit_elastic_net_cv(df, pred_col, test_size).l1_ratio_
        except Exception as e:
            raise Exception("(elastic_net_l1_ratio) - failed to find elastic net l1 ratio .\n" + str(e)) from e

    def r2_elastic_net(self, df, pred_col, test_size):
        """Return R^2 of an elastic-net model on the testing split.
        ========================================================================================
        ****IMPORTANT - standardization is used here
        Input :
            df --> Data containing the prediction column
            pred_col --> prediction column name
            test_size --> test/train split size (random_state = 100)
        Output :
            r2_elastic --> elastic net R^2 value on the test data, using the
                           alpha and l1_ratio chosen by cross-validation
        """
        try:
            alpha = self.elastic_net_alpha(df, pred_col, test_size)
            l1_ratio = self.elastic_net_l1_ratio(df, pred_col, test_size)
            x_train, y_train = self.training_dataset(df, pred_col, test_size)
            x_test, y_test = self.testing_dataset(df, pred_col, test_size)
            elasticnet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
            elasticnet.fit(x_train, y_train)
            return elasticnet.score(x_test, y_test)
        except Exception as e:
            raise Exception("(r2_elastic_net)- failed to find out elastic net r2 value.\n" + str(e)) from e
            
    

        
    
    
    
        
        
        
        
        
    
        
    
        
    
        
        
 
        
        
        
        
        
            
    
    
    
    
        
        
    
        
        
        
        
        
        
        
        
        
            
            
            
    
            
        
        
        

In [20]:
def add():
    """Demonstrate exception wrapping: dividing an int by a str always fails.

    The division ``10 / "vina"`` raises a ``TypeError``; it is caught and
    re-raised as a plain ``Exception`` whose message is prefixed with
    " - Type Error". The function therefore never returns normally.
    """
    numerator, denominator = 10, "vina"
    try:
        return numerator / denominator
    except Exception as err:
        message = " - Type Error\n" + str(err)
        raise Exception(message)
    

In [25]:
def add(a,b):
    """Return the true-division quotient ``a / b``.

    Note: despite its name, this function divides; it does not add.
    """
    quotient = a / b
    return quotient

add(a=10, b=2)

5.0