In [1]:
def ReturnR2value(df, choice_str='R2'):
    '''Fit a linear regression on df and return the result named by choice_str.

    The last column of df is used as the target and every preceding column
    as a feature. The rows are split 70/30 into train and test sets with a
    fixed random_state of 11, the features are standardized, and a
    LinearRegression model is fitted on the training rows and evaluated on
    the test rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns followed by the target as the last column.
    choice_str : str, default 'R2'
        Which result to return:
        'R2'           - test-set R2 score as a percentage, rounded to 2 places
        'MSE'          - the ROOT mean squared error on the test set (the
                         option name is historical and kept so that existing
                         callers keep getting the same quantity)
        'RMSE'         - same value as 'MSE', under an accurate name
        'Intercept'    - intercept of the fitted model
        'Coefficients' - DataFrame with one 'Coefficient' row per feature
        'ActualVsPred' - DataFrame with 'Actual' and 'Predicted' columns
                         for the test rows

    Returns
    -------
    float or pandas.DataFrame
        The value selected by choice_str, as listed above.

    Raises
    ------
    ValueError
        If choice_str is not one of the supported options.
    '''
    # Author: Alexei Marcilio
    # Date: Nov 20, 2020
    # Ver 1.0
    valid_choices = ('R2', 'MSE', 'RMSE', 'Intercept', 'Coefficients',
                     'ActualVsPred')
    # Reject a misspelled option up front instead of fitting a model and
    # then silently returning None.
    if choice_str not in valid_choices:
        raise ValueError(
            "Unknown choice_str {!r}; expected one of {}".format(
                choice_str, ', '.join(valid_choices)))

    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    # We assume the last column is the target
    X = df.iloc[:, 0:-1]
    y = df.iloc[:, -1]

    # Split data into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=11)

    # Fit the scaler on the training rows only and reuse it for the test
    # rows, so that test-set statistics do not leak into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Create and train the model
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # Predict
    y_pred = regressor.predict(X_test)

    # Build only the result that was asked for.
    if choice_str == 'R2':
        return round(r2_score(y_test, y_pred) * 100, 2)
    if choice_str in ('MSE', 'RMSE'):
        # Root Mean Squared Error
        return np.sqrt(mean_squared_error(y_test, y_pred))
    if choice_str == 'Intercept':
        return regressor.intercept_
    if choice_str == 'Coefficients':
        return pd.DataFrame(regressor.coef_, index=df.columns[:-1],
                            columns=['Coefficient'])
    # Only 'ActualVsPred' remains, because choice_str was validated above.
    return pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
            

In [2]:
def RemoveOutlierDF(df, iqr_multiplier=1.5):
    '''Return the rows of df that hold no IQR outlier in any numeric column.

    For every numeric column the first and third quartiles (Q1, Q3) and the
    inter-quartile range IQR = Q3 - Q1 are computed. A value is an outlier
    when it lies below Q1 - iqr_multiplier * IQR or above
    Q3 + iqr_multiplier * IQR. A row is dropped if any of its numeric values
    is an outlier. Non-numeric columns are ignored when detecting outliers
    but are kept in the returned rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to filter; it is not modified.
    iqr_multiplier : float, default 1.5
        Width of the accepted band around the quartiles, in units of IQR.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the outlier rows removed; all columns and the
        original index labels of the surviving rows are preserved.
    '''
    # Author: Alexei Marcilio
    # Date: Nov 20, 2020
    # Ver 1.0
    # Quantiles are only meaningful for numeric data, and comparing a frame
    # against a Series requires matching column labels, so the fences are
    # computed on the numeric columns alone.
    numeric = df.select_dtypes(include='number')
    if numeric.shape[1] == 0:
        # Nothing to test against: every row is kept.
        return df.copy()

    # IQR
    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    lower_fence = q1 - iqr_multiplier * iqr
    upper_fence = q3 + iqr_multiplier * iqr

    # Comparisons with NaN are False, so missing values never mark a row.
    is_outlier = (numeric.lt(lower_fence, axis='columns')
                  | numeric.gt(upper_fence, axis='columns'))
    return df[~is_outlier.any(axis=1)]