In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [18]:
# Remove accessors that an earlier run of this notebook may have left on
# pd.DataFrame, so that registering them again starts from a clean class.
# "missing" is kept from the original cleanup; "lmodels" is the accessor
# that this file actually registers, and was previously never cleared.
for _stale_accessor in ("missing", "lmodels"):
    try:
        delattr(pd.DataFrame, _stale_accessor)
    except AttributeError:
        # Accessor is not currently registered: nothing to clean up.
        pass
del _stale_accessor

In [21]:
@pd.api.extensions.register_dataframe_accessor("lmodels")
class LinearModels:
    """DataFrame accessor, reachable as ``df.lmodels``, for quick linear fits.

    The accessor keeps a reference to the DataFrame it was looked up on and
    uses its columns as predictors and target.
    """

    def __init__(self, pandas_obj) -> None:
        # pandas hands over the DataFrame the accessor is attached to.
        self._df = pandas_obj

    def multilinear(self, xcol, ycol, *, test_size=None, random_state=None) -> tuple:
        """Fit a linear regression on standardized data and return held-out results.

        The rows are split with ``sklearn.model_selection.train_test_split``,
        predictors and target are standardized, a ``LinearRegression`` is
        trained on the training split and evaluated on the test split.

        Args:
            xcol: Name of a single predictor column, or a list (also a NumPy
                array or pandas Index) of column names for a multilinear
                model.
            ycol: Name of the column to predict.
            test_size: Forwarded to ``train_test_split``; ``None`` keeps
                scikit-learn's default split.
            random_state: Forwarded to ``train_test_split``; pass an int to
                make the split reproducible.

        Returns:
            ``(y_test, y_pred)``: the standardized test targets and the
            model's predictions for them, each of shape ``(n_test, 1)``.
            Their difference gives the residuals for a residual plot.
        """
        # Wrap a single column name in a list so that the predictor matrix is
        # always 2-D with one column per predictor (a bare reshape(-1, 1)
        # would stack several columns on top of each other).
        if isinstance(xcol, (list, np.ndarray, pd.Index)):
            predictor_names = list(xcol)
        else:
            predictor_names = [xcol]

        X = self._df[predictor_names].to_numpy()
        # StandardScaler only accepts 2-D input, so the target becomes a column.
        y = self._df[ycol].to_numpy().reshape(-1, 1)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Fit the scalers on the training rows only, so that no statistic of
        # the test rows leaks into the preprocessing step.
        sc_x = StandardScaler().fit(X_train)
        sc_y = StandardScaler().fit(y_train)

        model = LinearRegression()
        model.fit(sc_x.transform(X_train), sc_y.transform(y_train))
        y_pred = model.predict(sc_x.transform(X_test))

        return sc_y.transform(y_test), y_pred

  class LinearModels:
