### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

### Load Data and Specify Sample Modelling Data

In [2]:
# Read the training set from the relative CSV path into a DataFrame.
train = pd.read_csv(filepath_or_buffer="Data/train.csv")

In [3]:
# Keep the raw target alongside its natural-log transform; the regression
# at the end of the notebook is fit on the log-scale version.
dependent = train["SalePrice"]
l_dependent = np.log(dependent)

In [4]:
# Columns routed through the categorical pipe (constant imputation, then one-hot encoding).
categoricals = [
    "MSSubClass",
    "MSZoning",
    "Alley",
]

In [5]:
# Columns routed through the numerical pipe (median imputation, then scaling).
numericals = [
    "LotFrontage",
    "LotArea",
]

### Initialise Transformers for Pipes

In [6]:
# Missing categorical values are filled with the literal string "None"
# (a placeholder category, not Python's None object).
cat_imputer = SimpleImputer(fill_value="None", strategy="constant")

In [7]:
# Missing numerical values are filled using the "median" strategy.
num_imputer = SimpleImputer(
    strategy="median",
)

In [8]:
# Default-configured StandardScaler; used as the 'scale' step of the numerical pipe.
scaler = StandardScaler()

In [9]:
# One-hot encoder for the categorical pipe: categories not seen during fit are
# ignored at transform time, and a dense (non-sparse) array is requested.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name, so try the current keyword first and fall back to the
# legacy one on older installs, where the new keyword raises TypeError.
try:
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)

### Initialise Pipes

In [10]:
# Categorical branch: impute missing values first, then one-hot encode.
cat_pipe = Pipeline(
    steps=[
        ('impute', cat_imputer),
        ('encode', encoder),
    ]
)

In [11]:
# Numerical branch: impute missing values first, then scale.
num_pipe = Pipeline(
    steps=[
        ('impute', num_imputer),
        ('scale', scaler),
    ]
)

### Compose Pipes Using Column Transformer

In [12]:
# Send each column group through its own pipe: the categorical columns go to
# cat_pipe and the numerical columns go to num_pipe.
data_preprocessor = ColumnTransformer([
    ('categorical', cat_pipe, categoricals),
    ('numerical', num_pipe, numericals),
])

In [13]:
# Wrap the column transformer in a pipeline whose only step is the preprocessor.
data_pipe = Pipeline(steps=[('preprocessor', data_preprocessor)])

In [14]:
# Fit the preprocessing pipeline on the selected training columns
# (numericals followed by categoricals) and keep the transformed result.
X = data_pipe.fit_transform(train[[*numericals, *categoricals]])

### Fit Regression on Piped Data

In [15]:
# Default-configured LinearRegression; it is fit on the preprocessed features
# and the log-transformed target in the next cell.
lr = LinearRegression()

In [16]:
# Fit the regression on the preprocessed features against the log-transformed
# target, so the fitted model works in log(SalePrice) units.
lr.fit(X=X, y=l_dependent)