### Imports

In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

### Load Data and Specify Sample Modelling Data

In [2]:
# Load the training set; the path is relative to the notebook's working directory.
train = pd.read_csv(filepath_or_buffer="Data/train.csv")

In [3]:
# Target on its original scale, plus its natural log (the series the
# regression further down is fitted to).
dependent = train["SalePrice"]
l_dependent = np.log(dependent)

In [4]:
# Columns routed through the categorical (impute + one-hot) branch.
categoricals = [
    "MSSubClass",
    "MSZoning",
    "Alley",
]

In [5]:
# Columns routed through the numerical (impute + scale) branch.
numericals = [
    "LotFrontage",
    "LotArea",
]

### Initialise Transformers for Pipes

In [6]:
# Replaces every missing categorical value with the literal string "None".
cat_imputer = SimpleImputer(
    fill_value="None",
    strategy="constant",
)

In [7]:
# Replaces missing numeric values with the median of the column.
num_imputer = SimpleImputer(
    strategy="median",
)

In [8]:
# Standardising scaler, constructed with default settings.
scaler = StandardScaler()

In [9]:
# One-hot encoder producing a dense array; categories not seen during fit are
# ignored at transform time rather than raising an error.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name, so try the current keyword first and fall back to the
# legacy one on older installations.
try:
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:  # scikit-learn < 1.2 does not accept `sparse_output`
    encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)

### Initialise Pipes

In [10]:
# Categorical branch: impute missing values first, then one-hot encode.
cat_pipe = Pipeline(
    steps=[("impute", cat_imputer), ("encode", encoder)],
)

In [11]:
# Numerical branch: impute missing values first, then scale.
num_pipe = Pipeline(
    steps=[("impute", num_imputer), ("scale", scaler)],
)

### Compose Pipes Using Column Transformer

In [12]:
# Send the categorical columns through cat_pipe and the numerical columns
# through num_pipe.
data_preprocessor = ColumnTransformer(
    [
        ("categorical", cat_pipe, categoricals),
        ("numerical", num_pipe, numericals),
    ],
)

In [14]:
# Single-step pipeline wrapping the column preprocessor.
data_pipe = Pipeline(steps=[("preprocessor", data_preprocessor)])

In [24]:
# Fit the preprocessing on the selected training columns and transform them
# in the same call.
X = data_pipe.fit_transform(train.loc[:, numericals + categoricals])

In [25]:
# Linear regression model with default settings; it is fitted in a later cell.
lr = LinearRegression()

In [32]:
# Fit the regression on the preprocessed features against the log target.
lr.fit(X=X, y=l_dependent)

In [33]:
# Score the model on the same X and log target passed to the fit call.
lr.score(X=X, y=l_dependent)

0.4452265968760091

In [37]:
# Predictions for the rows in X, on the log scale of the fitted target.
preds = lr.predict(X=X)

In [41]:
# Root-mean-squared error between the log target and the predictions.
# Arguments are passed by keyword in the documented (y_true, y_pred) order;
# the original call had them reversed, which is harmless for MSE but would
# silently give wrong results with an asymmetric metric.
np.sqrt(mean_squared_error(y_true=l_dependent, y_pred=preds))

0.2974222746822299

In [47]:
# Undo the log transform on the predictions and preview the first rows.
pd.DataFrame(data=np.exp(preds)).head(n=20)

Unnamed: 0,0
0,210744.632982
1,172028.049108
2,216929.542337
3,167322.926086
4,234065.058705
5,160916.120899
6,169566.955651
7,216457.411234
8,116937.277272
9,121854.346183


In [48]:
# Preview the first rows of the actual sale prices.
dependent.to_frame().head(n=20)

Unnamed: 0,SalePrice
0,208500
1,181500
2,223500
3,140000
4,250000
5,143000
6,307000
7,200000
8,129900
9,118000
