# Multilayered perceptron (MLP)

### 0. MLP for regression

In [179]:
import pandas as pd
import seaborn as sns

a)

In [180]:
df_mpg = sns.load_dataset("mpg").drop("name", axis = 1)
df_mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,usa
1,15.0,8,350.0,165.0,3693,11.5,70,usa
2,18.0,8,318.0,150.0,3436,11.0,70,usa
3,16.0,8,304.0,150.0,3433,12.0,70,usa
4,17.0,8,302.0,140.0,3449,10.5,70,usa


b)

In [181]:
df_mpg["origin"].value_counts()

usa       249
japan      79
europe     70
Name: origin, dtype: int64

In [182]:
df_mpg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 25.0+ KB


c)

In [183]:
df_mpg.query("horsepower.isna()")

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
32,25.0,4,98.0,,2046,19.0,71,usa
126,21.0,6,200.0,,2875,17.0,74,usa
330,40.9,4,85.0,,1835,17.3,80,europe
336,23.6,4,140.0,,2905,14.3,80,usa
354,34.5,4,100.0,,2320,15.8,81,europe
374,23.0,4,151.0,,3035,20.5,82,usa


In [184]:
nan_index = df_mpg.query("horsepower.isna()").index
nan_index

Int64Index([32, 126, 330, 336, 354, 374], dtype='int64')

In [185]:
bins = pd.IntervalIndex.from_tuples([(69, 73), (74, 77), (78, 82)])

df_mpg["model_year"] = pd.cut(df_mpg["model_year"], bins = bins)
df_mpg = pd.get_dummies(df_mpg, columns=["model_year", "origin"], drop_first=True)

In [186]:
from sklearn.impute import KNNImputer

In [187]:
imputer = KNNImputer(n_neighbors=2)
imputed = imputer.fit_transform(df_mpg)

df_imputed = pd.DataFrame(imputed, columns = df_mpg.columns)
df_mpg["horsepower"] = round(df_imputed["horsepower"])

d)

In [188]:
df_mpg[df_mpg.index.isin(nan_index)]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,"model_year_(74, 77]","model_year_(78, 82]",origin_japan,origin_usa
32,25.0,4,98.0,66.0,2046,19.0,0,0,0,1
126,21.0,6,200.0,106.0,2875,17.0,0,0,0,1
330,40.9,4,85.0,60.0,1835,17.3,0,1,0,0
336,23.6,4,140.0,104.0,2905,14.3,0,1,0,1
354,34.5,4,100.0,63.0,2320,15.8,0,1,0,0
374,23.0,4,151.0,86.0,3035,20.5,0,1,0,1


None of the imputed values seems to be unreasonable.

e)

In [189]:
from sklearn.model_selection import train_test_split

X, y = df_mpg.drop("mpg", axis = 1).to_numpy(), df_mpg["mpg"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
X_train_small, X_val, y_train_small, y_val = train_test_split(X_train, y_train, test_size=.3, random_state=42)
