In [45]:
import numpy as np
import pandas as pd

### Dataset

In [46]:
DATA_PATH = (
    "C:/Users/Jan/OneDrive/_Coding/UdemyML/"
    "Chapter13_CaseStudies/CaseStudyWebServer/employee_data.csv"
)

In [47]:
df = pd.read_csv(DATA_PATH)

df = df.drop("Unnamed: 0", axis=1)
df = df.drop("id", axis=1)

In [48]:
df.head()

Unnamed: 0,groups,age,healthy_eating,active_lifestyle,salary
0,A,36,5,5,2297
1,A,55,3,5,1134
2,A,61,8,1,4969
3,O,29,3,6,902
4,O,34,6,2,3574


In [49]:
data = df.to_numpy()

x = data[:, :-1]
y = data[:, -1]

categorical_features = [0]
numerical_features = [1, 2, 3]

print(f"x shape: {x.shape}")
print(f"y shape: {y.shape}")

x shape: (1000, 4)
y shape: (1000,)


### Sklearn Imports

In [50]:
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler

In [51]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [52]:
regr = RandomForestRegressor(n_estimators=100)

In [53]:
numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

categorical_transformer = Pipeline(steps=[("ordinal", OrdinalEncoder())])

preprocessor_odinal = ColumnTransformer(
    transformers=[
        ("numeric", numeric_transformer, numerical_features),
        ("categorical", categorical_transformer, categorical_features),
    ]
)

In [54]:
pipe_ordinal = Pipeline(
    steps=[("preprocessor_odinal", preprocessor_odinal), ("regressor", regr)]
)

In [55]:
pipe_ordinal.fit(x_train, y_train)
score = pipe_ordinal.score(x_test, y_test)

print(f"Score: {score}")

Score: 0.9954610702146521


In [56]:
import pickle


pickle.dump(regr, open("model.pkl", "wb"))
regr = pickle.load(open("model.pkl", "rb"))

x_sample = np.array([0, 40, 10, 10])
y_pred = regr.predict([x_sample])

print(f"Pred: {y_pred}")

Pred: [4303.44]
