Class Nov 15

In [None]:
# Code Here
import pandas as pd 
import numpy as np
from palmerpenguins import load_penguins
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score

In [None]:
# Load the Palmer Penguins data and keep only the rows with no missing values.
penguins = load_penguins().dropna()
penguins.head()

In [None]:
# Binary target: 1 when the species is Gentoo, 0 for every other species.
penguins['gentoo'] = (penguins['species'] == 'Gentoo').astype(int)

# Predictors: every column except bill depth, the raw species label, and the target itself.
X = penguins.drop(columns=["bill_depth_mm", "species", "gentoo"])
y = penguins["gentoo"]

# Seed the shuffle so the train/test partition (and every score computed from it)
# is identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Standalone encoder; it is not referenced by the pipeline built below.
enc = OneHotEncoder(drop="first")

# Logistic regression using the SAGA solver, capped at 1000 iterations.
lg = LogisticRegression(max_iter=1000, solver="saga")

# Preprocessing: one-hot encode object-dtype columns (dropping the first level of each)
# and standardize numeric columns; any other column is discarded.
ct = ColumnTransformer(
    transformers=[
        (
            "dummify",
            OneHotEncoder(drop="first", handle_unknown="error", sparse_output=False),
            make_column_selector(dtype_include=object),
        ),
        (
            "scaler",
            StandardScaler(),
            make_column_selector(dtype_include=np.number),
        ),
    ],
    remainder="drop",
)

# Full workflow: preprocessing followed by the classifier.
my_pipeline = Pipeline(steps=[("preprocessing", ct), ("model", lg)])

In [None]:
# Hyper-parameter grid: elastic-net penalty, varying only the L1/L2 mixing ratio.
alphas = {
    "model__penalty": ['elasticnet'],
    "model__l1_ratio": [.0001, .001, .01, .1, 1]
}
gscv = GridSearchCV(my_pipeline, alphas, cv = 5, scoring='accuracy')

# Tune on the training split only: fitting the search on all of X, y would let the
# held-out test rows influence the chosen l1_ratio and bias the final test accuracy.
gscv_fitted = gscv.fit(X_train, y_train)

# Only the last expression of a cell is echoed, so print the per-candidate
# mean cross-validated accuracies explicitly (same order as the l1_ratio grid).
print(gscv_fitted.cv_results_["mean_test_score"])
gscv_fitted.best_estimator_

In [None]:
# Standalone encoder; it is not referenced by the pipeline built below.
enc = OneHotEncoder(drop="first")

# Elastic-net logistic regression with a fixed mixing ratio.
# NOTE(review): l1_ratio=.0001 is presumably the value picked by the grid search above -- confirm.
lg = LogisticRegression(
    solver="saga",
    penalty="elasticnet",
    l1_ratio=.0001,
    max_iter=1000,
)

# Same preprocessing as before: dummify object columns, standardize numeric ones,
# and drop anything else.
dummify_step = (
    "dummify",
    OneHotEncoder(sparse_output=False, handle_unknown='error', drop="first"),
    make_column_selector(dtype_include=object),
)
scaler_step = (
    "scaler",
    StandardScaler(),
    make_column_selector(dtype_include=np.number),
)
ct = ColumnTransformer([dummify_step, scaler_step], remainder="drop")

my_pipeline = Pipeline([
    ("preprocessing", ct),
    ("model", lg),
])

# Fit on the training split, then score predictions on the held-out test split.
fitted_pipeline = my_pipeline.fit(X_train, y_train)
y_pred = fitted_pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)