In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# filter warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = sns.load_dataset("tips")
df


In [None]:
df.time.unique()

## Pipeline

Encoding, scaling, and missing-value treatment

In [None]:
from sklearn.preprocessing import LabelEncoder

# Swap the labels in the `time` column for integer codes, writing the result
# back into `df`. The fitted encoder stays available as `encoder`.
encoder = LabelEncoder()
encoder.fit(df["time"])
df["time"] = encoder.transform(df["time"])

In [None]:
df

In [None]:
# Goal: predict `time` (the label-encoded column) from the remaining columns.

In [None]:
df.time.unique() # dinner is 0 and lunch is 1

In [None]:
# Separate the label from the predictors: `y` is the `time` column and
# `X` is every other column of `df`.
y = df["time"]
X = df.drop(columns="time")

In [None]:
X

In [None]:
y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as the test split; the fixed random_state keeps
# the split the same from run to run.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=1,
)

In [None]:
df.isnull().sum()

## Pipeline
### Handling missing values
### Data encoding
### Feature scaling

In [None]:
from sklearn.impute import SimpleImputer # for missing values
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# A Pipeline chains a sequence of data transformations.
# A ColumnTransformer applies a separate pipeline to each group of columns and combines the results.

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [None]:
df

In [None]:
# Column groups used to route features to the matching preprocessing steps.
numerical_columns = [
    "total_bill",
    "tip",
    "size",
]
categorical_columns = [
    "sex",
    "smoker",
    "day",
]

In [None]:
# Feature engineering automated with Pipeline and ColumnTransformer.

# Pipeline for the numerical columns: fill missing values with the column
# median, then standardize with StandardScaler.
numerical_pipeline = Pipeline(steps=[('imputation', SimpleImputer(strategy="median")),
                                     ('scaling', StandardScaler())])

# Pipeline for the categorical columns: fill missing values with the most
# frequent category, then one-hot encode.
# handle_unknown="ignore" makes a category that was not seen during fit
# encode as all zeros at transform time instead of raising an error.
categorical_pipeline = Pipeline(steps=[('imputation', SimpleImputer(strategy="most_frequent")),
                                       ('encoding', OneHotEncoder(handle_unknown="ignore"))])

In [None]:
# Send each column group through its own pipeline:
# numerical columns -> numerical_pipeline, categorical columns -> categorical_pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ("num_pipeline", numerical_pipeline, numerical_columns),
        ("cat_pipeline", categorical_pipeline, categorical_columns),
    ]
)

In [None]:
# Fit the preprocessor on the training split and replace X_train with the
# transformed features.
# NOTE(review): X_train is rebound here, so re-running this cell feeds the
# already-transformed output back into the preprocessor — presumably that
# fails because the string column names are gone; rerun from the split cell.
X_train = preprocessor.fit_transform(X_train)

In [None]:
# Apply the preprocessor to the test split with transform only (no refit),
# and replace X_test with the transformed features.
X_test = preprocessor.transform(X_test)

In [None]:
X_train

In [None]:
X_test

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression


# Candidate classifiers keyed by a short name.
# The decision tree is seeded (same value as the train/test split seed) so
# that repeated runs build the same tree and report the same score.
models = {"svc": SVC(),
          "dt_clf": DecisionTreeClassifier(random_state=1),
          "log_clf": LogisticRegression()}

In [None]:
models.values()

In [None]:
models.keys()

In [None]:
from sklearn.metrics import accuracy_score

def model_train_eval(X_train, y_train, X_test, y_test, models):
    """Fit each model on the training data and score it on the test data.

    Parameters
    ----------
    X_train, y_train
        Features and labels passed to each model's ``fit``.
    X_test, y_test
        Features passed to each model's ``predict`` and the labels the
        predictions are compared against.
    models : dict
        Maps a name to an object exposing ``fit(X, y)`` and ``predict(X)``.
        Each object is fitted in place.

    Returns
    -------
    dict
        Maps each key of ``models`` to the value ``accuracy_score`` returns
        for that model's test predictions, in the same order as ``models``.
    """
    evaluation = {}
    # Walk name/model pairs directly rather than rebuilding
    # list(models.keys()) and list(models.values()) on every iteration.
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        evaluation[name] = accuracy_score(y_test, y_pred)
    return evaluation


In [None]:
model_train_eval(X_train, y_train, X_test, y_test, models)