<a href="https://colab.research.google.com/github/chidambarambaskaran/MachineLearning/blob/main/PipeLine_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score

# Load the ads dataset: 'Purchased' is the prediction target and every other
# column is used as a feature.
# NOTE(review): if the CSV carries an identifier column (e.g. a user id), it
# stays in the feature matrix here -- confirm that is intended.
data = pd.read_csv('Social_Network_Ads.csv')
y = data['Purchased']
x = data.drop(columns='Purchased')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Split the feature columns by dtype so each group gets its own preprocessing.
# 'number' matches every numeric dtype (not only int64/float64) and 'category'
# is selected alongside 'object'. Columns matched by neither list are not
# passed to the ColumnTransformer below and are dropped by its default
# remainder setting, so a too-narrow dtype filter silently loses features.
numerical_cols = x_train.select_dtypes(include='number').columns.tolist()
categorical_cols = x_train.select_dtypes(
    include=['object', 'category']
).columns.tolist()

# Numeric features: fill missing values with the column mean, then standardize.
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' keeps transform from failing on
# categories that were not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Route each column group through its transformer and concatenate the results.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Candidate classifiers, keyed by the label printed in the results. Insertion
# order is the order in which they are trained and reported.
# NOTE(review): 'Kernel SVM' is configured with the linear kernel while
# 'Support Vector Classifier' uses the RBF kernel -- confirm the labels are
# the intended way round.
models = {}
models['Logistic Regression'] = LogisticRegression(max_iter=1000)
models['Support Vector Classifier'] = SVC(kernel='rbf')
models['Gaussian Naive Bayes'] = GaussianNB()
models['K-Nearest Neighbors'] = KNeighborsClassifier()
models['Kernel SVM'] = SVC(kernel='linear')
# The tree-based models take a fixed seed so repeated runs give the same result.
models['Decision Tree'] = DecisionTreeClassifier(random_state=42)
models['Random Forest'] = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Train every candidate behind the shared preprocessor and report its accuracy
# and F1 score on the held-out test split.
separator = "-" * 50

for model_name, model in models.items():
    # Chaining preprocessing and the classifier means the preprocessing is
    # fitted on the training rows only and merely applied to the test rows.
    model_pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model),
    ]).fit(x_train, y_train)

    y_pred = model_pipeline.predict(x_test)
    accuracy, f1 = accuracy_score(y_test, y_pred), f1_score(y_test, y_pred)

    # One print call, one line per argument: same output as four prints.
    print(
        f"{model_name}:",
        f"  Accuracy: {accuracy:.2f}",
        f"  F1 Score: {f1:.2f}",
        separator,
        sep="\n",
    )

Logistic Regression:
  Accuracy: 0.86
  F1 Score: 0.78
--------------------------------------------------
Support Vector Classifier:
  Accuracy: 0.93
  F1 Score: 0.90
--------------------------------------------------
Gaussian Naive Bayes:
  Accuracy: 0.94
  F1 Score: 0.91
--------------------------------------------------
K-Nearest Neighbors:
  Accuracy: 0.91
  F1 Score: 0.88
--------------------------------------------------
Kernel SVM:
  Accuracy: 0.86
  F1 Score: 0.78
--------------------------------------------------
Decision Tree:
  Accuracy: 0.84
  F1 Score: 0.76
--------------------------------------------------
Random Forest:
  Accuracy: 0.89
  F1 Score: 0.85
--------------------------------------------------
