Title: Binary vs. Multi-Class Classification<br>

Task 1:<br>
Binary Classification: Predict if a website visitor will click a button (Click or No Click).<br>
Use a web visitor interaction dataset.<br>
Task: Implement binary classification for click prediction.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

def validate_numeric_column(col, col_name):
    if not all(isinstance(i, (int, float)) for i in col):
        raise ValueError(f'{col_name} must be numeric.')

def binary_classification_click_prediction():
    data = {
        'time_on_page': [30, 45, 25, 50, 20, 35, 60, 15, 40, 55],
        'scroll_depth': [80, 90, 60, 95, 50, 70, 100, 40, 85, 90],
        'device': ['mobile', 'desktop', 'mobile', 'desktop', 'mobile', 'desktop', 'desktop', 'mobile', 'desktop', 'mobile'],
        'clicked': ['yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'yes', 'no']
    }

    df = pd.DataFrame(data)

    validate_numeric_column(df['time_on_page'], 'time_on_page')
    validate_numeric_column(df['scroll_depth'], 'scroll_depth')

    le = LabelEncoder()
    df['device'] = le.fit_transform(df['device'])
    df['clicked'] = le.fit_transform(df['clicked'])

    X = df.drop('clicked', axis=1)
    y = df['clicked']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    model = LogisticRegression()
    try:
        model.fit(X_train, y_train)
    except Exception as e:
        print("Model training failed:", e)
        return

    y_pred = model.predict(X_test)
    print("Binary Classification - Click Prediction")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

binary_classification_click_prediction()


Binary Classification - Click Prediction
Accuracy: 0.3333333333333333
              precision    recall  f1-score   support

           0       0.33      1.00      0.50         1
           1       0.00      0.00      0.00         2

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Task 2:<br>
Multi-Class Classification: Recognize handwritten digits (0-9).<br>
Use the MNIST dataset.<br>
Task: Develop a model that correctly classifies each handwritten digit.<br>


In [2]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

def multi_class_mnist_classification():
    digits = load_digits()
    X, y = digits.data, digits.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(max_iter=10000)
    try:
        model.fit(X_train, y_train)
    except Exception as e:
        print("Model training failed:", e)
        return

    y_pred = model.predict(X_test)
    print("Multi-Class Classification - MNIST")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

multi_class_mnist_classification()


Multi-Class Classification - MNIST
Accuracy: 0.975
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.97      1.00      0.98        28
           2       1.00      1.00      1.00        33
           3       0.97      0.97      0.97        34
           4       1.00      0.98      0.99        46
           5       0.92      0.96      0.94        47
           6       0.97      0.97      0.97        35
           7       1.00      0.97      0.99        34
           8       0.97      0.97      0.97        30
           9       0.97      0.95      0.96        40

    accuracy                           0.97       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.97      0.98       360



Task 3:<br>
Multi-Class Classification: Classify a flower species based on petal and sepal measurements.<br>
Use the Iris dataset.<br>
Task: Use features to classify into three species: Setosa, Versicolor, or Virginica.


In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

def multi_class_iris_classification():
    iris = load_iris()
    X, y = iris.data, iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(max_iter=1000)
    try:
        model.fit(X_train, y_train)
    except Exception as e:
        print("Model training failed:", e)
        return

    y_pred = model.predict(X_test)
    print("Multi-Class Classification - Iris")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

multi_class_iris_classification()


Multi-Class Classification - Iris
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

