<a href="https://colab.research.google.com/github/Krish6115/MLLab/blob/main/Lab7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

There are several ways to upload files to Google Colab:

1.  **Using the Files tab:**
    *   Click on the folder icon on the left sidebar (Files).
    *   Click the "Upload to session storage" icon (an upward-pointing arrow).
    *   Select the file from your local computer.
    *   *Note: Files uploaded this way are temporary and will be deleted when the Colab session ends.*

2.  **Mounting Google Drive:**
    *   This is the recommended method for persistent storage.
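    *   Mount your Drive by running `from google.colab import drive` followed by `drive.mount('/content/drive')` in a code cell.

3.  **Using `files.upload()`:**
    *   Run the following code cell to pick a file from your local computer. Like the Files tab, this stores the file in temporary session storage: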

In [None]:
        # Upload a file from the local machine into the Colab session.
        # This is a direct browser upload, not a Google Drive mount.
        from google.colab import files
        # Per the Colab docs this returns a dict keyed by uploaded filename
        # (verify before relying on the value type).
        uploaded = files.upload()

Saving Crop_recommendation.csv to Crop_recommendation.csv
User uploaded file "Crop_recommendation.csv" with length 150034 bytes


In [None]:
# ============ LAB 7: MODEL COMPARISON, HYPERPARAM TUNING ============

import pandas as pd
import numpy as np
from itertools import product

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron

# 1. LOAD AND PREPROCESS DATA
def load_and_prepare_data(filepath):
    """Read a labelled CSV and return standardized features with integer targets.

    Args:
        filepath: Location of the CSV, passed straight to ``pd.read_csv``.
            The file must contain a ``label`` column; every other column
            is treated as a feature.

    Returns:
        An ``(X, y)`` tuple where ``X`` is the ``StandardScaler``-transformed
        feature array and ``y`` is the ``LabelEncoder``-encoded ``label``
        column.
    """
    frame = pd.read_csv(filepath)

    # Class names -> integer ids.
    targets = LabelEncoder().fit_transform(frame['label'])

    # Everything except the label column is a feature.
    features = frame.drop(columns='label').values

    # NOTE(review): the scaler is fitted on every row, so if the result is
    # split into train/test afterwards the held-out rows have already
    # influenced the scaling statistics.
    scaled_features = StandardScaler().fit_transform(features)
    return scaled_features, targets

# 2. SPLIT THE DATA
def split_data(X, y, test_size=0.2, random_state=42):
    """Make a stratified train/test split of ``X`` and ``y``.

    Args:
        X: Feature matrix.
        y: Target vector; also used as the stratification key.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Whatever ``train_test_split`` returns for two inputs, unpacked by
        callers as ``X_train, X_test, y_train, y_test``.
    """
    split_options = {
        'test_size': test_size,
        'stratify': y,
        'random_state': random_state,
    }
    parts = train_test_split(X, y, **split_options)
    return parts

# 3. CALCULATE METRICS
def get_metrics(y_true, y_pred):
    """Compute accuracy plus weighted precision, recall and F1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1`` (in that order). The last three use ``average='weighted'``
        and ``zero_division=0``.
    """
    # Shared settings for the three averaged scorers.
    averaged_kwargs = {'average': 'weighted', 'zero_division': 0}
    averaged_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    for metric_name, scorer in averaged_scorers:
        scores[metric_name] = scorer(y_true, y_pred, **averaged_kwargs)
    return scores

# 4. RANDOMIZEDSEARCHCV WRAPPER
def randomized_hyperparam_search(model, param_grid, Xtr, ytr, n_iter=5):
    """Tune ``model`` with a seeded, 3-fold ``RandomizedSearchCV`` on accuracy.

    Args:
        model: Unfitted estimator to tune.
        param_grid: Search space forwarded as ``param_distributions``.
            Usually a dict of parameter name -> list of candidates, but
            anything ``RandomizedSearchCV`` accepts is passed through
            (e.g. distribution objects, or a list of dicts).
        Xtr: Training features.
        ytr: Training targets.
        n_iter: Maximum number of parameter settings to sample.

    Returns:
        A ``(best_estimator, best_params)`` tuple taken from the fitted
        search object.
    """
    # When the space is a plain dict of sized candidate lists it is finite,
    # so never ask for more samples than there are combinations.
    try:
        grid_size = 1
        for candidates in param_grid.values():
            grid_size *= len(candidates)
    except (AttributeError, TypeError):
        # A value without len() (e.g. a distribution) or a non-dict search
        # space cannot be counted here; sample exactly n_iter as requested
        # instead of crashing.
        actual_n_iter = n_iter
    else:
        actual_n_iter = min(n_iter, grid_size)

    search = RandomizedSearchCV(
        model, param_distributions=param_grid, n_iter=actual_n_iter, cv=3,
        scoring='accuracy', n_jobs=-1, random_state=42
    )
    search.fit(Xtr, ytr)
    return search.best_estimator_, search.best_params_

# 5. TRAIN MODELS, TABULATE RESULTS
def evaluate_classifiers(Xtr, Xte, ytr, yte):
    """Tune and score a fixed set of classifiers, one result row per model.

    Each model with a non-empty search space is tuned through
    ``randomized_hyperparam_search``; models with an empty space are fitted
    with their constructor defaults. Every fitted model is then scored on
    both the training and the test split with ``get_metrics``.

    Args:
        Xtr: Training features.
        Xte: Test features.
        ytr: Training targets.
        yte: Test targets.

    Returns:
        A ``pd.DataFrame`` with the columns ``Model``, ``BestParams``,
        ``TrainAcc``, ``TestAcc``, ``TrainF1`` and ``TestF1`` (scores
        rounded to 4 decimals), one row per classifier in definition order.
    """
    classifiers = [
        # Only predict() is used below, so probability calibration (which
        # adds an internal cross-validation to every SVC fit) is not enabled.
        ('SVC', SVC(random_state=42), {'C': [1, 10], 'kernel': ['linear', 'rbf']}),
        ('DecisionTree', DecisionTreeClassifier(random_state=42), {'max_depth': [None, 5, 10]}),
        ('RandomForest', RandomForestClassifier(random_state=42),
         {'n_estimators': [50, 100], 'max_depth': [None, 10]}),
        ('AdaBoost', AdaBoostClassifier(random_state=42), {'n_estimators': [50, 100]}),
        # Empty search space -> fitted with defaults, no tuning.
        ('NaiveBayes', GaussianNB(), {}),
        ('MLP', MLPClassifier(max_iter=500, random_state=42),
         {'hidden_layer_sizes': [(50,), (100,)], 'activation': ['relu', 'tanh']}),
        # alpha only multiplies the regularization term, and Perceptron uses
        # no penalty by default, so searching alpha alone compares identical
        # models. Searching penalty as well (keeping None as a candidate)
        # makes alpha meaningful while still covering the unregularized model.
        ('Perceptron', Perceptron(random_state=42),
         {'penalty': [None, 'l2'], 'alpha': [0.0001, 0.001]}),
    ]

    rows = []
    for name, clf, param_grid in classifiers:
        if param_grid:
            fitted, best_params = randomized_hyperparam_search(clf, param_grid, Xtr, ytr)
        else:
            fitted = clf.fit(Xtr, ytr)
            best_params = 'default'

        train_m = get_metrics(ytr, fitted.predict(Xtr))
        test_m = get_metrics(yte, fitted.predict(Xte))
        rows.append({
            'Model': name,
            'BestParams': best_params,
            'TrainAcc': round(train_m['accuracy'], 4),
            'TestAcc': round(test_m['accuracy'], 4),
            'TrainF1': round(train_m['f1'], 4),
            'TestF1': round(test_m['f1'], 4),
        })
    return pd.DataFrame(rows)

# ===================== MAIN EXECUTION =====================
# Pipeline: load + scale the CSV, make a stratified train/test split,
# tune and score every classifier, then print the comparison table.
# The names below are module-level, so they stay available to later cells.
X, y = load_and_prepare_data('Crop_recommendation.csv')
Xtr, Xte, ytr, yte = split_data(X, y)
results_df = evaluate_classifiers(Xtr, Xte, ytr, yte)
print("\n===== Model Comparison Table =====\n")
# to_string(index=False) prints the full table without the row index.
print(results_df.to_string(index=False))




===== Model Comparison Table =====

       Model                                          BestParams  TrainAcc  TestAcc  TrainF1  TestF1
         SVC                       {'kernel': 'linear', 'C': 10}    0.9903   0.9886   0.9903  0.9886
DecisionTree                                 {'max_depth': None}    1.0000   0.9795   1.0000  0.9794
RandomForest             {'n_estimators': 50, 'max_depth': None}    1.0000   0.9955   1.0000  0.9955
    AdaBoost                               {'n_estimators': 100}    0.3142   0.3136   0.2071  0.2049
  NaiveBayes                                             default    0.9949   0.9955   0.9949  0.9954
         MLP {'hidden_layer_sizes': (50,), 'activation': 'relu'}    0.9955   0.9932   0.9955  0.9932
  Perceptron                                   {'alpha': 0.0001}    0.8676   0.8750   0.8563  0.8611
