In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB


In [2]:
# Load Wine dataset
wine = load_wine()
X = wine.data
y = wine.target

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
# Define models to train
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB()
}

# Store evaluation results
results = []

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='weighted'),
        "Recall": recall_score(y_test, y_pred, average='weighted'),
        "F1-Score": f1_score(y_test, y_pred, average='weighted')
    })


In [4]:
# Show initial evaluation results
results_df = pd.DataFrame(results)
print("Initial Model Performance:\n")
print(results_df.sort_values(by="F1-Score", ascending=False))


Initial Model Performance:

                 Model  Accuracy  Precision    Recall  F1-Score
0  Logistic Regression  1.000000   1.000000  1.000000  1.000000
1        Random Forest  1.000000   1.000000  1.000000  1.000000
2                  SVM  1.000000   1.000000  1.000000  1.000000
4          Naive Bayes  1.000000   1.000000  1.000000  1.000000
3        Decision Tree  0.944444   0.951389  0.944444  0.944856


In [5]:
# Hyperparameter tuning using GridSearchCV for Random Forest
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5]
}

grid_rf = GridSearchCV(
    RandomForestClassifier(),
    param_grid=param_grid_rf,
    cv=5,
    scoring='f1_weighted',
    n_jobs=-1
)

grid_rf.fit(X_train, y_train)
print("Best Parameters (GridSearchCV - RF):", grid_rf.best_params_)


Best Parameters (GridSearchCV - RF): {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 50}


In [6]:
# Hyperparameter tuning using RandomizedSearchCV for SVM
from scipy.stats import uniform
param_dist_svm = {
    'C': uniform(0.1, 10),
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

random_svm = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist_svm,
    n_iter=20,
    cv=5,
    scoring='f1_weighted',
    random_state=42,
    n_jobs=-1
)

random_svm.fit(X_train, y_train)
print("Best Parameters (RandomizedSearchCV - SVM):", random_svm.best_params_)


Best Parameters (RandomizedSearchCV - SVM): {'C': np.float64(0.30584494295802445), 'gamma': 'auto', 'kernel': 'rbf'}


In [7]:
# Final evaluation of best models from tuning
best_rf = grid_rf.best_estimator_
best_svm = random_svm.best_estimator_

for model, name in [(best_rf, "Tuned Random Forest"), (best_svm, "Tuned SVM")]:
    y_pred = model.predict(X_test)
    print(f"\n{name} Classification Report:\n")
    print(classification_report(y_test, y_pred))



Tuned Random Forest Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36


Tuned SVM Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [8]:
# Compare the final tuned models
final_results = [
    {
        "Model": "Tuned Random Forest",
        "Accuracy": accuracy_score(y_test, best_rf.predict(X_test)),
        "Precision": precision_score(y_test, best_rf.predict(X_test), average='weighted'),
        "Recall": recall_score(y_test, best_rf.predict(X_test), average='weighted'),
        "F1-Score": f1_score(y_test, best_rf.predict(X_test), average='weighted')
    },
    {
        "Model": "Tuned SVM",
        "Accuracy": accuracy_score(y_test, best_svm.predict(X_test)),
        "Precision": precision_score(y_test, best_svm.predict(X_test), average='weighted'),
        "Recall": recall_score(y_test, best_svm.predict(X_test), average='weighted'),
        "F1-Score": f1_score(y_test, best_svm.predict(X_test), average='weighted')
    }
]

final_results_df = pd.DataFrame(final_results)
print("\nFinal Tuned Model Performance:\n")
print(final_results_df.sort_values(by='F1-Score', ascending=False))



Final Tuned Model Performance:

                 Model  Accuracy  Precision  Recall  F1-Score
0  Tuned Random Forest       1.0        1.0     1.0       1.0
1            Tuned SVM       1.0        1.0     1.0       1.0
