# Import Libraries

In [25]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")


# Load and Prepare Data

In [None]:
# Load wine dataset
df=pd.read_csv("./train.csv")
X = df.drop('price_range', axis=1)
y = df['price_range']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Feature scaling
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)


# Train Baseline Models and Evaluate

In [32]:
# Define models
models = {
    'Logistic Regression': LogisticRegression(max_iter=500),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC()
}

results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'F1-score': f1_score(y_test, y_pred, average='weighted')
    }

# Display results
results_df = pd.DataFrame(results).T
print("\n🔎 Baseline Model Results:")
print(results_df.sort_values(by='F1-score', ascending=False))


ValueError: Unknown label type: 'continuous'

 # Hyperparameter Tuning

GridSearchCV for SVM

In [None]:
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

grid_svm = GridSearchCV(SVC(), param_grid_svm, cv=5, scoring='f1_weighted')
grid_svm.fit(X_train, y_train)

best_svm = grid_svm.best_estimator_
y_pred_svm = best_svm.predict(X_test)

print("\n✅ Best SVM via GridSearchCV:")
print(classification_report(y_test, y_pred_svm, target_names=wine.target_names))



✅ Best SVM via GridSearchCV:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       0.93      1.00      0.97        14
     class_2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36



RandomizedSearchCV for Random Forest

In [None]:
param_dist_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rand_search_rf = RandomizedSearchCV(
    RandomForestClassifier(),
    param_distributions=param_dist_rf,
    n_iter=10,
    cv=5,
    scoring='f1_weighted',
    random_state=42
)
rand_search_rf.fit(X_train, y_train)

best_rf = rand_search_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test)

print("\n✅ Best Random Forest via RandomizedSearchCV:")
print(classification_report(y_test, y_pred_rf, target_names=wine.target_names))



✅ Best Random Forest via RandomizedSearchCV:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       1.00      1.00      1.00        14
     class_2       1.00      1.00      1.00        10

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



# Add Tuned Models to Comparison

In [None]:
# Add tuned models to results
results['Tuned SVM'] = {
    'Accuracy': accuracy_score(y_test, y_pred_svm),
    'Precision': precision_score(y_test, y_pred_svm, average='weighted'),
    'Recall': recall_score(y_test, y_pred_svm, average='weighted'),
    'F1-score': f1_score(y_test, y_pred_svm, average='weighted')
}

results['Tuned RF'] = {
    'Accuracy': accuracy_score(y_test, y_pred_rf),
    'Precision': precision_score(y_test, y_pred_rf, average='weighted'),
    'Recall': recall_score(y_test, y_pred_rf, average='weighted'),
    'F1-score': f1_score(y_test, y_pred_rf, average='weighted')
}

# Final comparison
final_results_df = pd.DataFrame(results).T
print("\n🏆 Final Model Comparison:")
print(final_results_df.sort_values(by='F1-score', ascending=False))



🏆 Final Model Comparison:
                     Accuracy  Precision    Recall  F1-score
Random Forest        1.000000   1.000000  1.000000  1.000000
Tuned RF             1.000000   1.000000  1.000000  1.000000
Logistic Regression  0.972222   0.974074  0.972222  0.971970
SVM                  0.972222   0.974074  0.972222  0.971970
Tuned SVM            0.972222   0.974074  0.972222  0.971970
Decision Tree        0.944444   0.944444  0.944444  0.944444


# Interpretation Example

In [None]:
best_model_name = final_results_df['F1-score'].idxmax()
print(f"\n⭐️ The best performing model is: {best_model_name}")



⭐️ The best performing model is: Random Forest
