<a href="https://colab.research.google.com/github/abishinjoseph/Encored/blob/main/Model_Selection_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# Read the wine file. Because `names=` is supplied, pandas treats every row as
# data and labels the columns from this list (target first, then the features).
file_path = "wine.data"
column_names = [
    'Class',
    'Alcohol',
    'Malic_acid',
    'Ash',
    'Alcalinity_of_ash',
    'Magnesium',
    'Total_phenols',
    'Flavanoids',
    'Nonflavanoid_phenols',
    'Proanthocyanins',
    'Color_intensity',
    'Hue',
    'OD280/OD315_of_diluted_wines',
    'Proline',
]
wine_data = pd.read_csv(file_path, names=column_names)

# The 'Class' column is the target; every remaining column is a feature.
y = wine_data['Class']
X = wine_data.drop(columns='Class')

# Hold out 20% of the rows for the final evaluation; the seed pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, keyed by the display name used in the printed reports.
# RandomForest and MLP draw random numbers while fitting, so both get a fixed
# seed: without it the scores below differ from one run to the next.
# The MLP also gets a larger iteration budget, because the default of 200
# iterations commonly stops before the optimizer has converged.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier(),
    'MLP': MLPClassifier(max_iter=1000, random_state=42),
}

# Model selection with cross-validation.
# The scaler lives inside the pipeline so it is re-fit on each training fold
# only; the held-out fold never influences the scaling statistics.
for name, model in models.items():
    pipeline = make_pipeline(StandardScaler(), model)
    scores = cross_val_score(pipeline, X_train, y_train, cv=5)
    print(f"{name}: Mean accuracy: {np.mean(scores)}, Standard deviation: {np.std(scores)}")

# Search space per model. Each parameter is addressed as "<step>__<param>",
# where <step> is the lowercased class name that make_pipeline assigns to the
# estimator step.
param_grid = {
    'Random Forest': {
        'randomforestclassifier__n_estimators': [50, 100, 200],
    },
    'SVM': {
        'svc__C': [0.1, 1, 10],
        'svc__gamma': [0.1, 1, 10],
    },
    'KNN': {
        'kneighborsclassifier__n_neighbors': [3, 5, 7],
    },
    'MLP': {
        'mlpclassifier__hidden_layer_sizes': [(50,), (100,), (50, 50)],
    },
}

# Run a 5-fold, accuracy-scored grid search for every candidate and keep the
# winning pipeline (scaler + tuned estimator) under the model's display name.
best_models = {}
for name, model in models.items():
    pipeline = make_pipeline(StandardScaler(), model)
    grid_search = GridSearchCV(
        pipeline,
        param_grid[name],
        cv=5,
        scoring='accuracy',
    )
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search.best_estimator_
    print(f"Best {name} parameters: {grid_search.best_params_}")

# Evaluate best models on test set.
# Each entry of best_models is a GridSearchCV best_estimator_. The search is
# created with the default refit=True, so that pipeline has already been fit on
# the whole of X_train / y_train with the winning parameters. Fitting it again
# here would only repeat that work and, for estimators without a fixed seed,
# replace the selected model with a different random draw, so the models are
# scored exactly as the search returned them.
for name, model in best_models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} accuracy on test set: {accuracy}")
    # Per-class precision / recall / F1 for the same predictions.
    print(classification_report(y_test, y_pred))


Random Forest: Mean accuracy: 0.9642857142857142, Standard deviation: 0.04517539514526254
SVM: Mean accuracy: 0.97192118226601, Standard deviation: 0.014050207583203595
KNN: Mean accuracy: 0.9578817733990148, Standard deviation: 0.02627976092285545




MLP: Mean accuracy: 0.97192118226601, Standard deviation: 0.014050207583203595
Best Random Forest parameters: {'randomforestclassifier__n_estimators': 50}
Best SVM parameters: {'svc__C': 1, 'svc__gamma': 0.1}
Best KNN parameters: {'kneighborsclassifier__n_neighbors': 5}




Best MLP parameters: {'mlpclassifier__hidden_layer_sizes': (100,)}
Random Forest accuracy on test set: 1.0
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        14
           3       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

SVM accuracy on test set: 1.0
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        14
           3       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

KNN accuracy on test set: 0.9444444444444444
              precision    recall  f1-score   support

 

