In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Task 1: Load the poker-hand training file from the UCI repository.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/poker/poker-hand-training-true.data"
# Ten feature columns (S1, C1, ..., S5, C5) followed by the "Hand" label.
# NOTE(review): S*/C* presumably stand for suit/rank of each card - confirm
# against the UCI dataset description.
column_names = [
    f"{prefix}{card}" for card in range(1, 6) for prefix in ("S", "C")
] + ["Hand"]
# Column names are supplied explicitly, so the first row is read as data.
data = pd.read_csv(url, header=None, names=column_names)

# Task 2: Preprocessing - separate features from the label column.
y = data["Hand"]
X = data.drop(columns="Hand")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same
# mean/variance to both splits so no test statistics reach the models.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Task 3: Train three baseline classifiers with default hyperparameters.
# Each estimator is seeded with random_state=42; fit() returns the
# estimator itself, so construction and training are chained.

# Model 1: random forest
rf_clf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
rf_predictions = rf_clf.predict(X_test_scaled)

# Model 2: gradient boosting
gb_clf = GradientBoostingClassifier(random_state=42).fit(X_train_scaled, y_train)
gb_predictions = gb_clf.predict(X_test_scaled)

# Model 3: support vector classifier
svc_clf = SVC(random_state=42).fit(X_train_scaled, y_train)
svc_predictions = svc_clf.predict(X_test_scaled)

# Task 4: Hyperparameter tuning (random forest only).
# Exhaustive search over 3 x 3 x 3 = 27 settings, each scored with
# 5-fold cross-validation on the training split.
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}
rf_grid_search = GridSearchCV(
    estimator=rf_clf,
    param_grid=rf_param_grid,
    cv=5,
).fit(X_train_scaled, y_train)
# Predict on the held-out rows through the fitted search object.
rf_tuned_predictions = rf_grid_search.predict(X_test_scaled)

# Task 5: Compare the results.
# Print accuracy and the per-class report for every set of test predictions.
# The headings are reproduced exactly as before: the first has no leading
# newline, the later ones start with "\n" to separate the sections.
for heading, predictions in (
    ("Random Forest Classifier:", rf_predictions),
    ("\nGradient Boosting Classifier:", gb_predictions),
    ("\nSupport Vector Classifier:", svc_predictions),
    ("\nRandom Forest Classifier (after Hyperparameter Tuning):", rf_tuned_predictions),
):
    print(heading)
    print("Accuracy:", accuracy_score(y_test, predictions))
    # Some classes are never predicted, which makes their precision
    # undefined. zero_division=0 reports those entries as 0.0 (the same
    # numbers as the default) without raising an UndefinedMetricWarning
    # for every such class.
    print(
        "Classification Report:\n",
        classification_report(y_test, predictions, zero_division=0),
    )


Random Forest Classifier:
Accuracy: 0.6117552978808477
Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.80      0.71      2552
           1       0.56      0.50      0.53      2075
           2       0.33      0.00      0.01       230
           3       0.00      0.00      0.00       101
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00         9
           6       0.00      0.00      0.00        12
           7       0.00      0.00      0.00         1

    accuracy                           0.61      5002
   macro avg       0.19      0.16      0.16      5002
weighted avg       0.58      0.61      0.58      5002


Gradient Boosting Classifier:
Accuracy: 0.6167532986805278
Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.84      0.72      2552
           1       0.58      0.45      0.51      2075
           2       0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
