In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Loading the dataset
url = 'glass.csv'
data = pd.read_csv(url)

# Splitting the data into features and target variable
X = data.drop('Type', axis=1)
y = data['Type']

# Normalizing the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Defining the sampling strategies
oversample = SMOTE(k_neighbors=2)
undersample = RandomUnderSampler()

# Applying oversampling
X_over, y_over = oversample.fit_resample(X_train, y_train)

# Applying undersampling
X_under, y_under = undersample.fit_resample(X_train, y_train)

# Training and evaluating models
for sampling_type, X_sample, y_sample in [('Original', X_train, y_train), ('Oversampled', X_over, y_over), ('Undersampled', X_under, y_under)]:
    print(f"======= {sampling_type} Data =======")
    # Initialize the classifiers
    rf = RandomForestClassifier(random_state=42)
    svm = SVC(random_state=42)
    knn = KNeighborsClassifier()

    # Training the models
    rf.fit(X_sample, y_sample)
    svm.fit(X_sample, y_sample)
    knn.fit(X_sample, y_sample)

    # Models to evaluate
    models = {'Random Forest': rf, 'SVM': svm, 'KNN': knn}

    # Making predictions and evaluating the models
    for name, model in models.items():
        print(f"Results for {name} on {sampling_type} data:")
        predictions = model.predict(X_test)
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))
        print("\n")

Results for Random Forest on Original data:
              precision    recall  f1-score   support

           1       0.74      0.89      0.81        19
           2       0.75      0.65      0.70        23
           3       0.67      0.50      0.57         4
           5       0.75      0.50      0.60         6
           6       0.75      1.00      0.86         3
           7       0.91      1.00      0.95        10

    accuracy                           0.77        65
   macro avg       0.76      0.76      0.75        65
weighted avg       0.77      0.77      0.76        65

[[17  2  0  0  0  0]
 [ 4 15  1  1  1  1]
 [ 2  0  2  0  0  0]
 [ 0  3  0  3  0  0]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  0 10]]


Results for SVM on Original data:
              precision    recall  f1-score   support

           1       0.67      0.84      0.74        19
           2       0.64      0.70      0.67        23
           3       0.00      0.00      0.00         4
           5       1.00      0.50

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           1       0.57      0.84      0.68        19
           2       0.59      0.57      0.58        23
           3       0.00      0.00      0.00         4
           5       0.67      0.33      0.44         6
           6       1.00      0.67      0.80         3
           7       0.90      0.90      0.90        10

    accuracy                           0.65        65
   macro avg       0.62      0.55      0.57        65
weighted avg       0.62      0.65      0.62        65

[[16  3  0  0  0  0]
 [ 9 13  0  1  0  0]
 [ 2  2  0  0  0  0]
 [ 1  3  0  2  0  0]
 [ 0  0  0  0  2  1]
 [ 0  1  0  0  0  9]]


Results for Random Forest on Oversampled data:
              precision    recall  f1-score   support

           1       0.78      0.95      0.86        19
           2       0.79      0.65      0.71        23
           3       0.50      0.50      0.50         4
           5       0.67      0.67      0.67         6
          