In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import RobustScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
df = pd.read_csv('basic_descriptor.csv')

In [11]:
df.columns

Index(['area', 'length', 'width', 'aspect_ratio', 'major_axis_length',
       'minor_axis_length', 'convex_hull_area', 'convex_hull_perimeter',
       'mean_r', 'mean_g', 'mean_b', 'red_sqr', 'green_sqr', 'blue_sqr',
       'texture_mean', 'texture_std', 'texture_uniformity',
       'texture_third_moment', 'category', 'label'],
      dtype='object')

In [13]:
X = df.drop(['category', 'label'], axis=1)
y = df['label']

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Initialize models
models = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'SVM': SVC(kernel='rbf', random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

results = {}

In [22]:
for name, model in models.items():
    print(f"\n{'-'*50}")
    print(f"{name} Results:")
    
    # Train the model
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    
    # Perform cross-validation
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
    
    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"Cross-validation Accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std()*2:.4f})")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))


--------------------------------------------------
KNN Results:
Test Accuracy: 0.8062
Cross-validation Accuracy: 0.7959 (+/- 0.0157)

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.74      0.70       388
           1       0.82      0.79      0.80       396
           2       0.96      0.96      0.96       293
           3       0.77      0.82      0.80       297
           4       0.94      0.85      0.89       211
           5       0.79      0.76      0.77       433

    accuracy                           0.81      2018
   macro avg       0.82      0.82      0.82      2018
weighted avg       0.81      0.81      0.81      2018


--------------------------------------------------
SVM Results:
Test Accuracy: 0.8627
Cross-validation Accuracy: 0.8629 (+/- 0.0122)

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.84      0.82       388
           1       0.87      0.8