Classifying fruits using both color histograms and metadata as input features to a Naive Bayes model. (Note: the cell below trains on the color-histogram features only; no metadata is merged in.)

In [10]:
# 📦 Imports
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 📁 Load data - only histogram files now
# Input locations and column roles are named once here so they can be changed
# without touching the pipeline below.
HIST_TRAIN_PATH = '/content/colour_Histogram_Training.csv'
HIST_TEST_PATH = '/content/colour_Histogram_Testing.csv'
TARGET_COLUMN = 'class'
NON_FEATURE_COLUMNS = ['filename', TARGET_COLUMN]  # identifier + label; every other column is used as a feature

hist_train = pd.read_csv(HIST_TRAIN_PATH)
hist_test = pd.read_csv(HIST_TEST_PATH)

# 🎯 Separate features and target (no merging with metadata needed)
X_train = hist_train.drop(columns=NON_FEATURE_COLUMNS)  # Keep only histogram features
y_train = hist_train[TARGET_COLUMN]
X_test = hist_test.drop(columns=NON_FEATURE_COLUMNS)
y_test = hist_test[TARGET_COLUMN]

# Fail fast if the two files disagree on their feature columns, then put the
# test columns in training order so each position refers to the same feature.
mismatched_columns = set(X_train.columns) ^ set(X_test.columns)
if mismatched_columns:
    raise ValueError(
        f"Train/test feature columns differ: {sorted(map(str, mismatched_columns))}"
    )
X_test = X_test[X_train.columns]

# 🧠 Encode target labels
# The encoder is fitted on the training labels only; transform() raises if the
# test set contains a class that never appeared in training.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# ⚙️ Scale features
# Scaling statistics come from the training split only and are reused for the
# test split, so no test information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📊 Naive Bayes model
model = GaussianNB()
model.fit(X_train_scaled, y_train_encoded)

# 🔍 Predictions
y_pred = model.predict(X_test_scaled)

# 📈 Evaluation
# Report every class known to the encoder, labelled with its original name, so
# rows are readable and the report/matrix layout does not depend on which
# classes happen to occur in the test data or the predictions.
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(name) for name in label_encoder.classes_]

print("Accuracy:", accuracy_score(y_test_encoded, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test_encoded, y_pred, labels=class_ids, target_names=class_names
    ),
)
# Rows are true classes and columns are predicted classes, both in class_names order.
print("\nConfusion Matrix:\n", confusion_matrix(y_test_encoded, y_pred, labels=class_ids))

Accuracy: 0.8542275286845337

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.97      0.93       157
           1       0.35      0.58      0.44       164
           2       0.54      0.41      0.46       148
           3       0.92      0.87      0.89       160
           4       0.91      0.99      0.95       164
           5       0.63      0.83      0.72       161
           6       0.84      1.00      0.91       164
           7       0.43      0.66      0.52       152
           8       0.67      0.87      0.76       164
           9       0.66      0.77      0.71       164
          10       0.43      0.30      0.35       144
          11       1.00      1.00      1.00       166
          12       0.42      0.42      0.42       164
          13       1.00      1.00      1.00       219
          14       0.91      0.98      0.94       234
          15       1.00      0.88      0.94       164
          16       0.52    