In [None]:
# Import Necessary Libraries
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from PIL import Image
import os
import pickle

Collecting xgboost
  Downloading xgboost-2.1.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.3-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/124.9 MB ? eta -:--:--
   ---------------------------------------- 1.0/124.9 MB 3.1 MB/s eta 0:00:40
    --------------------------------------- 1.6/124.9 MB 2.9 MB/s eta 0:00:43
    --------------------------------------- 2.1/124.9 MB 2.9 MB/s eta 0:00:43
    --------------------------------------- 2.9/124.9 MB 2.9 MB/s eta 0:00:42
   - -------------------------------------- 3.1/124.9 MB 2.9 MB/s eta 0:00:43
   - -------------------------------------- 3.7/124.9 MB 2.8 MB/s eta 0:00:44
   - -------------------------------------- 4.2/124.9 MB 2.8 MB/s eta 0:00:44
   - -------------------------------------- 4.7/124.9 MB 2.7 MB/s eta 0:00:45
   - -------------------------------------- 5.0/124.9 MB 2.7 MB/s eta 0:00:45
   - ---

In [3]:
# Load CSV Files
train_df = pd.read_csv('cardatasettrain.csv')
test_df = pd.read_csv('cardatasettest.csv')

# Clean DataFrames
train_df_clean = train_df.drop(columns=['Unnamed: 0'])
test_df_clean = test_df.drop(columns=['Unnamed: 0'])

In [5]:
# Define image loading function with consistent shape handling
def load_images(dataframe, folder_path, img_size=(64, 64)):
    images = []
    for img_name in dataframe['image']:
        img_path = os.path.join(folder_path, img_name)
        try:
            img = Image.open(img_path).resize(img_size).convert('RGB')  # Ensure RGB
            img_array = np.array(img).flatten()  # Flatten the image
            images.append(img_array)
        except Exception as e:
            print(f"Error loading image {img_name}: {e}")
            continue
    return np.array(images, dtype=np.float32)

# Load training and testing images
train_images = load_images(train_df_clean, "cars_train/cars_train")
test_images = load_images(test_df_clean, "cars_test/cars_test")

# Verify loaded image shapes
print(f"Train Images Shape: {train_images.shape}")
print(f"Test Images Shape: {test_images.shape}")

Train Images Shape: (8144, 12288)
Test Images Shape: (8041, 12288)


In [6]:
# Extract bounding box features
train_boxes = train_df_clean[['x1', 'y1', 'x2', 'y2']].values
test_boxes = test_df_clean[['x1', 'y1', 'x2', 'y2']].values

# Combine image features with bounding boxes
X_train_full = np.hstack((train_boxes, train_images))
X_test = np.hstack((test_boxes, test_images))

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(train_df_clean['Class'].values)

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_encoded, test_size=0.2, random_state=42
)

In [7]:
# Initialize the XGBoost classifier
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax', 
    eval_metric='mlogloss', 
    use_label_encoder=False, 
    num_class=len(label_encoder.classes_),
    max_depth=6,            # Increase tree depth
    n_estimators=500,       # Number of boosting rounds
    learning_rate=0.1,      # Learning rate
    random_state=42
)

# Train the model
xgb_model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score

# Predict on training set
y_train_pred = xgb_model.predict(X_train)

# Calculate training accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')

# Predict on validation set
y_val_pred = xgb_model.predict(X_val)

# Calculate validation accuracy
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

# Calculate F1 score and precision
val_f1 = f1_score(y_val, y_val_pred, average='weighted')
val_precision = precision_score(y_val, y_val_pred, average='weighted')

print(f'Validation F1 Score: {val_f1:.2f}')
print(f'Validation Precision: {val_precision:.2f}')

# Inspect predictions and true labels
print("Predictions (first 10):", y_val_pred[:10])
print("True Labels (first 10):", y_val[:10])

Validation Accuracy: 4.05%
Predictions (first 10): [163  98 183 144 133 125 122  72  26   4]
True Labels (first 10): [171 125   9 104  19  33 160 125  37   1]


In [9]:
# Save the trained model
model_filename = "xgboost_car_model.pkl"
with open(model_filename, 'wb') as file:
    pickle.dump(xgb_model, file)

print(f"Model saved as {model_filename}")

Model saved as xgboost_car_model.pkl
