In [5]:
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
import os
from joblib import dump
from sklearn import svm

In [6]:
# Define the folder list
folder_list = ['red', 'green', 'yellow', 'not_traffic_light']

# Initialize lists to store the images and labels
images = []
labels = []

# Load images from each folder in folder_list
for folder in folder_list:
    folder_path = "../input/" + folder  # Adjust the path as needed
    files = os.listdir(folder_path)
    for file in files:
        image = Image.open(os.path.join(folder_path, file))
        image = image.resize((20, 34))  # Resize the image if needed
        image = np.array(image).flatten()  # Flatten the image
        images.append(image)
        labels.append(folder)

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

images = images.astype(np.float64)/255
print(images.shape)


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Create and train the softmax regression model
model = svm.SVC(kernel='linear', probability=True, gamma='auto')
model.fit(X_train, y_train)


# Make predictions on the test set
y_pred = model.predict(X_test)
print(y_pred)

dump(model, 'clf.joblib')
# Calculate F1 score
f1 = f1_score(y_test, y_pred, average='weighted')
print('F1 score:', f1)

# # Calculate confusion matrix
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=folder_list))


(7684, 2040)
['red' 'not_traffic_light' 'yellow' ... 'yellow' 'red' 'not_traffic_light']
F1 score: 0.9876004770754688
Confusion Matrix:
[[561   0   2   0]
 [  3 214   6   1]
 [  1   4 631   0]
 [  0   2   0 112]]
                   precision    recall  f1-score   support

              red       0.99      1.00      0.99       563
            green       0.97      0.96      0.96       224
           yellow       0.99      0.99      0.99       636
not_traffic_light       0.99      0.98      0.99       114

         accuracy                           0.99      1537
        macro avg       0.99      0.98      0.98      1537
     weighted avg       0.99      0.99      0.99      1537



In [7]:
y_pred_prob = model.predict_proba(X_test)
max_confidence_scores = np.max(y_pred_prob, axis=1)
print(max_confidence_scores)

[0.99501715 0.99991051 0.76044262 ... 0.98873269 0.99999874 0.75932214]
