# Logistic regression model

The following notebook describes the creation of a logistic regression model for the classification of German traffic signs, using scikit-learn.

In [4]:
# Load libraries
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from PIL import Image
import glob
import inspect
import numpy as np
import os
import pickle


In [5]:
# Location of the train and test images
#
# inspect.getfile(inspect.currentframe()) is unreliable inside a notebook cell:
# the frame's "file" is a synthetic name (recent ipykernel versions place it in
# a temporary directory), so its dirname is not the project folder. Use the
# real script path when one exists (e.g. the notebook exported to a .py file)
# and fall back to the working directory, which Jupyter sets to the notebook's
# folder by default.
_source_file = globals().get('__file__')
current_dir = os.path.dirname(os.path.abspath(_source_file)) if _source_file else os.getcwd()
ruta_train = os.sep.join([current_dir,'..','images','train'])
ruta_test = os.sep.join([current_dir,'..','images','test'])


In [6]:
# Load train and test images

def load_labeled_images(folder):
    """Load every ``*.ppm`` image found directly inside ``folder``.

    The class label is parsed from the file name: the integer before the
    first underscore (``<label>_<rest>.ppm``).

    Parameters
    ----------
    folder : str
        Directory that contains the ``.ppm`` files.

    Returns
    -------
    (list, list)
        The loaded images and their integer labels, in matching order.

    Raises
    ------
    ValueError
        If a file name does not start with an integer followed by ``_``.
    """
    images = []
    labels = []
    # glob returns files in arbitrary order; sorting keeps the sample order
    # (and therefore any positional index into these lists) reproducible.
    for filename in sorted(glob.glob(os.path.join(folder, '*.ppm'))):
        labels.append(int(os.path.basename(filename).split('_')[0]))
        # Image.open is lazy and keeps the file handle open. copy() forces the
        # pixel data into memory so the handle can be released immediately
        # instead of accumulating one open file per image.
        with Image.open(filename) as img:
            images.append(img.copy())
    return images, labels


train_images, y_train = load_labeled_images(ruta_train)
test_images, y_test = load_labeled_images(ruta_test)
    

In [7]:
# Helper functions

# Resizes an image to a defined size, and then reshapes into a 1xn array
def process_img(img,size):
    """Resize ``img`` and flatten its pixel data into a single row.

    Parameters
    ----------
    img : object exposing ``resize(size, resample)``
        Presumably a ``PIL.Image.Image``, as produced by the loading cell.
    size : tuple
        Target size forwarded unchanged to ``img.resize``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n)`` holding all values of the resized image.
    """
    # Image.ANTIALIAS was an alias of Image.LANCZOS and was removed in
    # Pillow 10; LANCZOS selects the same filter on old and new versions.
    out = img.resize(size, Image.LANCZOS)
    out = np.asarray(out)
    return out.reshape(1, -1)

# Builds the feature matrix for a collection of images: one flattened row per image
def formar_set(images,im_size):
    """Run ``process_img`` on every image and stack the rows vertically.

    Parameters
    ----------
    images : iterable
        Images accepted by ``process_img``.
    im_size : tuple
        Size passed to ``process_img`` for every image.

    Returns
    -------
    numpy.ndarray
        One row per input image, in input order.
    """
    rows = []
    for picture in images:
        rows.append(process_img(picture, im_size))
    return np.vstack(rows)


In [8]:
# Search space for the model: every solver is tried with every (square) image size
solvers = ['newton-cg', 'lbfgs', 'liblinear']
sizes = [(side, side) for side in (17, 25, 35, 45, 55)]

# Running record of the best configuration seen so far; starts empty so that
# any accuracy greater than 0 replaces it
best_solver, best_model = None, None
best_size = (0,0)
best_accuracy = 0


In [10]:
# Loop through parameters to find the best model
#
# NOTE(review): the winning configuration is picked by its accuracy on the
# test set, so best_accuracy is an optimistic estimate. A separate validation
# split would be needed for an unbiased figure.

# The feature matrices depend only on the image size, not on the solver, so
# build each pair once instead of once per (solver, size) combination.
features_by_size = {
    size: (formar_set(train_images,size), formar_set(test_images,size))
    for size in sizes
}

for solver in solvers:
    for size in sizes:
        X_train, X_test = features_by_size[size]

        lr = LogisticRegression(solver=solver)
        lr.fit(X_train,y_train)

        preds = lr.predict(X_test)
        # accuracy_score expects (y_true, y_pred); the value is the same
        # either way, but keep the documented argument order.
        accuracy = accuracy_score(y_test,preds)

        print('------------------------')
        print('Solver: {0}'.format(solver))
        print('Size: {0}'.format(size))
        print('Accuracy: {0}'.format(accuracy))

        # Strict comparison: on a tie the earlier configuration is kept.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_solver = solver
            best_size = size
            best_model = lr

------------------------
Solver: newton-cg
Size: (17, 17)
Accuracy: 0.8532818532818532
------------------------
Solver: newton-cg
Size: (25, 25)
Accuracy: 0.8687258687258688
------------------------
Solver: newton-cg
Size: (35, 35)
Accuracy: 0.8648648648648649
------------------------
Solver: newton-cg
Size: (45, 45)
Accuracy: 0.861003861003861
------------------------
Solver: newton-cg
Size: (55, 55)
Accuracy: 0.861003861003861
------------------------
Solver: lbfgs
Size: (17, 17)
Accuracy: 0.8494208494208494
------------------------
Solver: lbfgs
Size: (25, 25)
Accuracy: 0.8648648648648649
------------------------
Solver: lbfgs
Size: (35, 35)
Accuracy: 0.8764478764478765
------------------------
Solver: lbfgs
Size: (45, 45)
Accuracy: 0.8648648648648649
------------------------
Solver: lbfgs
Size: (55, 55)
Accuracy: 0.8648648648648649
------------------------
Solver: liblinear
Size: (17, 17)
Accuracy: 0.861003861003861
------------------------
Solver: liblinear
Size: (25, 25)
Accuracy

In [11]:
# Report the best configuration found: solver, image size and accuracy, one per line
print(best_solver, best_size, best_accuracy, sep='\n')

# Values recorded from the original run:
#   liblinear
#   (35, 35)
#   0.8803088803088803

liblinear
(35, 35)
0.8803088803088803


In [12]:
# Save the best model

model_location = os.sep.join([current_dir,'..','models','model1','model1.pkl'])

# open() does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(model_location), exist_ok=True)

with open(model_location, 'wb') as fid:
    pickle.dump(best_model, fid)


In [14]:
# Reload the pickled model from disk and check it on a single test image
X_test = formar_set(test_images,best_size)
# Pick one sample and keep it two-dimensional (1 x n), as predict() expects
x = X_test[258].reshape(1,-1)
model_location = os.sep.join([current_dir,'..','models','model1','model1.pkl'])

# pickle.load can execute arbitrary code; only load model files you trust
with open(model_location, 'rb') as fp:
    model = pickle.load(fp)

# Predicted label followed by the true label of the same sample
print(model.predict(x))
print(y_test[258])
    

[42]
42
