# Predicting emissions : classification model

## Simple CNN from the original images

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

emission_data = pd.read_csv("../datasets/car_models_footprint.csv", sep=";")

path_annotrain = "../datasets/datasets_train/train_annotation/_annotation.csv"

train_annotation = pd.read_csv(path_annotrain, index_col=0)
train_annotation = train_annotation[train_annotation["models"]!=" "]

le = LabelEncoder()
train_annotation["model_e"] = le.fit_transform(train_annotation["models"])

n_classes = train_annotation["model_e"].max()+1

df = pd.merge(train_annotation, emission_data, "inner", on="models")
df.head()

Unnamed: 0,im_name,x_min,y_min,x_max,y_max,class,models,model_e,Brand,year,Average Urban Consumption,Average extra-urban consumption,Average mixed consumption,Average of CO2 (g per km),Average CO type I
0,107347968.jpg,40.0,244.0,1144.0,637.0,car,Audi S5 Convertible 2012,11,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
1,109641728.jpg,278.0,284.0,917.0,539.0,car,Audi S5 Convertible 2012,11,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
2,111214592.jpg,29.0,133.0,350.0,263.0,car,Audi S5 Convertible 2012,11,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
3,110821376.jpg,34.0,71.0,245.0,160.0,car,Audi S5 Convertible 2012,11,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
4,109379584.jpg,99.0,147.0,712.0,516.0,car,Audi S5 Convertible 2012,11,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279


In [2]:
from sklearn.metrics import accuracy_score

print("Classification baseline :", accuracy_score([df["model_e"].mode() for _ in range(df.shape[0])], 
                                                  df["model_e"]))

Classification baseline : 0.019098548510313215


In [5]:
import time
import torch
import torchvision
from torchvision.transforms import *
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import train_test_split

df_train, df_test = train_test_split(df, test_size=.2, shuffle=True, random_state=13)

batch_size=64

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        c = torchvision.transforms.Compose([Grayscale(),
                                            RandomHorizontalFlip(),
                                            RandomPerspective(),
                                            RandomRotation(degrees=(0, 180)),
                                            RandomVerticalFlip(),
                                            Resize((96,96)),
                                            functional.to_tensor])
        return (
            c(Image.open("../datasets/datasets_train/train/"+row["im_name"])),
            row["model_e"],
        )

train_set = MyDataset(df_train)
val_set = MyDataset(df_test)

#train_size = int(0.8 * len(dataset))
#test_size = len(dataset) - train_size

#train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=batch_size)

class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 3, 3, padding='same')
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(3, 6, 3, padding='same')
        self.conv3 = torch.nn.Conv2d(6, 12, 3, padding='same')
        self.fc = torch.nn.Linear(1728, n_classes)
        self.dropout = torch.nn.Dropout()


    def forward(self, x):
        x = x.view(-1,1,96,96)
        x = self.dropout(self.pool(F.relu(self.conv1(x))))
        x = self.dropout(self.pool(F.relu(self.conv2(x))))
        x = self.dropout(self.pool(F.relu(self.conv3(x))))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x

my_model = ConvNet()

optimizer = torch.optim.Adam(my_model.parameters(), lr=3e-4)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.5e-1)

criterion = torch.nn.CrossEntropyLoss()

loss_l_train = []
loss_l_val = []
nb_epoch = 200

loss_l_train = []
loss_l_val = []
acc_train = []
acc_val = []
for num_epoch in range(nb_epoch):
    t0 = time.time()
    my_model.train()
    running_loss = 0
    total = 0
    correct = 0
    for i, (inputs, labels) in enumerate(train_loader):
        outputs = my_model(inputs) # Forward pass: Compute predicted y by passing  x to the model          
        labels = labels
        loss = criterion(outputs,labels) # Compute loss 

        optimizer.zero_grad() # re-init the gradients (otherwise they are cumulated)
        loss.backward() # perform back-propagation
        optimizer.step() # update the weights

        running_loss += loss.item()
        
        y_pred_softmax = torch.log_softmax(outputs, dim = 1)
        _, predicted = torch.max(y_pred_softmax, dim = 1) 
        total += labels.size(0)
        correct += (predicted == labels).sum()

    loss_l_train.append(running_loss/(i+1))
    acc_train.append(correct*100/total)

    my_model.eval()
    running_loss_val = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            outputs = my_model(inputs)  
            labels = labels
            loss = criterion(outputs,labels)

            running_loss_val += loss.item()
            
            y_pred_softmax = torch.log_softmax(outputs, dim = 1)
            _, predicted = torch.max(y_pred_softmax, dim = 1) 
            total += labels.size(0)
            correct += (predicted == labels).sum()

    acc_val.append(correct*100/total)
    loss_l_val.append(running_loss_val/(i+1))

    if acc_val[-1] == max(acc_val):
        print("Saving best model")
        torch.save(my_model, 'models/best-classifier.pt')
        torch.save(my_model.state_dict(), 'models/best-classifier-parameters.pt')

    print(f'epoch {num_epoch} : train loss {loss_l_train[-1]:.5f}, train accuracy {acc_train[-1]:.2f}, val loss {loss_l_val[-1]:.5f}, val accuracy {acc_val[-1]:.2f}, time {time.time()-t0:.2f}s')

Saving best model
epoch 0 : train loss 4.62913, train accuracy 0.76, val loss 4.60622, val accuracy 0.38, time 29.36s
Saving best model
epoch 1 : train loss 4.56664, train accuracy 2.01, val loss 4.61004, val accuracy 1.91, time 20.38s
Saving best model
epoch 2 : train loss 4.55984, train accuracy 1.81, val loss 4.61245, val accuracy 1.91, time 31.03s
epoch 3 : train loss 4.55815, train accuracy 1.72, val loss 4.61203, val accuracy 1.15, time 20.43s
epoch 4 : train loss 4.56268, train accuracy 1.53, val loss 4.61397, val accuracy 0.38, time 24.23s
Saving best model
epoch 5 : train loss 4.56081, train accuracy 1.34, val loss 4.61237, val accuracy 2.29, time 29.27s
epoch 6 : train loss 4.55588, train accuracy 1.34, val loss 4.61262, val accuracy 1.91, time 25.21s
epoch 7 : train loss 4.55413, train accuracy 1.43, val loss 4.61338, val accuracy 1.91, time 25.74s
epoch 8 : train loss 4.55381, train accuracy 1.05, val loss 4.61193, val accuracy 1.53, time 20.11s
epoch 9 : train loss 4.55460