# Predicting emissions : regression model

## Simple CNN from the original images

In [1]:
import pandas as pd

emission_data = pd.read_csv("../datasets/car_models_footprint.csv", sep=";")

path_annotrain = "../datasets/datasets_train/train_annotation/_annotation.csv"

train_annotation = pd.read_csv(path_annotrain, index_col=0)
train_annotation = train_annotation[train_annotation["models"]!=" "]

df = pd.merge(train_annotation, emission_data, "inner", on="models")
df.head()

Unnamed: 0,im_name,x_min,y_min,x_max,y_max,class,models,Brand,year,Average Urban Consumption,Average extra-urban consumption,Average mixed consumption,Average of CO2 (g per km),Average CO type I
0,107347968.jpg,40.0,244.0,1144.0,637.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
1,109641728.jpg,278.0,284.0,917.0,539.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
2,111214592.jpg,29.0,133.0,350.0,263.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
3,110821376.jpg,34.0,71.0,245.0,160.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279
4,109379584.jpg,99.0,147.0,712.0,516.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279


In [2]:
from sklearn.metrics import mean_squared_error

print("Regression baseline :", mean_squared_error([df["Average of CO2 (g per km)"].mean() for _ in range(df.shape[0])], 
                                                  df["Average of CO2 (g per km)"]))

Regression baseline : 5269.078827578003


In [3]:
import time
import torch
import torchvision
from torchvision.transforms import *
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import train_test_split

df_train, df_test = train_test_split(df, test_size=.2, shuffle=True, random_state=13)

batch_size=64

class MyTrainDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        c = torchvision.transforms.Compose([Grayscale(),
                                            RandomHorizontalFlip(),
                                            RandomPerspective(),
                                            RandomRotation(degrees=(0, 180)),
                                            RandomVerticalFlip(),
                                            Resize((96,96)),
                                            functional.to_tensor])
        return (
            c(Image.open("../datasets/datasets_train/train/"+row["im_name"])),
            row["Average of CO2 (g per km)"],
        )
    
class MyTestDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        c = torchvision.transforms.Compose([Grayscale(),
                                            Resize((96,96)),
                                            functional.to_tensor])
        return (
            c(Image.open("../datasets/datasets_train/train/"+row["im_name"])),
            row["Average of CO2 (g per km)"],
        )
    
train_set = MyTrainDataset(df_train)
val_set = MyTestDataset(df_test)

#train_size = int(0.8 * len(dataset))
#test_size = len(dataset) - train_size

#train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=batch_size)

class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 3, 3, padding='same')
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(3, 6, 3, padding='same')
        self.conv3 = torch.nn.Conv2d(6, 12, 3, padding='same')
        self.fc = torch.nn.Linear(1728, 1)
        self.dropout = torch.nn.Dropout()


    def forward(self, x):
        x = x.view(-1,1,96,96)
        x = self.dropout(self.pool(F.relu(self.conv1(x))))
        x = self.dropout(self.pool(F.relu(self.conv2(x))))
        x = self.dropout(self.pool(F.relu(self.conv3(x))))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x

my_model = ConvNet()

optimizer = torch.optim.Adam(my_model.parameters(), lr=3e-4)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.5e-1)

criterion = torch.nn.MSELoss()

loss_l_train = []
loss_l_val = []
nb_epoch = 200

for num_epoch in range(nb_epoch):
    t0 = time.time()
    my_model.train()
    running_loss = 0
    total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        outputs = my_model(inputs) # Forward pass: Compute predicted y by passing  x to the model          
        labels = labels.unsqueeze(1).float()
        loss = criterion(outputs,labels) # Compute loss 

        optimizer.zero_grad() # re-init the gradients (otherwise they are cumulated)
        loss.backward() # perform back-propagation
        optimizer.step() # update the weights

        running_loss += loss.item()
        
        total += 1

    loss_l_train.append(running_loss/total)

    my_model.eval()
    running_loss_val = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = my_model(inputs)  
            labels = labels.unsqueeze(1).float()
            loss = criterion(outputs,labels)

            running_loss_val += loss.item()

            total += 1

    loss_l_val.append(running_loss_val/total)

    if loss_l_val[-1] == min(loss_l_val):
        print("Saving best model")
        torch.save(my_model, 'models/best-reg.pt')
        torch.save(my_model.state_dict(), 'models/best-reg-parameters.pt')

    print(f'epoch {num_epoch} : train loss {loss_l_train[-1]:.5f}, val loss {loss_l_val[-1]:.5f}, time {time.time()-t0:.2f}s')

Saving best model
epoch 0 : train loss 44848.45014, val loss 44947.30469, time 18.68s
Saving best model
epoch 1 : train loss 44086.28539, val loss 44483.06641, time 18.23s
Saving best model
epoch 2 : train loss 42556.83824, val loss 43545.20625, time 20.06s
Saving best model
epoch 3 : train loss 39505.13339, val loss 41279.12969, time 19.33s
Saving best model
epoch 4 : train loss 33742.31756, val loss 36202.32109, time 18.16s
Saving best model
epoch 5 : train loss 24270.49460, val loss 26928.23516, time 40.03s
Saving best model
epoch 6 : train loss 12923.85963, val loss 14881.44707, time 17.95s
Saving best model
epoch 7 : train loss 6401.35706, val loss 8146.12285, time 18.23s
epoch 8 : train loss 6000.43868, val loss 8467.27656, time 20.21s
epoch 9 : train loss 5750.09901, val loss 9516.07725, time 19.20s
epoch 10 : train loss 5706.91075, val loss 9320.03926, time 32.64s
epoch 11 : train loss 5615.34573, val loss 9246.77861, time 25.86s
epoch 12 : train loss 5769.27318, val loss 9408.

KeyboardInterrupt: 