# Predicting emissions

## Simple CNN from the original images

In [3]:
path_annotrain = "../datasets/datasets_train/train_annotation/_annotation.csv"

train_annotation = pd.read_csv(path_annotrain, index_col=0)
train_annotation = train_annotation[train_annotation["models"]!=" "]

df = pd.merge(train_annotation, emission_data, "inner", on="models")
df.head()

Unnamed: 0,im_name,x_min,y_min,x_max,y_max,class,models,Brand,year,Average Urban Consumption,Average extra-urban consumption,Average mixed consumption,Average of CO2 (g per km),Average CO type I,category
0,107347968.jpg,40.0,244.0,1144.0,637.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279,Convertible
1,109641728.jpg,278.0,284.0,917.0,539.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279,Convertible
2,111214592.jpg,29.0,133.0,350.0,263.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279,Convertible
3,110821376.jpg,34.0,71.0,245.0,160.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279,Convertible
4,109379584.jpg,99.0,147.0,712.0,516.0,car,Audi S5 Convertible 2012,Audi,2012,7.90636,5.3053,6.262544,153.533569,298187279,Convertible


In [46]:
from sklearn.metrics import mean_squared_error

print("Regression baseline :", mean_squared_error([df["Average of CO2 (g per km)"].mean() for _ in range(df.shape[0])], 
                                                  df["Average of CO2 (g per km)"])/df.shape[0])

Regression baseline : 4.025270303726511


In [53]:
import time
import torch
import torchvision
from torchvision.transforms import *
import torch.nn.functional as F
from PIL import Image

batch_size=64

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        c = torchvision.transforms.Compose([Grayscale(),
                                            RandomHorizontalFlip(),
                                            RandomPerspective(),
                                            RandomRotation(degrees=(0, 180)),
                                            RandomVerticalFlip(),
                                            Resize((96,96)),
                                            functional.to_tensor])
        return (
            c(Image.open("../datasets/datasets_train/train/"+row["im_name"])),
            row["Average of CO2 (g per km)"],
        )

dataset = MyDataset(df)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=batch_size)

class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 3, 3, padding='same')
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(3, 6, 3, padding='same')
        self.conv3 = torch.nn.Conv2d(6, 12, 3, padding='same')
        self.fc = torch.nn.Linear(1728, 1)
        self.dropout = torch.nn.Dropout()


    def forward(self, x):
        x = x.view(-1,1,96,96)
        x = self.dropout(self.pool(F.relu(self.conv1(x))))
        x = self.dropout(self.pool(F.relu(self.conv2(x))))
        x = self.dropout(self.pool(F.relu(self.conv3(x))))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x

my_model = ConvNet()

optimizer = torch.optim.Adam(my_model.parameters(), lr=3e-4)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.5e-1)

criterion = torch.nn.MSELoss()

loss_l_train = []
loss_l_val = []
nb_epoch = 200

for num_epoch in range(nb_epoch):
    t0 = time.time()
    my_model.train()
    running_loss = 0
    total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        outputs = my_model(inputs) # Forward pass: Compute predicted y by passing  x to the model          
        labels = labels.unsqueeze(1).float()
        loss = criterion(outputs,labels) # Compute loss 

        optimizer.zero_grad() # re-init the gradients (otherwise they are cumulated)
        loss.backward() # perform back-propagation
        optimizer.step() # update the weights

        running_loss += loss.item()
        
        total += labels.size(0)

    loss_l_train.append(running_loss/total)

    my_model.eval()
    running_loss_val = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = my_model(inputs)  
            labels = labels.unsqueeze(1).float()
            loss = criterion(outputs,labels)

            running_loss_val += loss.item()

            total += labels.size(0)

    loss_l_val.append(running_loss_val/total)

    if loss_l_val[-1] == min(loss_l_val):
        print("Saving best model")
        torch.save(my_model, 'models/best-reg.pt')
        torch.save(my_model.state_dict(), 'models/best-reg-parameters.pt')

    print(f'epoch {num_epoch} : train loss {loss_l_train[-1]:.5f}, val loss {loss_l_val[-1]:.5f}, time {time.time()-t0:.2f}s')

Saving best model
epoch 0 : train loss 725.86420, val loss 897.27581, time 20.14s
Saving best model
epoch 1 : train loss 706.61034, val loss 885.22564, time 18.22s
Saving best model
epoch 2 : train loss 666.85862, val loss 853.82938, time 18.40s
Saving best model
epoch 3 : train loss 590.55086, val loss 783.64893, time 17.76s
Saving best model
epoch 4 : train loss 462.48929, val loss 655.61792, time 17.96s
Saving best model
epoch 5 : train loss 290.37592, val loss 457.51686, time 18.36s
Saving best model
epoch 6 : train loss 140.57691, val loss 273.13351, time 18.11s
Saving best model
epoch 7 : train loss 109.15769, val loss 225.03036, time 18.03s
epoch 8 : train loss 111.39893, val loss 251.97196, time 18.61s
epoch 9 : train loss 105.73188, val loss 253.94425, time 17.64s
epoch 10 : train loss 104.47991, val loss 245.86983, time 18.05s
epoch 11 : train loss 100.95169, val loss 242.87257, time 18.36s
epoch 12 : train loss 102.26520, val loss 238.15772, time 19.25s
epoch 13 : train loss

In [None]:
import time
import torch
import torchvision
from torchvision.transforms import *
import torch.nn.functional as F
from PIL import Image

batch_size=64

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        c = torchvision.transforms.Compose([Grayscale(),
                                            RandomHorizontalFlip(),
                                            RandomPerspective(),
                                            RandomRotation(degrees=(0, 180)),
                                            RandomVerticalFlip(),
                                            Resize((96,96)),
                                            functional.to_tensor])
        return (
            c(Image.open("../datasets/datasets_train/train/"+row["im_name"])),
            row["Average of CO2 (g per km)"],
        )

dataset = MyDataset(df)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_set, val_set = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=batch_size)

class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 3, 3, padding='same')
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(3, 6, 3, padding='same')
        self.conv3 = torch.nn.Conv2d(6, 12, 3, padding='same')
        self.fc = torch.nn.Linear(1728, 1)
        self.dropout = torch.nn.Dropout()


    def forward(self, x):
        x = x.view(-1,1,96,96)
        x = self.dropout(self.pool(F.relu(self.conv1(x))))
        x = self.dropout(self.pool(F.relu(self.conv2(x))))
        x = self.dropout(self.pool(F.relu(self.conv3(x))))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.fc(x)
        return x

my_model = ConvNet()

optimizer = torch.optim.Adam(my_model.parameters(), lr=3e-4)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.5e-1)

criterion = torch.nn.MSELoss()

loss_l_train = []
loss_l_val = []
nb_epoch = 200

for num_epoch in range(nb_epoch):
    t0 = time.time()
    my_model.train()
    running_loss = 0
    total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        outputs = my_model(inputs) # Forward pass: Compute predicted y by passing  x to the model          
        labels = labels.unsqueeze(1).float()
        loss = criterion(outputs,labels) # Compute loss 

        optimizer.zero_grad() # re-init the gradients (otherwise they are cumulated)
        loss.backward() # perform back-propagation
        optimizer.step() # update the weights

        running_loss += loss.item()
        
        total += labels.size(0)

    loss_l_train.append(running_loss/total)

    my_model.eval()
    running_loss_val = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = my_model(inputs)  
            labels = labels.unsqueeze(1).float()
            loss = criterion(outputs,labels)

            running_loss_val += loss.item()

            total += labels.size(0)

    loss_l_val.append(running_loss_val/total)

    if loss_l_val[-1] == min(loss_l_val):
        print("Saving best model")
        torch.save(my_model, 'models/best-reg.pt')
        torch.save(my_model.state_dict(), 'models/best-reg-parameters.pt')

    print(f'epoch {num_epoch} : train loss {loss_l_train[-1]:.5f}, val loss {loss_l_val[-1]:.5f}, time {time.time()-t0:.2f}s')