In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
csv_path = "final.csv"
image_path = "./images"
batch_size = 256
epochs = 200
learning_rate = 1e-3
seed = 42

In [3]:
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [4]:
class AllDataset(Dataset):
    def __init__(self, csv_path, image_path, image_transform=transforms.Compose([transforms.ToTensor()])):
        super(AllDataset).__init__()
        csv = pd.read_csv(csv_path)
        csv_np = csv.to_numpy()
        self.images = []
        for Id in csv['Id']:
            Id = str(Id)
            image = Image.open(image_path+'/'+Id+'.jpg')
            self.images.append(image_transform(image))
            image.close()
        
        for i in [1, 3, 4, 5, 6, 7, 8]:
            wordset = {word: idx for idx, word in enumerate(np.unique(csv_np[:,i]))}
            for row in range(len(csv_np)):
                csv_np[row][i] = wordset[csv_np[row][i]]
        self.ints = torch.from_numpy(np.array(csv_np[:,[1,3,4,5,6,7,8]], dtype="int"))
        self.floats = torch.from_numpy(np.array(list(map(lambda x: (x - np.array([0.61756694, 5.49324267, 4.76044624, 3.05992563])) / np.array([0.39076653, 1.28855836, 0.77676551, 0.51616518]), csv_np[:,[2,9,10,11]])), dtype="float")).float()
        self.target = torch.from_numpy(np.array(csv_np[:,[12]], dtype="float")).float()
    
    
    def __getitem__(self,idx):
        return self.images[idx], self.ints[idx],self.floats[idx], self.target[idx]
    
    
    def __len__(self):
        return len(self.ints)

In [5]:
data_length = len(pd.read_csv(csv_path))
train_length = int(data_length * 0.6)
test_length = int(data_length * 0.2)
val_length = data_length - train_length - test_length

image_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5),
                                                         (0.5, 0.5, 0.5))
                                     ])

train_dataset = AllDataset(csv_path, image_path, image_transform)
train_dataset, test_dataset = random_split(train_dataset, [train_length, test_length+val_length])
test_dataset, val_dataset = random_split(test_dataset, [test_length, val_length])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle = True)

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.resnet18 = torchvision.models.resnet18(pretrained=False)
        self.resnet18.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.resnet18.fc = nn.Sequential(nn.Linear(512, 2048),
                                         nn.ReLU(),
                                        nn.Linear(2048, 2048),
                                        nn.ReLU())
        self.resenet18 = self.resnet18.to(device)
        
        self.emb1 = torch.nn.Embedding(8, 20)
        self.emb2 = torch.nn.Embedding(11, 20)
        self.emb3 = torch.nn.Embedding(20, 20)
        self.emb4 = torch.nn.Embedding(4, 20)
        self.emb5 = torch.nn.Embedding(4, 20)
        self.emb6 = torch.nn.Embedding(4, 20)
        self.emb7 = torch.nn.Embedding(7, 20)
        self.act = nn.ReLU()
        self.fc = nn.Linear(4, 80)
        self.csvfc1 = nn.Linear(220, 4096)
        self.csvfc2 = nn.Linear(4096, 4096)
        self.csvfc3 = nn.Linear(4096, 2048)
        
        self.fc1 = nn.Linear(4096, 4096)
        self.fc2 = nn.Linear(4096, 2048)
        self.fc3 = nn.Linear(2048, 1)
        self.dropout = nn.Dropout()
    
    def forward(self, image, x, y):
        image = self.resnet18(image)
        
        x1 = self.emb1(x[:,0])
        x2 = self.emb2(x[:,1])
        x3 = self.emb3(x[:,2])
        x4 = self.emb4(x[:,3])
        x5 = self.emb5(x[:,4])
        x6 = self.emb6(x[:,5])
        x7 = self.emb7(x[:,6])
        y = self.fc(y)
        x = torch.cat((x1, x2, x3, x4, x5, x6, x7, y), dim=1)
        
        x = self.dropout(self.act(self.csvfc1(x)))
        x = self.dropout(self.act(self.csvfc2(x)))
        x = self.dropout(self.act(self.csvfc3(x)))
        x = torch.cat((x, image), dim=1)
        
        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        return self.fc3(x)

In [7]:
model = Net().to(device)

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
for epoch in range(epochs):
    model.train()
    criterion.train()
    
    avg_loss = 0

    for Image, X1, X2, Y in train_loader:
        Image = Image.to(device)
        X1 = X1.to(device)
        X2 = X2.to(device)
        Y = Y.to(device)

        model.zero_grad()  # why we use zero_grad?
        prediction = model(Image, X1, X2)
        loss = torch.sqrt(criterion(prediction, Y)).to(device)
        loss.backward()
        optimizer.step()
        avg_loss += loss / len(train_loader)
    print(f'[Epoch: {epoch+1:>2}] Average loss: {avg_loss:.4f}, ', end='')
    
    model.eval()
    criterion.eval()
    with torch.no_grad():
        val_avg_loss = 0.
        for Image_val, X1_val, X2_val, Y_val in val_loader:
            Image_val = Image_val.to(device)
            X1_val = X1_val.to(device)
            X2_val = X2_val.to(device)
            Y_val = Y_val.to(device)
            val_prediction = model(Image_val, X1_val, X2_val)
            val_loss = torch.sqrt(criterion(val_prediction, Y_val)).to(device)
            val_avg_loss += val_loss / len(val_loader)
        
        print(f"val_loss: {val_avg_loss:.4f}")

[Epoch:  1] Average loss: 4569.1372, val_loss: 4765.3687
[Epoch:  2] Average loss: 4337.0991, val_loss: 5367.7578
[Epoch:  3] Average loss: 3783.4651, val_loss: 6978.0229
[Epoch:  4] Average loss: 3437.6626, val_loss: 5780.6514
[Epoch:  5] Average loss: 3052.0640, val_loss: 3260.1079
[Epoch:  6] Average loss: 2909.9033, val_loss: 3436.5591
[Epoch:  7] Average loss: 3328.3279, val_loss: 3317.9673
[Epoch:  8] Average loss: 2779.9407, val_loss: 2473.9558
[Epoch:  9] Average loss: 2152.6438, val_loss: 1885.8972
[Epoch: 10] Average loss: 1887.7583, val_loss: 1601.2504
[Epoch: 11] Average loss: 1770.5187, val_loss: 1695.9771
[Epoch: 12] Average loss: 1479.3682, val_loss: 1579.0046
[Epoch: 13] Average loss: 1456.6321, val_loss: 2806.5796
[Epoch: 14] Average loss: 1416.3243, val_loss: 2059.3484
[Epoch: 15] Average loss: 1245.0148, val_loss: 2380.2556
[Epoch: 16] Average loss: 1300.9620, val_loss: 1466.8362
[Epoch: 17] Average loss: 1416.8129, val_loss: 1723.7662
[Epoch: 18] Average loss: 1452.

[Epoch: 145] Average loss: 759.0699, val_loss: 1161.4014
[Epoch: 146] Average loss: 810.1230, val_loss: 1046.5858
[Epoch: 147] Average loss: 915.9099, val_loss: 1005.2530
[Epoch: 148] Average loss: 824.2602, val_loss: 1042.2831
[Epoch: 149] Average loss: 874.4677, val_loss: 986.8243
[Epoch: 150] Average loss: 647.7676, val_loss: 1176.0264
[Epoch: 151] Average loss: 629.9417, val_loss: 1007.9111
[Epoch: 152] Average loss: 712.2084, val_loss: 1086.6335
[Epoch: 153] Average loss: 712.4941, val_loss: 1223.4004
[Epoch: 154] Average loss: 540.4898, val_loss: 1002.5806
[Epoch: 155] Average loss: 624.3896, val_loss: 1131.9430
[Epoch: 156] Average loss: 691.0719, val_loss: 1425.1804
[Epoch: 157] Average loss: 831.4714, val_loss: 1021.7207
[Epoch: 158] Average loss: 769.0342, val_loss: 957.5113
[Epoch: 159] Average loss: 754.7045, val_loss: 1135.7540
[Epoch: 160] Average loss: 712.6167, val_loss: 1873.5472
[Epoch: 161] Average loss: 606.9592, val_loss: 922.0737
[Epoch: 162] Average loss: 658.904

In [9]:
model.eval()
criterion.eval()
ss_tot = 0
ss_res = 0
with torch.no_grad():
    for Image_test, X1_test, X2_test, Y_test in test_loader:
        Image_test =Image_test.to(device)
        X1_test = X1_test.to(device)
        X2_test = X2_test.to(device)
        Y_test = Y_test.to(device)
        prediction = model(Image_test, X1_test, X2_test)
        mean = torch.mean(Y_test)
        ss_tot += torch.sum((Y_test - mean) ** 2)
        ss_res += torch.sum((Y_test - prediction) ** 2)
    
    accuracy = 1 - ss_res/ss_tot
    print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 90.67%
