In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split
import random

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
csv_path = "."
image_path = "./images"
batch_size = 256
epochs = 200
learning_rate = 1e-3
seed = 42

In [3]:
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [4]:
class AllDataset(Dataset):
    def __init__(self, csv_path, image_path, image_transform=transforms.Compose([transforms.ToTensor()])):
        super(AllDataset).__init__()
        csv = pd.read_csv(csv_path)
        csv_np = csv.to_numpy()
        
        self.image_transform = image_transform
        self.ids = list(map(str, csv_np[:,0]))
        self.image_path = image_path
        
        for i in [1, 3, 4, 5, 6, 7, 8]:
            wordset = {word: idx for idx, word in enumerate(np.unique(csv_np[:,i]))}
            for row in range(len(csv_np)):
                csv_np[row][i] = wordset[csv_np[row][i]]
        self.ints = torch.from_numpy(np.array(csv_np[:,[1,3,4,5,6,7,8]], dtype="int"))
        self.floats = torch.from_numpy(np.array(csv_np[:,[2,9,10,11]], dtype="float")).float()
        self.target = torch.from_numpy(np.array(csv_np[:,[12]], dtype="float")).float()
    
    
    def __getitem__(self,idx):
        image = Image.open(self.image_path+'/'+self.ids[idx]+'.jpg')
        image_tensor = self.image_transform(image)
        image.close()
        return image_tensor, self.ints[idx],self.floats[idx], self.target[idx]
    
    
    def __len__(self):
        return len(self.ints)

In [5]:
image_transform = transforms.Compose([transforms.ToTensor()
                                     ])

train_dataset = AllDataset(csv_path+'/train.csv', image_path, image_transform)
test_dataset = AllDataset(csv_path+'/test.csv', image_path, image_transform)
val_dataset = AllDataset(csv_path+'/val.csv', image_path, image_transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle = True)

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.resnet18 = torchvision.models.resnet18(pretrained=False)
        self.resnet18.fc = nn.Sequential(nn.Linear(512, 4096),
                                         nn.BatchNorm1d(4096),
                                         nn.ReLU(),
                                         nn.Dropout()
                                        )
        self.resenet18 = self.resnet18.to(device)
        
        self.emb1 = torch.nn.Embedding(8, 20)
        self.emb2 = torch.nn.Embedding(11, 20)
        self.emb3 = torch.nn.Embedding(20, 20)
        self.emb4 = torch.nn.Embedding(4, 20)
        self.emb5 = torch.nn.Embedding(4, 20)
        self.emb6 = torch.nn.Embedding(4, 20)
        self.emb7 = torch.nn.Embedding(7, 20)
        self.act = nn.ReLU()
        self.fc = nn.Linear(4, 80)
        self.csvbn = nn.BatchNorm1d(80)
        self.csvfc1 = nn.Linear(220, 8192)
        self.csvfc2 = nn.Linear(8192, 8192)
        self.csvfc3 = nn.Linear(8192, 4096)
        self.csvbn1 = nn.BatchNorm1d(8192)
        self.csvbn2 = nn.BatchNorm1d(8192)
        self.csvbn3 = nn.BatchNorm1d(4096)
        
        self.fc1 = nn.Linear(8192, 4096)
        self.fc2 = nn.Linear(4096, 1)
        self.bn1 = nn.BatchNorm1d(4096)
        self.dropout = nn.Dropout()
    
    def forward(self, image, x, y):
        image = self.resnet18(image)
        
        x1 = self.emb1(x[:,0])
        x2 = self.emb2(x[:,1])
        x3 = self.emb3(x[:,2])
        x4 = self.emb4(x[:,3])
        x5 = self.emb5(x[:,4])
        x6 = self.emb6(x[:,5])
        x7 = self.emb7(x[:,6])
        y = self.csvbn(self.fc(y))
        x = torch.cat((x1, x2, x3, x4, x5, x6, x7, y), dim=1)
        
        x = self.dropout(self.act(self.csvbn1(self.csvfc1(x))))
        x = self.dropout(self.act(self.csvbn2(self.csvfc2(x))))
        x = self.dropout(self.act(self.csvbn3(self.csvfc3(x))))
        x = torch.cat((x, image), dim=1)
        
        x = self.dropout(self.act(self.bn1(self.fc1(x))))
        return self.fc2(x)

In [7]:
model = Net().to(device)

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
for epoch in range(epochs):
    model.train()
    criterion.train()
    
    avg_loss = 0

    for Img, X1, X2, Y in train_loader:
        Img = Img.to(device)
        X1 = X1.to(device)
        X2 = X2.to(device)
        Y = Y.to(device)

        model.zero_grad()
        prediction = model(Img, X1, X2)
        loss = torch.sqrt(criterion(prediction, Y)).to(device)
        loss.backward()
        optimizer.step()
        avg_loss += loss / len(train_loader)
    print(f'[Epoch: {epoch+1:>2}] Average loss: {avg_loss:.4f}, ', end='')
    
    model.eval()
    criterion.eval()
    with torch.no_grad():
        val_avg_loss = 0.
        for Image_val, X1_val, X2_val, Y_val in val_loader:
            Image_val = Image_val.to(device)
            X1_val = X1_val.to(device)
            X2_val = X2_val.to(device)
            Y_val = Y_val.to(device)
            val_prediction = model(Image_val, X1_val, X2_val)
            val_loss = torch.sqrt(criterion(val_prediction, Y_val)).to(device)
            val_avg_loss += val_loss / len(val_loader)
        
        print(f"val_loss: {val_avg_loss:.4f}")

[Epoch:  1] Average loss: 4386.0732, val_loss: 4059.1494
[Epoch:  2] Average loss: 4343.2930, val_loss: 4595.4800
[Epoch:  3] Average loss: 4502.4038, val_loss: 4164.3027
[Epoch:  4] Average loss: 4359.0620, val_loss: 4488.6191
[Epoch:  5] Average loss: 4605.9663, val_loss: 4314.8706
[Epoch:  6] Average loss: 4173.6626, val_loss: 4099.2759
[Epoch:  7] Average loss: 5594.8481, val_loss: 4183.4932
[Epoch:  8] Average loss: 3928.9604, val_loss: 4126.0942
[Epoch:  9] Average loss: 4113.3047, val_loss: 4247.1777
[Epoch: 10] Average loss: 3947.5435, val_loss: 3882.3040
[Epoch: 11] Average loss: 3934.0725, val_loss: 3934.1631
[Epoch: 12] Average loss: 3841.7168, val_loss: 3943.6484
[Epoch: 13] Average loss: 3920.7812, val_loss: 3540.2773
[Epoch: 14] Average loss: 3730.4177, val_loss: 3718.4382
[Epoch: 15] Average loss: 3614.7852, val_loss: 3379.8533
[Epoch: 16] Average loss: 3639.3958, val_loss: 3602.7327
[Epoch: 17] Average loss: 3795.5361, val_loss: 3676.9663
[Epoch: 18] Average loss: 3510.

[Epoch: 144] Average loss: 939.9152, val_loss: 1447.3506
[Epoch: 145] Average loss: 1019.9977, val_loss: 1579.7068
[Epoch: 146] Average loss: 1011.0018, val_loss: 1372.9178
[Epoch: 147] Average loss: 983.1951, val_loss: 1555.9540
[Epoch: 148] Average loss: 829.1470, val_loss: 1379.1600
[Epoch: 149] Average loss: 923.9688, val_loss: 1326.6255
[Epoch: 150] Average loss: 1004.4633, val_loss: 1646.6611
[Epoch: 151] Average loss: 1012.2207, val_loss: 1648.0925
[Epoch: 152] Average loss: 2553.4202, val_loss: 1939.2454
[Epoch: 153] Average loss: 824.9622, val_loss: 1562.1403
[Epoch: 154] Average loss: 850.0100, val_loss: 1608.1995
[Epoch: 155] Average loss: 937.5533, val_loss: 1742.9166
[Epoch: 156] Average loss: 996.8402, val_loss: 1399.1776
[Epoch: 157] Average loss: 982.6718, val_loss: 1265.3888
[Epoch: 158] Average loss: 892.2037, val_loss: 1343.4982
[Epoch: 159] Average loss: 847.6124, val_loss: 1483.1326
[Epoch: 160] Average loss: 1623.6033, val_loss: 1649.7344
[Epoch: 161] Average loss

In [9]:
model.eval()
criterion.eval()
ss_tot = 0
ss_res = 0
with torch.no_grad():
    for Image_test, X1_test, X2_test, Y_test in test_loader:
        Image_test =Image_test.to(device)
        X1_test = X1_test.to(device)
        X2_test = X2_test.to(device)
        Y_test = Y_test.to(device)
        prediction = model(Image_test, X1_test, X2_test)
        mean = torch.mean(Y_test)
        ss_tot += torch.sum((Y_test - mean) ** 2)
        ss_res += torch.sum((Y_test - prediction) ** 2)
    
    accuracy = 1 - ss_res/ss_tot
    print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 81.98%
