Let's now work on the model to try and predict the damage level of each building. Let's start by importing the necessary libraries and the dataset containing the one-hot signatures created in the previous notebook.

In [1]:
import torch
import pandas as pd
import numpy as np


In [10]:
# dataframe = pd.read_csv('signature_dataframe.csv')
dataframe = pd.read_pickle('signature_dataframe.pkl')
dataframe.head()

Unnamed: 0,x,y
0,"[-0.8674214516922528, 0.5968959280707078, 1, 0...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[-0.7894931324262333, 1.6023856802166563, 0, 1...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[-0.664418864464821, 0.4405339408980041, 1, 0,...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[-0.21363849853229364, 0.19104651928789274, 0,...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"[-0.21115149773491346, 0.1691897795630536, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttentionClassifier(nn.Module):
    def __init__(self, input_dim, n_classes):
        super().__init__()
        self.input_dim = input_dim
        self.n_classes = n_classes
        
        # Linear layer to transform the input into query, key, and value representations
        self.fc = nn.Sequential(nn.Linear(input_dim, 3 * input_dim), nn.ReLU(),
                                nn.Linear(3 * input_dim, 3 * input_dim), nn.ReLU(),
                                nn.Linear(3 * input_dim, 3 * input_dim), nn.ReLU()
                                )
        
        
        # Linear layer to produce the final classification logits
        self.classifier = nn.Sequential(nn.Linear(input_dim, input_dim), nn.ReLU(), 
                                        nn.Linear(input_dim, n_classes), nn.ReLU()
                                        )
        
    def forward(self, inputs):
        # Transform the input into query, key, and value representations
        qkv = self.fc(inputs)
        q, k, v = qkv.split(self.input_dim, dim=-1)
        
        # Compute the dot product between the queries and keys to obtain the attention scores
        scores = torch.matmul(q, k.transpose(-1, -2)) / np.sqrt(self.input_dim)
        
        # Normalize the attention scores using a softmax function
        attention = F.softmax(scores, dim=-1)
        
        # Weight the values using the attention scores
        weighted_values = torch.matmul(attention, v)
        
        # Compute the final representation of the input sequence
        representation = weighted_values.sum(dim=1)
        
        # Pass the representation to the classifier to obtain the logits
        logits = self.classifier(representation)
        
        return logits


In [28]:
dataframe['y'].iloc[[0,1]]

0    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Name: y, dtype: object

In [29]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        # Get the label for the current sample
        label = torch.Tensor(self.dataframe['y'].iloc[idx])
        
        # Get the input for the current sample
        input = torch.Tensor(self.dataframe['x'].iloc[idx])
        
        return input, label

In [30]:
dataset = CustomDataset(dataframe)
dataset[[0,1]][0]

tensor([[-0.8674,  0.5969,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,
          1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [-0.7895,  1.6024,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  1.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000, 

In [31]:
model = SelfAttentionClassifier(input_dim=len(dataset[0][0]), n_classes=len(dataset[0][1]))
model(torch.tensor(dataset[0][0]))

  


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -2)