In [1]:
# Load packages
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import inception_v3, Inception_V3_Weights
from pytorchtools import EarlyStopping
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from category_encoders import OrdinalEncoder, OneHotEncoder, TargetEncoder
from sklearn.impute import SimpleImputer
from pathlib import Path
import shared_functions as sf

In [2]:
# Define model & file name
model_name = 'MultiModalModel'
file_name = 'property-sales_new-york-city_2022_pre-processed'

In [3]:
# Create output directory for exports
Path(f'../models/{model_name}').mkdir(parents=True, exist_ok=True)

In [4]:
# Load subset keys as list
subset_keys = pd.read_csv(f'../data/processed/subset_keys.csv').squeeze().to_list()

In [5]:
# Load subset index as series
subset_index = pd.read_csv(f'../data/processed/subset_index.csv', index_col=0)

In [6]:
# Use GPU when possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu').type
print(f'Device type: {device.upper()}')

Device type: CUDA


In [7]:
# Set random seed
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [8]:
dataset_params = {
    'data': f'../data/processed/{file_name}.parquet',
    'target_name': 'sale_price',
    'to_drop': 'sale_price_adj',
    'image_directory': '../data/raw/satellite-images_new-york-city_2022_640x640_19/',
    'image_transformation': transforms.Compose([
        transforms.CenterCrop((600, 600)), # crop image borders by margin of 20px to remove text from 640x640
        transforms.Resize((299, 299)), # resize image to 299x299
        transforms.ToTensor(),  # convert image to PyTorch tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # noarmlize based on ImageNet data
        ]),
    'subset_index': '../data/processed/subset_index.csv',
    'input_scaler': StandardScaler(),
    'target_scaler': None,
    'categorical_encoder': TargetEncoder(),
    'numerical_imputer': SimpleImputer(missing_values=pd.NA, strategy='mean'),
    'data_overview': f'../data/processed/{file_name}_data-overview.csv'
    }

In [9]:
# Instantiate datasets
subsets = {subset_key: sf.MultiModalDataset(**dataset_params, subset=subset_key) for subset_key in subset_keys}
dataset = sf.MultiModalDataset(**dataset_params)

In [10]:
# Define model architecture
class MultiModalModel(nn.Module):
    # Define model components
    def __init__(self):
        super().__init__()

        # Define text model
        self.TextModel = nn.Sequential(
            nn.Linear(25, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
            )
        
        # Define image model
        self.ImageModel = inception_v3(weights=Inception_V3_Weights.DEFAULT)
        self.ImageModel.aux_logits = False
        for parameter in self.ImageModel.parameters():
            parameter.requires_grad = False
        self.ImageModel.fc = nn.Sequential(
            nn.Linear(self.ImageModel.fc.in_features, 1)
            )

        # Define linear layer for output
        self.linear = nn.Linear(2, 1)

        # Define acitvation function
        self.relu = nn.ReLU(inplace=True)
    
    # Define forward pass
    def forward(self, X_text, X_image):
        X_text = self.relu(self.TextModel(X_text))
        X_image = self.relu(self.ImageModel(X_image))
        y = self.linear(torch.cat((X_text, X_image), dim=1))
        return y

In [11]:
# Instantiate model
model = MultiModalModel()

In [12]:
# Calculate number of model parameters
n_params = sum(parameter.numel() for parameter in model.parameters())
print(f'# model paramters: {n_params}')

# model paramters: 25118093


In [13]:
# Pass model to GPU
model.to(device)

MultiModalModel(
  (TextModel): Sequential(
    (0): Linear(in_features=25, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
  (ImageModel): Inception3(
    (Conv2d_1a_3x3): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2a_3x3): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (Conv2d_2b_3x3): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, 

In [14]:
epochs = 5

# Define hyperparameters
batch_size = 32
learning_rate = .01

In [15]:
# Define loss function
loss_function = nn.MSELoss().to(device)

In [16]:
# Define optimization alogrithm
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Define early stopping condition
stop_early = EarlyStopping(patience=3, delta=0, path=f'../models/{model_name}/state_dict.pt')

In [17]:
# Create data loaders
dataloader_train = DataLoader(subsets['train'], batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
dataloader_val = DataLoader(subsets['val'], batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

In [18]:
# Create data frame for storing training history
history = pd.DataFrame(index=pd.Index(np.arange(epochs) + 1, name='epoch'))

In [19]:
# Training loop
for epoch in np.arange(epochs) + 1:
    # Activate training mode
    model.train()

    # Create list for storing batch losses
    batch_losses_train = []

    # Iterate over all training batches
    for i, sample in enumerate(dataloader_train):
        # Pass batch to GPU
        X, y = [X.to(device) for X in sample[:-1]], sample[-1].to(device)

        # Reset gradients
        model.zero_grad()

        # Run forward pass
        output_train = model(*X)

        # Compute batch loss
        batch_loss_train = loss_function(output_train, y)

        # Run backward pass
        batch_loss_train.backward()

        # Optimise parameters
        optimizer.step()

        # Store batch loss
        batch_losses_train.append(batch_loss_train.data.item())
    
    # Calculate training loss
    epoch_rmse_train = np.mean(batch_losses_train)**.5

    # Store training loss in history
    history.loc[epoch, 'RMSE_train'] = epoch_rmse_train.item()

    # Activate validation mode
    model.eval() # deactivates potential Dropout and BatchNorm
    
    # Create list for storing batch losses
    batch_losses_val = []

    # Iterate over all validation batches
    for i, sample in enumerate(dataloader_val):
        with torch.no_grad():
            # Pass batch to GPU
            X, y = [X.to(device) for X in sample[:-1]], sample[-1].to(device)

            # Run forward pass
            output_val = model(*X)

            # Compute batch loss
            batch_loss_val = loss_function(output_val, y)

            # Store batch loss
            batch_losses_val.append(batch_loss_val.data.item())

    # Calculate validation loss
    epoch_rmse_val = np.mean(batch_losses_val)**.5

    # Store validation loss in history
    history.loc[epoch, 'RMSE_val'] = epoch_rmse_val.item()
    
    # Print progress to console
    print(f'Epoch {epoch:{len(str(epochs))}.0f}/{epochs}: RMSE_train: {epoch_rmse_train.item():,.0f}, RMSE_val: {epoch_rmse_val.item():,.0f}')

    # Stop early in case validation loss does not improve
    stop_early(epoch_rmse_val, model)
    if stop_early.early_stop:
        print("Early stopping condition met")
        break
        
# Load the best model
model.load_state_dict(torch.load(stop_early.path))


Epoch 1/5: RMSE_train: 1,731,531, RMSE_val: 1,721,584
Epoch 2/5: RMSE_train: 1,616,694, RMSE_val: 1,431,828
Epoch 3/5: RMSE_train: 1,141,611, RMSE_val: 1,410,324
Epoch 4/5: RMSE_train: 957,292, RMSE_val: 1,390,850
Epoch 5/5: RMSE_train: 893,123, RMSE_val: 1,327,975


In [20]:
# Save training history
history.to_csv(f'../models/{model_name}/history.csv')

In [22]:
# Generate model predictions
predictions = sf.get_predictions(model, dataset, subset_index, device, save_as=f'../models/{model_name}/predictions.csv')

TypeError: to() received an invalid combination of arguments - got (list), but expected one of:
 * (torch.device device, torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format)
 * (torch.dtype dtype, bool non_blocking, bool copy, *, torch.memory_format memory_format)
 * (Tensor tensor, bool non_blocking, bool copy, *, torch.memory_format memory_format)


In [None]:
# Compute performance metrics
metrics = sf.get_metrics(predictions, subset_keys, save_as=f'../models/{model_name}/perf_metrics.csv')

In [23]:
sf.plot_history(history, save_as=f'../models/{model_name}/history.pdf')

AttributeError: 'DataFrame' object has no attribute 'loss_train'

Error in callback <function _draw_all_if_interactive at 0x7f017973b4c0> (for post_execute):


RuntimeError: Failed to process string with tex because latex could not be found

RuntimeError: Failed to process string with tex because latex could not be found

<Figure size 453.6x288 with 1 Axes>

In [None]:
sf.plot_scatter(predictions, save_as=f'../models/{model_name}/scatter.pdf')

In [None]:
sf.plot_log(predictions, save_as=f'../models/{model_name}/log.pdf')