In [28]:
# Define model
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def create_hidden_layers(hidden_layer_sizes):
    if not hidden_layer_sizes:
        return []

    hidden_layers = []
    for i in range(1, len(hidden_layer_sizes)):
        hidden_size = hidden_layer_sizes[i]
        hidden_layers.append(nn.Linear(hidden_layer_sizes[i - 1], hidden_size))
    return hidden_layers


class MultiHiddenLayerNetwork(nn.Module):
    def __init__(self, input_size, output_size, hidden_layer_sizes):
        super(MultiHiddenLayerNetwork, self).__init__()

        if not hidden_layer_sizes:
            self.input_layer = nn.Linear(input_size, output_size)
            hidden_layers = []
        else:
            self.input_layer = nn.Linear(input_size, hidden_layer_sizes[0])
            hidden_layers = create_hidden_layers(hidden_layer_sizes)
            hidden_layers.append(nn.Linear(hidden_layer_sizes[-1], output_size))

        self.hidden_layers = nn.ModuleList(hidden_layers)

    def pass_forward(self, x):
        x = self.input_layer(x)
        for i, hidden_layer in enumerate(self.hidden_layers):
            x = torch.relu(x)
            x = hidden_layer(x)
        return x


# Network that takes in all the input features and outputs a vector of qualities
class QualitiesPredictor(MultiHiddenLayerNetwork):
    def __init__(self, input_size, num_quality_scores, hidden_layer_sizes):
        super(QualitiesPredictor, self).__init__(input_size, num_quality_scores, hidden_layer_sizes)

    def forward(self, x):
        return super().pass_forward(x)


# Network that takes a quality and the price and predicts the satisfaction
class SatisfactionPredictor(MultiHiddenLayerNetwork):
    def __init__(self, hidden_layer_sizes):
        super(SatisfactionPredictor, self).__init__(2, 1, hidden_layer_sizes)

    def forward(self, q, p):
        x = torch.cat((q, p), dim = 1)
        return super().pass_forward(x)


# Takes all the quality scores and predicts a total quality variable
class TotalQualityPredictor(MultiHiddenLayerNetwork):
    def __init__(self, num_quality_scores, hidden_layer_sizes):
        super(TotalQualityPredictor, self).__init__(num_quality_scores, 1, hidden_layer_sizes)

    def forward(self, x):
        return super().pass_forward(x)


class CausalModel(nn.Module):
    def __init__(self, input_size, num_quality_scores, activation_on_quality,
                 qualities_predictor_hidden_layer_sizes,
                 satisfaction_predictor_hidden_layer_sizes,
                 total_quality_predictor_hidden_layer_sizes):
        super(CausalModel, self).__init__()

        self.num_quality_scores = num_quality_scores

        self.qualities_predictor_net = QualitiesPredictor(input_size, num_quality_scores,
                                                          qualities_predictor_hidden_layer_sizes)

        self.activation_on_quality = activation_on_quality

        self.satisfaction_predictors = [
            SatisfactionPredictor(satisfaction_predictor_hidden_layer_sizes) for _ in range(num_quality_scores)
        ]

        self.total_quality_predictor = TotalQualityPredictor(num_quality_scores,
                                                             total_quality_predictor_hidden_layer_sizes)

        self.total_satisfaction_predictor = SatisfactionPredictor(satisfaction_predictor_hidden_layer_sizes)

    def forward(self, x, p):
        qualities = self.qualities_predictor_net(x)
        qualities = self.activation_on_quality(qualities)

        satisfactions = torch.zeros(self.num_quality_scores, 1)
        for i, sat_predictor in enumerate(self.satisfaction_predictors):
            satisfactions[i] = sat_predictor(qualities[i], p)

        total_quality = self.total_quality_predictor(qualities)
        total_quality = self.activation_on_quality(total_quality)

        total_satisfaction = self.total_satisfaction_predictor(qualities, p)

        return satisfactions, total_satisfaction, total_quality

    def loss_function(self, satisfactions, total_satisfaction, satisfactions_targets, total_satisfaction_target):
        # Define custom loss function
        criterion = nn.MSELoss()  # Use Mean Squared Error as the loss criterion
        # Compute the loss for satisfactions
        loss_satisfactions = criterion(satisfactions, satisfactions_targets)
        # Compute the loss for total_satisfaction
        loss_total_satisfaction = criterion(total_satisfaction, total_satisfaction_target)
        # Add up the losses with appropriate weights (if desired)
        total_loss = loss_satisfactions + loss_total_satisfaction

        return total_loss

In [29]:
import pandas as pd
import torch
from torch.utils.data import Dataset

class CausalDataloader(Dataset):
    def __init__(self, df):
        CATEGORIES = ["staff", "facilities", "cleanliness", "comfort", "location"]

        self.X = torch.tensor(df[["stars", "reviews", "rating"] + list(pd.get_dummies(df["city"]).columns)].values).float()
        self.p = torch.tensor(df[["price"]].values).float()
        self.s = torch.tensor(df[CATEGORIES].values).float()
        self.total_s = torch.tensor(df[["rating"]].values).float()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X = self.X[idx, :]
        p = self.p[idx, :]
        s = self.s[idx, :]
        total_s = self.total_s[idx, :]

        return X, p, s, total_s

In [30]:
import numpy as np

data = pd.read_csv("data.csv")

# Step 2: Split the dataset into train, test, and validation sets
# Define the ratio for train, test, and validation sets
train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1

# Get the number of samples in the dataset
num_samples = len(data)

# Calculate the number of samples for train, test, and validation sets
num_train = int(train_ratio * num_samples)
num_test = int(test_ratio * num_samples)
num_val = num_samples - num_train - num_test

# Shuffle the dataset
indices = np.arange(num_samples)
np.random.shuffle(indices)

# Split the dataset into train, test, and validation sets
train_data = data.iloc[indices[:num_train]]
test_data = data.iloc[indices[num_train:num_train + num_test]]
val_data = data.iloc[indices[num_train + num_test:]]

train_loader = CausalDataloader(train_data)
test_loader = CausalDataloader(test_data)
val_loader = CausalDataloader(val_data)

In [31]:
CATEGORIES = ["staff", "facilities", "cleanliness", "comfort", "location"]

NUM_FEATURES = len(data[["stars", "reviews", "rating"] + list(pd.get_dummies(data["city"]).columns)].columns)
NUM_QUALITY_SCORES = len(data[CATEGORIES].columns)

model = CausalModel(NUM_FEATURES, NUM_QUALITY_SCORES, nn.Sigmoid, [], [], [])
model.float()

In [32]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()

    for batch_idx, (X, p, target_s, target_total_s) in enumerate(train_loader):
        optimizer.zero_grad()
        s, total_s = model(X, p)
        loss = model.loss_function(s, total_s, target_s, target_total_s)
        loss.backward()
        optimizer.step()

        # Record batch loss
        batch_loss = loss.item()

        # Print batch loss
        print('Epoch [{}/{}], Batch [{}/{}], Batch Loss: {:.4f}'
              .format(epoch+1, num_epochs, batch_idx+1, len(train_loader), batch_loss))

    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for val_batch_idx, (val_X, val_p, val_target_s, val_target_total_s) in enumerate(train_loader):
            val_s, val_total_s = model(val_X, val_p)
            val_loss = model.loss_function(val_s, val_total_s, val_target_s, val_target_total_s)
            total_val_loss += val_loss.item()

    validation_loss = total_val_loss / len(val_loader)

    print('Epoch [{}/{}], Validation Loss: {:.4f}'
          .format(epoch+1, num_epochs, validation_loss))

RuntimeError: expected scalar type Double but found Float