In [1]:
import numpy as np
import pandas as pd
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
import utils
import preprocessing
import models
importlib.reload(models) 
importlib.reload(utils)
importlib.reload(preprocessing)
import torch
import gower

In [2]:
# Load the dataset through the project's `preprocessing` module; every later
# cell in this notebook reads from `df`.
df = preprocessing.load_dataset()

In [3]:
# Gower distance from each of the first ten rows to row 0, computed with the
# `gower` reference library. Row 0 is passed as `data_y` so the library returns
# an (n_rows, 1) result instead of building the full n_rows x n_rows matrix
# only to read one column of it. The whole frame is still passed as `data_x`,
# so the library sees the same rows (and therefore the same value ranges) as
# the original full-matrix call.
a = gower.gower_matrix(df, df.iloc[[0]])[:10, 0]

In [4]:
# Distance from row 0 to each of the first ten rows (row 0 itself included),
# computed one pair at a time with the project's own `utils.gower_dist`.
b = np.array(
    [
        utils.gower_dist(df.iloc[0].to_numpy(), df.iloc[row].to_numpy())
        for row in range(10)
    ]
)

In [5]:
# Display the ten distances obtained from the `gower` library.
a

array([0.        , 0.13945666, 0.12793462, 0.15760538, 0.12678677,
       0.15220223, 0.15831052, 0.16298524, 0.06653662, 0.11459053],
      dtype=float32)

In [6]:
# Float32 tensor copy of the dataset, plus the positions of its binary columns
# as listed by `utils.binary_indices`.
data = torch.tensor(df.to_numpy(), dtype=torch.float32)
binary_indices = torch.tensor(utils.binary_indices)

# One weight per binary column: (# entries equal to 0) / (# entries equal to 1).
# A column with no entries equal to 1 gets the neutral weight 1.0 so the
# division is never by zero.
pos_weights = []
for col_idx in binary_indices:
    column = data[:, col_idx]
    n_pos = int((column == 1).sum())
    n_neg = int((column == 0).sum())
    if n_pos > 0:
        pos_weights.append(n_neg / n_pos)
    else:
        pos_weights.append(1.0)

# Pack the per-column weights into a float32 tensor and show them.
pos_weights = torch.tensor(pos_weights, dtype=torch.float32)
print("Calculated pos_weights:", pos_weights)

Calculated pos_weights: tensor([4.3741e-01, 1.5016e-01, 1.5658e-02, 1.2943e-02, 3.9861e-02, 1.0952e-02,
        1.4227e-02, 1.7093e-02, 7.0155e-02, 7.3826e-02, 1.2801e-02, 8.2621e-03,
        2.6226e-02, 1.3891e-04, 5.1402e-02])


In [8]:
# Toy two-sample batch: column 1 is treated as the binary feature, columns 0
# and 2 as continuous features.
x_original = torch.tensor([[0.5, 1.0, 0.2], [0.7, 0.0, 0.8]], dtype=torch.float32)
x_reconstructed = torch.tensor([[0.4, 0.8, 0.3], [0.6, 0.1, 0.7]], dtype=torch.float32)
binary_indices = [1]
continuous_indices = [0, 2]

# Evaluate the functional loss from `utils` and the module loss from `models`
# on identical inputs.
loss_fn = models.Autoencoder_Loss_Prob(binary_indices, continuous_indices)
loss_1 = utils.gower_loss(x_original, x_reconstructed, binary_indices, continuous_indices)
loss_2 = loss_fn(x_original, x_reconstructed)

print("Gower Loss:", loss_1.item())
print("Autoencoder_Loss_Prob Loss:", loss_2.item())

# Make the comparison an actual check instead of relying on eyeballing the two
# printed numbers; placed after the prints so both values are still visible
# when the check fails.
assert abs(loss_1.item() - loss_2.item()) < 1e-6, (
    "utils.gower_loss and models.Autoencoder_Loss_Prob disagree on the toy batch"
)

Gower Loss: 0.1925828903913498
Autoencoder_Loss_Prob Loss: 0.1925828903913498


In [14]:
# Column positions that `utils` lists as binary features.
utils.binary_indices 

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

In [15]:
# Column positions that `utils` lists as continuous features.
utils.continuous_indices

[0, 16, 17, 18, 19, 20]

In [16]:



import torch
import torch.nn.functional as F
import torch.nn as nn

def gower_loss(x_original, x_reconstructed, binary_indices, continuous_indices=None):
    """Weighted sum of a BCE term on binary columns and an MSE term on continuous columns.

    The two terms are mixed as ``alpha * bce + (1 - alpha) * mse`` where
    ``alpha`` is the fraction of selected columns that are binary.

    Args:
        x_original: 2-D tensor of targets, indexed as ``[:, columns]``.
        x_reconstructed: 2-D tensor of model outputs with the same layout.
            Values in the binary columns are treated as raw logits.
        binary_indices: Column positions of the binary features.
        continuous_indices: Column positions of the continuous features.
            When ``None``, every column not listed in ``binary_indices`` is
            used.

    Returns:
        A 0-dim tensor holding the combined loss.

    Raises:
        ZeroDivisionError: If both index collections are empty.
    """
    if continuous_indices is None:
        # Fall back to the complement of the binary columns.
        continuous_indices = [
            i for i in range(x_original.shape[1]) if i not in binary_indices
        ]

    n_binary = len(binary_indices)
    n_continuous = len(continuous_indices)
    # The weight and the MSE slice are both derived from the same
    # `continuous_indices`, so they cannot disagree.
    alpha = n_binary / (n_binary + n_continuous)

    if n_binary > 0:
        # The logits form is used instead of sigmoid followed by
        # binary_cross_entropy: it stays accurate (and keeps a gradient) when
        # the sigmoid would saturate to exactly 0 or 1.
        binary_loss = F.binary_cross_entropy_with_logits(
            x_reconstructed[:, binary_indices],
            x_original[:, binary_indices],
            reduction='mean',
        )
    else:
        # The mean over zero elements is NaN and would poison the total even
        # though its weight is 0, so an empty group contributes exactly zero.
        binary_loss = x_original.new_zeros(())

    if n_continuous > 0:
        continuous_loss = F.mse_loss(
            x_reconstructed[:, continuous_indices],
            x_original[:, continuous_indices],
            reduction='mean',
        )
    else:
        continuous_loss = x_original.new_zeros(())

    return alpha * binary_loss + (1 - alpha) * continuous_loss

class Autoencoder_Loss_Prob(nn.Module):
    """Mixed reconstruction loss: BCE-with-logits on binary columns, MSE on continuous ones.

    The two terms are combined as ``alpha * bce + (1 - alpha) * mse`` where
    ``alpha`` is the fraction of the listed columns that are binary.

    Args:
        binary_indices: Column positions of the binary features.
        continuous_indices: Column positions of the continuous features.
        verbose: When true (the default, matching the previous behaviour),
            ``forward`` prints its intermediate tensors and losses. Pass
            ``False`` to silence it.

    Raises:
        ZeroDivisionError: If both index collections are empty.
    """

    def __init__(self, binary_indices, continuous_indices, verbose=True):
        super().__init__()
        self.binary_indices = binary_indices
        self.continuous_indices = continuous_indices
        self.verbose = verbose
        self.alpha = len(binary_indices) / (len(binary_indices) + len(continuous_indices))

    def _debug(self, label, value):
        """Print ``label`` and ``value`` when verbose output is enabled."""
        if self.verbose:
            print(label, value)

    def forward(self, x_original, x_reconstructed):
        """Return the combined loss for one batch as a 0-dim tensor.

        Args:
            x_original: 2-D tensor of targets, indexed as ``[:, columns]``.
            x_reconstructed: 2-D tensor of model outputs with the same layout;
                values in the binary columns are treated as raw logits.
        """
        if len(self.binary_indices) > 0:
            x_bin_original = x_original[:, self.binary_indices]
            x_bin_reconstructed = x_reconstructed[:, self.binary_indices]
            self._debug("x_bin_original:", x_bin_original)
            self._debug("x_bin_reconstructed (logits):", x_bin_reconstructed)
            binary_loss = F.binary_cross_entropy_with_logits(
                x_bin_reconstructed, x_bin_original, reduction='mean'
            )
        else:
            # The mean over zero elements is NaN and would poison the total
            # even though alpha is 0; an empty group contributes exactly zero.
            binary_loss = x_original.new_zeros(())
        self._debug("Binary Loss:", binary_loss)

        if len(self.continuous_indices) > 0:
            x_cont_original = x_original[:, self.continuous_indices]
            x_cont_reconstructed = x_reconstructed[:, self.continuous_indices]
            self._debug("x_cont_original:", x_cont_original)
            self._debug("x_cont_reconstructed:", x_cont_reconstructed)
            continuous_loss = F.mse_loss(
                x_cont_reconstructed, x_cont_original, reduction='mean'
            )
        else:
            continuous_loss = x_original.new_zeros(())
        self._debug("Continuous Loss:", continuous_loss)

        # Weight each term by the share of columns it covers.
        total_loss = self.alpha * binary_loss + (1 - self.alpha) * continuous_loss
        self._debug("Total Loss:", total_loss)

        return total_loss

# Sample Data for Testing
x_original = torch.tensor([[0.5, 1.0, 0.2], [0.7, 0.0, 0.8]], dtype=torch.float32)
x_reconstructed = torch.tensor([[0.4, 0.8, 0.3], [0.6, 0.1, 0.7]], dtype=torch.float32)
binary_indices = [1]
continuous_indices = [0, 2]

# Test Both Implementations
loss_fn = Autoencoder_Loss_Prob(binary_indices, continuous_indices)
loss_1 = gower_loss(x_original, x_reconstructed, binary_indices, continuous_indices)
loss_2 = loss_fn(x_original, x_reconstructed)

print("Gower Loss:", loss_1.item())
print("Autoencoder_Loss_Prob Loss:", loss_2.item())


x_bin_original: tensor([[1.],
        [0.]])
x_bin_reconstructed (logits): tensor([[0.8000],
        [0.1000]])
Binary Loss: tensor(0.5577)
x_cont_original: tensor([[0.5000, 0.2000],
        [0.7000, 0.8000]])
x_cont_reconstructed: tensor([[0.4000, 0.3000],
        [0.6000, 0.7000]])
Continuous Loss: tensor(0.0100)
Total Loss: tensor(0.1926)
Gower Loss: 0.1925828903913498
Autoencoder_Loss_Prob Loss: 0.1925828903913498
