In [None]:
####################################################################################################################################################

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from scipy.special import softmax
from scipy.stats import entropy

# MovieLens 100k ratings file: tab-separated rows of (user, item, rating, timestamp).
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
columns = ['user_id', 'item_id', 'rating', 'timestamp']

# Load the ratings and discard the timestamp column, which the code below never reads.
df = pd.read_csv(url, sep='\t', names=columns).drop(columns='timestamp')

# One row per user, one column per item; (user, item) pairs without a rating become 0.
user_item_matrix = df.pivot(index='user_id', columns='item_id', values='rating').fillna(0)

print("User-Item Matrix before GCN:\n", user_item_matrix.head())

# Node features for the GCN: each user's full rating row as a float tensor.
features = torch.tensor(user_item_matrix.to_numpy(), dtype=torch.float)

# custom similarity metric
def stable_softmax(x):
    """Softmax of ``x`` computed after subtracting its maximum.

    Shifting by ``np.max(x)`` keeps every exponent <= 0, so ``np.exp`` cannot
    overflow. The normalising sum is taken along axis 0.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum(axis=0)
    return exponentials / normaliser

def custom_similarity(v1, v2):
    """Score two rating vectors as ``exp(H(v1')) + exp(H(v2'))``.

    Each input is first passed through ``stable_softmax`` and then re-weighted
    element-wise as ``p_i * exp(p_i * i)``, where ``i`` is the 1-based position
    of the element. ``H`` is ``scipy.stats.entropy`` of that re-weighted vector
    (``entropy`` normalises its argument to sum to 1 before computing).

    Args:
        v1: Numeric vector (the callers in this notebook pass 1-D rating rows).
        v2: Numeric vector of the same kind as ``v1``.

    Returns:
        The sum of the two exponentiated entropies, or ``np.nan`` if a NaN is
        found after the softmax step or after the re-weighting step (a message
        is printed in either case).
    """
    epsilon = 1e-8  # Added before entropy() so that no element is exactly zero

    v1 = stable_softmax(v1)
    v2 = stable_softmax(v2)

    if np.isnan(v1).any() or np.isnan(v2).any():
        print("NaN detected in softmax output")
        return np.nan

    # Position-dependent re-weighting: later positions get a larger exponent.
    v1_exp = v1 * np.exp(v1 * np.arange(1, len(v1) + 1))
    v2_exp = v2 * np.exp(v2 * np.arange(1, len(v2) + 1))

    if np.isnan(v1_exp).any() or np.isnan(v2_exp).any():
        print("NaN detected in exponential transformation")
        return np.nan

    # NOTE: an earlier revision also built np.outer(v1_exp, v2_exp) and a
    # flattened copy of it here. Neither value was ever read, yet each call paid
    # for an (items x items) array, so that dead computation has been removed.
    H_v1 = entropy(v1_exp.ravel() + epsilon)
    H_v2 = entropy(v2_exp.ravel() + epsilon)

    return np.exp(H_v1) + np.exp(H_v2)

num_users = user_item_matrix.shape[0]
similarity_matrix = np.zeros((num_users, num_users))

# Materialise the rating rows once. Slicing this ndarray inside the loop avoids
# building two pandas Series (via .iloc[...].values) for every user pair.
rating_rows = user_item_matrix.to_numpy()

# Pairwise scores for every ordered pair of distinct users; the diagonal stays 0.
for i in range(num_users):
    for j in range(num_users):
        if i != j:
            similarity_matrix[i, j] = custom_similarity(rating_rows[i], rating_rows[j])

# Checking for any NaN values in the similarity matrix
print("Any NaN in similarity matrix:", np.isnan(similarity_matrix).any())

# Fully connected directed graph without self-loops. np.nonzero walks the mask in
# row-major order, i.e. (0, 1), (0, 2), ..., the same order a nested i/j loop yields.
off_diagonal = ~np.eye(num_users, dtype=bool)
edge_sources, edge_targets = np.nonzero(off_diagonal)

# edge_index has shape (2, num_edges); edge_weight[e] is the score of edge e.
edge_index = torch.tensor(np.vstack((edge_sources, edge_targets)), dtype=torch.long)
edge_weight = torch.tensor(similarity_matrix[edge_sources, edge_targets], dtype=torch.float)

# Checking for any NaN values in edge weights
print("Any NaN in edge weights:", torch.isnan(edge_weight).any())

# Defining the GCN model
class DeeperWeightedGCN(torch.nn.Module):
    """Stack of four edge-weighted ``GCNConv`` layers: in -> 64 -> 32 -> 16 -> out.

    All layers are created with ``normalize=True`` and ``add_self_loops=False``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        conv_options = dict(normalize=True, add_self_loops=False)
        self.conv1 = GCNConv(in_channels, 64, **conv_options)
        self.conv2 = GCNConv(64, 32, **conv_options)
        self.conv3 = GCNConv(32, 16, **conv_options)
        self.conv4 = GCNConv(16, out_channels, **conv_options)

    def forward(self, x, edge_index, edge_weight):
        """Run ``x`` through the four layers, with ReLU after each of the first three.

        A message is printed whenever a layer's raw output contains NaN; the
        forward pass continues regardless. The last layer's output is returned
        without an activation.
        """
        hidden_stages = (
            (self.conv1, "NaN values after conv1"),
            (self.conv2, "NaN values after conv2"),
            (self.conv3, "NaN values after conv3"),
        )
        for conv, nan_message in hidden_stages:
            x = conv(x, edge_index, edge_weight=edge_weight)
            if torch.isnan(x).any():
                print(nan_message)
            x = F.relu(x)

        # Final layer: no ReLU.
        x = self.conv4(x, edge_index, edge_weight=edge_weight)
        if torch.isnan(x).any():
            print("NaN values after conv4")

        return x
# Fix the torch RNG before the model is built so that its random weight
# initialisation, and therefore the whole training run, is repeatable.
torch.manual_seed(42)

# Input and output widths both equal the number of item columns in `features`.
model = DeeperWeightedGCN(features.size(1), features.size(1))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

data = Data(x=features, edge_index=edge_index, edge_attr=edge_weight)

# Entries of data.x that are non-zero. The input never changes during training,
# so the mask is computed once instead of twice per epoch.
observed_mask = data.x != 0

# Training loop
for epoch in range(500):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_attr)

    if torch.isnan(out).any():
        print("NaN values in the output at epoch", epoch)
        print(pd.DataFrame(out.detach().numpy(), index=user_item_matrix.index, columns=user_item_matrix.columns))  # Detach the tensor before converting to numpy
        break

    # The loss only covers the non-zero entries of the input matrix.
    loss = criterion(out[observed_mask], data.x[observed_mask])

    if torch.isnan(loss).any():
        print("NaN values in the loss at epoch", epoch)
        break

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Predicting the ratings
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index, data.edge_attr)

# Applying the constraint: clip the values between 1 and 5
out = torch.clamp(out, min=1.0, max=5.0)

predicted_ratings = pd.DataFrame(out.numpy(), index=user_item_matrix.index, columns=user_item_matrix.columns)

# Replace every 0 entry with the model's prediction and keep all other entries.
# predicted_ratings carries the same index and columns, so the frames align
# cell by cell; this is one vectorised call instead of a nested .at loop.
user_item_matrix = user_item_matrix.mask(user_item_matrix == 0, predicted_ratings)

print("User-Item Matrix after GCN:\n", user_item_matrix)


User-Item Matrix before GCN:
 item_id  1     2     3     4     5     6     7     8     9     10    ...  \
user_id                                                              ...   
1         5.0   3.0   4.0   3.0   3.0   5.0   4.0   1.0   5.0   3.0  ...   
2         4.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   2.0  ...   
3         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   
4         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   
5         4.0   3.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   

item_id  1673  1674  1675  1676  1677  1678  1679  1680  1681  1682  
user_id                                                              
1         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
2         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
3         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
4         0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
5         0.0   0

In [None]:
# Independent ndarray copy of the user-item matrix as it stands at this point.
after = user_item_matrix.to_numpy(copy=True)


In [None]:
# Ratings matrix as it was before any predictions were written into it (0 = no rating).
# It is rebuilt from the raw ratings in `df` because `user_item_matrix_` is not defined
# in any cell visible in this notebook (NameError on a fresh kernel), and
# `user_item_matrix` itself has already had its zero entries overwritten by now.
before = df.pivot(index='user_id', columns='item_id', values='rating').fillna(0).to_numpy()

In [None]:
# Precision@k evaluation for the run that uses the exponential-sum similarity metric

import numpy as np

def precision_at_k(predictions, actuals, k):
    """Fraction of the ``k`` highest-scored positions that are relevant.

    Args:
        predictions: 1-D array-like of scores, one per item.
        actuals: 1-D array-like; an item counts as relevant when its entry is non-zero.
        k: Number of top-scored items to inspect. Must be positive.

    Returns:
        float: ``hits / k``, where ``hits`` is the number of the ``k``
        highest-scored positions whose ``actuals`` entry is non-zero.

    Raises:
        ValueError: If ``k`` is not positive. (With ``k == 0`` the slice
            ``[-0:]`` would select every item, and a negative ``k`` would
            produce a negative "precision".)
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k}")

    # argsort is ascending, so the last k indices are the k largest scores.
    top_k_pred = np.argsort(predictions)[-k:]
    relevant_items = np.nonzero(actuals)[0]

    # Count how many of the selected indices are relevant.
    hits = int(np.count_nonzero(np.isin(top_k_pred, relevant_items)))
    return hits / k

k = 10
num_users = before.shape[0]

# Add up each user's Precision@k (row of `after` scored against the same row of
# `before`), then divide by the number of users.
total_precision = 0
for row in range(num_users):
    total_precision += precision_at_k(after[row], before[row], k)

average_precision = total_precision / num_users

print(f"Average Precision@{k} across all users: {average_precision}")


Average Precision@10 across all users: 0.7799575821845165


In [None]:
#########################################################################################################################################################