In [66]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Use GPU if available, otherwise use CPU

class AutoEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Set the number of hidden units
        self.num_hidden = 8
        
        # Define the encoder part of the autoencoder
        self.encoder = nn.Sequential(
            nn.Linear(3, 256),  # input size: 784, output size: 256
            nn.ReLU(),  # apply the ReLU activation function
            nn.Linear(256, self.num_hidden),  # input size: 256, output size: num_hidden
            nn.ReLU(),  # apply the ReLU activation function
        )
        
        # Define the decoder part of the autoencoder
        self.decoder = nn.Sequential(
            nn.Linear(self.num_hidden, 256),  # input size: num_hidden, output size: 256
            nn.ReLU(),  # apply the ReLU activation function
            nn.Linear(256, 3),  # input size: 256, output size: 784
            nn.Sigmoid(),  # apply the sigmoid activation function to compress the output to a range of (0, 1)
        )

    def forward(self, x):
        # Pass the input through the encoder
        encoded = self.encoder(x)
        # Pass the encoded representation through the decoder
        decoded = self.decoder(encoded)
        # Return both the encoded representation and the reconstructed output
        return encoded, decoded

In [67]:
def convert(user_id, input_matrix, output_matrix):

    top_ten_input = []
    if type(input_matrix) == 'numpy.ndarray':
        for i in range(10):
            weights = input_matrix[user_id]
            top_ten_input.append(np.argmax(weights))
            weights.pop(user_id)
    if type(input_matrix) == 'pandas.DataFrame':
        for i in range(10):
            weights = input_matrix[user_id]
            top_ten_input.append(np.argmax(weights))
            weights.pop(user_id)

    top_ten_output = []
    if type(input_matrix) == 'numpy.ndarray':
        for i in range(10):
            weights = output_matrix[user_id]
            top_ten_output.append(np.argmax(weights))
            weights.pop(user_id)
    if type(input_matrix) == 'pandas.DataFrame':
        for i in range(10):
            weights = output_matrix[user_id]
            top_ten_output.append(np.argmax(weights))
            weights.pop(user_id)
    
    return top_ten_input, top_ten_output

In [68]:
user_artists = pd.read_csv('../unpushed_work/last_fm_data/user_artists.dat', sep='\t')

user_artists_train = user_artists[:74437]
user_artists_test = user_artists[74437:92834]

X_train = user_artists_train.groupby('userID').sample(frac=0.5, random_state=42)
y_train = user_artists_train.drop(X_train.index)

X_test = user_artists_test.groupby('userID').sample(frac=0.5, random_state=42)
y_test = user_artists_test.drop(X_test.index)

X_train_matrix = X_train.pivot(index='userID', columns='artistID', values=['weight']).fillna(0).to_numpy()
y_train_matrix = y_train.pivot(index='userID', columns='artistID', values=['weight']).fillna(0).to_numpy()
X_test_matrix = X_test.pivot(index='userID', columns='artistID', values=['weight']).fillna(0).to_numpy()
y_test_matrix = y_test.pivot(index='userID', columns='artistID', values=['weight']).fillna(0).to_numpy()

X_train = X_train.to_numpy()
y_train = y_train.to_numpy()
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()

X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')

In [69]:
# Convert the training data to PyTorch tensors
X_train = torch.from_numpy(X_train) if isinstance(X_train, np.ndarray) else X_train

# Create the autoencoder model and optimizer
model = AutoEncoder()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Define the loss function
criterion = nn.MSELoss()

# Create a DataLoader to handle batching of the training data
train_loader = torch.utils.data.DataLoader(
    X_train, batch_size=32, shuffle=True
)

In [70]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    total_loss = 0.0
    for batch_idx, data in enumerate(train_loader):
        # Get a batch of training data and move it to the device
        data = data.to(device)

        # Forward pass
        encoded, decoded = model(data)

        # Compute the loss and perform backpropagation
        loss = criterion(decoded, data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update the running loss
        total_loss += loss.item() * data.size(0)

    # Print the epoch loss
    epoch_loss = total_loss / len(train_loader.dataset)
    print(
        "Epoch {}/{}: loss={:.4f}".format(epoch + 1, num_epochs, epoch_loss)
    )

Epoch 1/10: loss=12950708.2803
Epoch 2/10: loss=12950699.2975
Epoch 3/10: loss=12950699.3026
Epoch 4/10: loss=12950699.4674
Epoch 5/10: loss=12950699.4157
Epoch 6/10: loss=12950699.3391
Epoch 7/10: loss=12950699.5618
Epoch 8/10: loss=12950699.4639
Epoch 9/10: loss=12950699.3869
Epoch 10/10: loss=12950699.3097


In [80]:
X_train.to(torch.int)

user_2_tensor = X_train[X_train[:, 0] == 2]
sorted_user_2_tensor = user_2_tensor[user_2_tensor[:, 2].argsort(descending=True)]
top_10_input = sorted_user_2_tensor[:10, 1].tolist()
print(top_10_input)

user_2_tensor = X_train[X_train[:, 0] == 2]
sorted_user_2_tensor = user_2_tensor[user_2_tensor[:, 2].argsort(descending=True)]
top_10_input = sorted_user_2_tensor[:10, 1].tolist()
print(top_10_input)

[54.0, 55.0, 57.0, 59.0, 60.0, 63.0, 64.0, 66.0, 67.0, 68.0]
