In [3]:
#Clothing ID, Brand, Category, Color, .... 
#User ID, Clothing ID, Rating (0 = Disike,1 = Like,2 = Love,3 = Purhcased...)

#Method: 
#Collaborative Filtering, Epsilon Greedy Algorithm (Q Learning)

In [4]:
import pandas as pd
import numpy as np
import random

# Parameters
num_clothing = 500
num_users = 1000
num_ratings = 10000

# Clothing data generation
clothing_id = [f"{i:05d}" for i in range(1, num_clothing + 1)]
brands = ['Brand' + str(i) for i in range(1, 21)]  # 20 random brand names
categories = ['shoes', 'shirts', 'pants', 'shorts', 'jackets', 'hoodies', 'hats']
colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'purple', 'grey', 'orange', 'brown']

clothing_data = pd.DataFrame({
    'Clothing ID': clothing_id,
    'Clothing Brand': random.choices(brands, k=num_clothing),
    'Category': random.choices(categories, k=num_clothing),
    'Color': random.choices(colors, k=num_clothing)
})

# User ratings data generation
user_ids = [f"{i:04d}" for i in range(1, num_users + 1)]
clothing_ids = random.choices(clothing_id, k=num_ratings)
ratings = np.random.randint(0, 3, size=num_ratings)

user_ratings_data = pd.DataFrame({
    'User ID': random.choices(user_ids, k=num_ratings),
    'Clothing ID': clothing_ids,
    'Rating': ratings
})

# Save to CSV
clothing_data.to_csv('clothing.csv', index=False)
user_ratings_data.to_csv('user_ratings.csv', index=False)

print("Files generated successfully: clothing.csv and user_ratings.csv")


Files generated successfully: clothing.csv and user_ratings.csv


In [23]:
import pandas as pd
import numpy as np
import random

# Simulate data creation
def create_data():
    num_clothing = 754
    num_users = 10000
    num_ratings = 43133

    # Clothing data
    clothing_id = [f"{i:05d}" for i in range(1, num_clothing + 1)]
    brands = ['Brand' + str(i) for i in range(1, 21)]  # 20 brands
    categories = ['shoes', 'shirts', 'pants', 'shorts', 'jackets', 'hoodies', 'hats']
    colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'purple', 'grey', 'orange', 'brown']

    clothing_data = pd.DataFrame({
        'Clothing ID': clothing_id,
        'Clothing Brand': random.choices(brands, k=num_clothing),
        'Category': random.choices(categories, k=num_clothing),
        'Color': random.choices(colors, k=num_clothing)
    })

    # User ratings data
    user_ids = [f"{i:04d}" for i in range(1, num_users + 1)]
    ratings = np.random.randint(0, 3, size=num_ratings)

    user_ratings_data = pd.DataFrame({
        'User ID': random.choices(user_ids, k=num_ratings),
        'Clothing ID': random.choices(clothing_id, k=num_ratings),
        'Rating': ratings
    })

    return clothing_data, user_ratings_data

# Call the function to create data
clothing_data, user_ratings_data = create_data()


user_ratings_data


Unnamed: 0,User ID,Clothing ID,Rating
0,0069,00018,1
1,1603,00050,1
2,4625,00512,2
3,0162,00711,2
4,4450,00097,2
...,...,...,...
43128,6542,00143,2
43129,5118,00715,1
43130,2941,00490,0
43131,0246,00116,0


In [19]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Convert user IDs and clothing IDs to categorical indices
user_ids = user_ratings_data['User ID'].astype('category')
clothing_ids = user_ratings_data['Clothing ID'].astype('category')

# Create mappings
user_map = {user_id: i for i, user_id in enumerate(user_ids.cat.categories)}
clothing_map = {clothing_id: i for i, clothing_id in enumerate(clothing_ids.cat.categories)}

# Update data with mapped values
user_ratings_data['User ID'] = user_ids.cat.codes
user_ratings_data['Clothing ID'] = clothing_ids.cat.codes

# Split data
train_data, test_data = train_test_split(user_ratings_data, test_size=0.2, random_state=42)

# Dataset class
class RatingsDataset(Dataset):
    def __init__(self, user_ratings):
        self.users = torch.tensor(user_ratings['User ID'].values, dtype=torch.long)
        self.items = torch.tensor(user_ratings['Clothing ID'].values, dtype=torch.long)
        self.ratings = torch.tensor(user_ratings['Rating'].values, dtype=torch.float32)
    
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]

# Define the model
class MatrixFactorization(nn.Module):
    def __init__(self, num_users, num_items, embedding_size=20):
        super(MatrixFactorization, self).__init__()
        self.user_embeddings = nn.Embedding(num_users, embedding_size)
        self.item_embeddings = nn.Embedding(num_items, embedding_size)
        self.user_biases = nn.Embedding(num_users, 1)
        self.item_biases = nn.Embedding(num_items, 1)
        
    def forward(self, user, item):
        user_embedding = self.user_embeddings(user)
        item_embedding = self.item_embeddings(item)
        user_bias = self.user_biases(user)
        item_bias = self.item_biases(item)
        
        # Dot product of user and item embeddings
        interaction = (user_embedding * item_embedding).sum(dim=1)
        
        # Add biases
        interaction += user_bias.squeeze() + item_bias.squeeze()
        
        return interaction

# Instantiate the model
num_users = user_ids.cat.categories.size
num_items = clothing_ids.cat.categories.size
model = MatrixFactorization(num_users, num_items)

# Define loss and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Load data into DataLoader
train_dataset = RatingsDataset(train_data)
test_dataset = RatingsDataset(test_data)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

# Training loop
def train(epoch, model, train_loader, loss_fn, optimizer):
    model.train()
    total_loss = 0
    
    for user, item, rating in train_loader:
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        prediction = model(user, item)
        
        # Compute loss
        loss = loss_fn(prediction, rating)
        
        # Backward pass
        loss.backward()
        
        # Update parameters
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f'Epoch {epoch}: Training Loss: {total_loss / len(train_loader)}')

# Train the model
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(epoch, model, train_loader, loss_fn, optimizer)


Epoch 1: Training Loss: 21.5966396892772
Epoch 2: Training Loss: 12.170845845166374
Epoch 3: Training Loss: 7.363084905287799
Epoch 4: Training Loss: 4.603492978741141
Epoch 5: Training Loss: 2.9698950087322906
Epoch 6: Training Loss: 1.9651107788085938
Epoch 7: Training Loss: 1.3211447394946043
Epoch 8: Training Loss: 0.9052712207331377
Epoch 9: Training Loss: 0.6216573719592655
Epoch 10: Training Loss: 0.43448316174394946


In [21]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Function to generate random data
def generate_random_data(num_items, num_users, num_ratings):
    clothing_ids = [f"{i:05d}" for i in range(1, num_items + 1)]
    user_ids = [f"{i:04d}" for i in range(1, num_users + 1)]
    ratings = np.random.randint(0, 3, size=num_ratings)
    
    random_data = pd.DataFrame({
        'User ID': random.choices(user_ids, k=num_ratings),
        'Clothing ID': random.choices(clothing_ids, k=num_ratings),
        'Rating': ratings
    })
    return random_data

# Create new random test data
new_test_data = generate_random_data(500, 1000, 5000)

# Assuming the model and other variables have been defined in the earlier script:
# Convert this new test data
new_test_user_ids = new_test_data['User ID'].astype('category')
new_test_clothing_ids = new_test_data['Clothing ID'].astype('category')
new_test_data['User ID'] = new_test_user_ids.cat.codes
new_test_data['Clothing ID'] = new_test_clothing_ids.cat.codes

# Create a Dataset and DataLoader for the new test data
new_test_dataset = RatingsDataset(new_test_data)
new_test_loader = DataLoader(new_test_dataset, batch_size=512, shuffle=False)

# Function to evaluate the model
def evaluate_model(model, test_loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for user, item, rating in test_loader:
            prediction = model(user, item)
            loss = loss_fn(prediction, rating)
            total_loss += loss.item()
    print(f"Test Loss: {total_loss / len(test_loader)}")

# Evaluate the model on the new test data
evaluate_model(model, new_test_loader)


Test Loss: 11.9881742477417
