<a href="https://colab.research.google.com/github/Vishnu0920/Recommender_System_Models/blob/main/FMOnStudentDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas torch scikit-learn
!pip install pyfm

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Cell 2: Import libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# Cell 3: Load the dataset
df = pd.read_csv('grade_data.csv')


In [None]:
# Cell 4: Encode student_id and course_id using LabelEncoder
le_student = LabelEncoder()
le_course = LabelEncoder()

df['student_id'] = le_student.fit_transform(df['student_id'])
df['course_id'] = le_course.fit_transform(df['course_id'])


In [None]:
# Cell 5: Map course grades to the range [0, 1] for regression
df['course_grade'] = df['course_grade'] / 10.0


In [None]:
# Cell 6: Train-test split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


In [None]:
# Cell 7: Convert the data to PyTorch tensors
train_user = torch.LongTensor(train_df['student_id'].values)
train_course = torch.LongTensor(train_df['course_id'].values)
train_grade = torch.FloatTensor(train_df['course_grade'].values)

test_user = torch.LongTensor(test_df['student_id'].values)
test_course = torch.LongTensor(test_df['course_id'].values)
test_grade = torch.FloatTensor(test_df['course_grade'].values)


In [None]:
# Cell 8: Define the Factorization Machine model
class FactorizationMachine(nn.Module):
    def __init__(self, num_users, num_courses, embedding_dim):
        super(FactorizationMachine, self).__init__()

        # Adjust the dimensions of embeddings and linear layer
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.course_embedding = nn.Embedding(num_courses, embedding_dim)
        self.linear = nn.Linear(embedding_dim * 2, 1)

    def forward(self, user, course):
        user_emb = self.user_embedding(user)
        course_emb = self.course_embedding(course)

        # Concatenate user and course embeddings along the last dimension
        interaction = torch.cat([user_emb, course_emb], dim=1)

        output = self.linear(interaction)
        return output


In [None]:
# class FactorizationMachine(nn.Module):
#     def forward(self, user, course):
#         user_emb = self.user_embedding(user)
#         course_emb = self.course_embedding(course)

#         # Reshape embeddings if necessary
#         user_emb = user_emb.unsqueeze(1) if len(user_emb.shape) < 3 else user_emb
#         course_emb = course_emb.unsqueeze(1) if len(course_emb.shape) < 3 else course_emb

#         # Concatenate user and course embeddings along the last dimension
#         interaction = torch.cat([user_emb, course_emb], dim=2)

#         # Reshape interaction tensor
#         interaction = interaction.view(-1, interaction.size(1) * interaction.size(2))

#         output = self.linear(interaction)
#         return output


In [None]:
# Cell 9: Hyperparameters
num_users = len(le_student.classes_)
num_courses = len(le_course.classes_)
embedding_dim = 10
num_epochs = 10
batch_size = 64
learning_rate = 0.001


In [None]:
# Cell 10: Create DataLoader for training
train_dataset = TensorDataset(train_user, train_course, train_grade)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


In [None]:
# Cell 11: Instantiate the model, define loss function and optimizer
model = FactorizationMachine(num_users, num_courses, embedding_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [None]:
# Set the print interval
print_interval = 10  # Adjust the interval as needed

# Cell 12: Training loop with performance monitoring
for epoch in range(num_epochs):
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for batch_index, batch in enumerate(train_dataloader):
        user, course, grade = batch
        optimizer.zero_grad()

        # Print input shapes for debugging
        #print("Shapes - User:", user.shape, "Course:", course.shape)

        output = model(user, course).squeeze()
        loss = criterion(output, grade)
        loss.backward()
        optimizer.step()

        # Print training loss at regular intervals
        if batch_index % print_interval == 0:
            print(f'Epoch {epoch}, Batch {batch_index}, Loss: {loss.item()}')




Epoch 0, Batch 0, Loss: 0.8195591568946838
Epoch 0, Batch 10, Loss: 0.5981851816177368
Epoch 0, Batch 20, Loss: 0.514411211013794
Epoch 0, Batch 30, Loss: 0.4948277771472931
Epoch 0, Batch 40, Loss: 0.5008884072303772
Epoch 0, Batch 50, Loss: 0.4669727385044098
Epoch 0, Batch 60, Loss: 0.4339965283870697
Epoch 0, Batch 70, Loss: 0.35265734791755676
Epoch 0, Batch 80, Loss: 0.4525752067565918
Epoch 0, Batch 90, Loss: 0.320129930973053
Epoch 0, Batch 100, Loss: 0.25728151202201843
Epoch 0, Batch 110, Loss: 0.28497493267059326
Epoch 0, Batch 120, Loss: 0.2424587905406952
Epoch 0, Batch 130, Loss: 0.2943894863128662
Epoch 0, Batch 140, Loss: 0.2656079828739166
Epoch 0, Batch 150, Loss: 0.20164929330348969
Epoch 0, Batch 160, Loss: 0.1889808177947998
Epoch 0, Batch 170, Loss: 0.20459142327308655
Epoch 0, Batch 180, Loss: 0.20902928709983826
Epoch 0, Batch 190, Loss: 0.1690414845943451
Epoch 0, Batch 200, Loss: 0.1974780410528183
Epoch 0, Batch 210, Loss: 0.1615159958600998
Epoch 0, Batch 22

In [None]:
# Cell 14: Evaluation on the test dataset
model.eval()  # Set the model to evaluation mode
test_dataset = TensorDataset(test_user, test_course, test_grade)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

total_test_loss = 0.0
num_test_samples = 0

# Evaluate the model on the test dataset
with torch.no_grad():
    for batch in test_dataloader:
        user, course, grade = batch
        output = model(user, course).squeeze()
        loss = criterion(output, grade)
        total_test_loss += loss.item()
        num_test_samples += len(grade)

# Calculate Mean Squared Error (MSE) on the test dataset
test_mse = total_test_loss / num_test_samples

print(f'Mean Squared Error (MSE) on the test dataset: {test_mse}')



Mean Squared Error (MSE) on the test dataset: 0.00025130533620206804


In [None]:
#To evaluate model based on hit ratio
def hit_ratio_at_k(model, test_data, le_student, le_course, top_k):
    model.eval()
    num_hits = 0
    total_users = len(test_data['student_id'].unique())

    for user_id in test_data['student_id'].unique():
        user = torch.LongTensor([le_student.transform([user_id])[0]])
        all_course_ids = torch.arange(len(le_course.classes_))
        user_ids = torch.full_like(all_course_ids, fill_value=user.item())

        predictions = model(user_ids, all_course_ids).squeeze()

        top_indices = torch.topk(predictions, top_k).indices.numpy()
        top_course_ids = le_course.inverse_transform(top_indices)

        user_data = test_data[test_data['student_id'] == user_id]
        true_courses = user_data['course_id'].values

        # Check if any of the true courses is among the top-k recommended courses
        if any(course in top_course_ids for course in true_courses):
            num_hits += 1

    hit_ratio = num_hits / total_users
    return hit_ratio


In [None]:
# Set top_k to the desired number for hit ratio calculation
top_k = 5
# Evaluate Hit Ratio at k on the test set
hit_ratio = hit_ratio_at_k(model, test_df, le_student, le_course, top_k)
print(f"Hit Ratio at {top_k}: {hit_ratio}")
#we are getting 0->it means students chosen courses isnt the same as the recommeded courses
#this isnt an effective measure of model performance because ts not necessary that students wud have
#chosen the recommended courses.

Hit Ratio at 5: 0.0


In [None]:
def recommend_courses(model, user_and_courses, top_k):
    input_user, input_courses = user_and_courses

    # Convert input_user to PyTorch tensor
    user_ids = torch.LongTensor([input_user])

    # Generate all possible course IDs
    all_course_ids = torch.arange(len(le_course.classes_))

    # Repeat the given user_id for all courses
    user_ids = torch.full_like(all_course_ids, fill_value=user_ids[0])

    # Make predictions for all courses for the given student
    predictions = model(user_ids, all_course_ids).squeeze()

    # Exclude courses already in input_courses from recommendations
    for course, grade in input_courses.items():
        course_index = le_course.transform([course])[0]
        if course_index < len(predictions):
            predictions[course_index] = float('-inf')

    # Get the indices of the top-k predictions
    num_recommendations = min(top_k, len(predictions))
    top_indices = torch.topk(predictions, num_recommendations).indices

    # Map the top indices back to the course IDs
    top_course_ids = le_course.inverse_transform(top_indices.numpy())

    # Exclude courses already in input_courses from recommendations (additional check)
    top_course_ids = [course_id for course_id in top_course_ids if course_id not in input_courses]

    return top_course_ids

# Example: Input student's grade for previous courses
input_user = 123  # Replace with the actual student ID

# Use the actual course labels seen during training
input_courses = {
    'CHEMISTRY LABORATORY': 8,
    'GENERAL CHEMISTRY': 7,
    'ELECTRICAL SCIENCES': 3,
    'ADDITIVE MANUFACTURING': 1,
    'PRACTICE SCHOOL I':10,
    'PHYSICS LABORATORY':5

    # Add more courses and grades as needed
}

# Set top_k to the desired number
top_k = 5

# Call the function with the updated course grades
user_and_courses = (input_user, input_courses)
recommended_courses = recommend_courses(model, user_and_courses, top_k)

print(f"Top {top_k} recommended courses for the student based on previous grades: {recommended_courses}")


Top 5 recommended courses for the student based on previous grades: ['PRACTICE SCHOOL II', 'STUDY PROJECT', 'THESIS', 'CONTROL SYSTEMS LABORATORY', 'ELEC & ELECTRONIC CIRCUITS LAB']


In [None]:
def recommend_courses(model, user_grades, top_k):
    # Convert user grades to PyTorch tensor
    user_grades_tensor = torch.FloatTensor(user_grades)

    # Generate all possible course IDs
    all_course_ids = torch.arange(len(le_course.classes_))

    # Repeat the given user grades for all courses
    user_grades_tensor = user_grades_tensor.unsqueeze(0).expand(len(le_course.classes_), -1)

    # Make predictions for all courses for the given user grades
    predictions = model(user_grades_tensor.long(), all_course_ids).squeeze()  # Convert to LongTensor

    # Get the indices of the top-k predictions
    num_recommendations = min(top_k, len(predictions))
    top_indices = torch.topk(predictions, num_recommendations).indices

    # Map the top indices back to the course IDs
    top_course_ids = le_course.inverse_transform(top_indices.numpy())

    return top_course_ids


# Example: Input student's grade for previous courses
input_grades = {
    'CHEMISTRY LABORATORY': 8,
    'GENERAL CHEMISTRY': 7,
    'ELECTRICAL SCIENCES': 3,
    'ADDITIVE MANUFACTURING': 1,
    'PRACTICE SCHOOL I': 10,
    'PHYSICS LABORATORY': 5
    # Add more courses and grades as needed
}

# Set top_k to the desired number
top_k = 5

# Call the function with the updated course grades
recommended_courses = recommend_courses(model, list(input_grades.values()), top_k)

print(f"Top {top_k} recommended courses for the student based on previous grades: {recommended_courses}")


RuntimeError: Tensors must have same number of dimensions: got 3 and 2