<a href="https://colab.research.google.com/github/Vishnu0920/Recommender_System_Models/blob/main/AttentionalFMOnStudentDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# Read the raw grade records from disk.
df = pd.read_csv('grade_data.csv')

# One encoder per identifier column: each maps the original ids to
# contiguous integer codes (0..n-1) that can index an embedding table.
le_student, le_course = LabelEncoder(), LabelEncoder()

# Build the model-ready frame in a single step:
#   * student_id / course_id -> integer codes from the encoders above
#   * course_grade           -> divided by 10 (assumes grades are on a 0-10
#                               scale so the target lands in [0, 1] - confirm
#                               against the data)
df = df.assign(
    student_id=le_student.fit_transform(df['student_id']),
    course_id=le_course.fit_transform(df['course_id']),
    course_grade=df['course_grade'].div(10.0),
)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)


In [None]:
# Turn the train/test columns into PyTorch tensors.
# Ids become int64 tensors (required for embedding lookups); grades become
# float32 tensors to match the model output and the loss.
train_user = torch.tensor(train_df['student_id'].to_numpy(), dtype=torch.long)
train_course = torch.tensor(train_df['course_id'].to_numpy(), dtype=torch.long)
train_grade = torch.tensor(train_df['course_grade'].to_numpy(), dtype=torch.float32)

test_user = torch.tensor(test_df['student_id'].to_numpy(), dtype=torch.long)
test_course = torch.tensor(test_df['course_id'].to_numpy(), dtype=torch.long)
test_grade = torch.tensor(test_df['course_grade'].to_numpy(), dtype=torch.float32)


In [None]:
class AttentionalFactorizationMachine(nn.Module):
    """Grade regressor over gated, concatenated student/course embeddings.

    The forward pass looks up one embedding per student and per course,
    concatenates them, scales the concatenation by a learned per-sample
    sigmoid gate, adds the gated vector back onto the concatenation and maps
    the result to a single scalar with a linear layer.

    NOTE(review): no pairwise (element-wise product) interaction between the
    two embeddings is computed here; the "attention" is a single scalar gate
    per sample applied to the concatenated embeddings.

    Args:
        num_users: Number of distinct student indices (embedding table rows).
        num_courses: Number of distinct course indices (embedding table rows).
        embedding_dim: Size of each student / course embedding vector.
    """

    def __init__(self, num_users, num_courses, embedding_dim):
        super(AttentionalFactorizationMachine, self).__init__()

        # Both downstream layers consume the concatenated
        # [user_emb, course_emb] vector, hence embedding_dim * 2 inputs.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.course_embedding = nn.Embedding(num_courses, embedding_dim)
        self.linear = nn.Linear(embedding_dim * 2, 1)
        self.attention = nn.Linear(embedding_dim * 2, 1)

    def forward(self, user, course):
        """Predict one score per (user, course) pair.

        Args:
            user: 1-D LongTensor of student indices, shape ``(batch,)``.
            course: 1-D LongTensor of course indices, shape ``(batch,)``.

        Returns:
            Tensor of shape ``(batch,)`` with one prediction per pair. The
            batch dimension is kept even when ``batch == 1``.
        """
        user_emb = self.user_embedding(user)
        course_emb = self.course_embedding(course)

        # Joint representation: (batch, 2 * embedding_dim)
        interaction = torch.cat([user_emb, course_emb], dim=1)

        # Scalar gate in (0, 1) per sample, broadcast over the feature axis
        attention_weights = torch.sigmoid(self.attention(interaction))
        attention = attention_weights * interaction

        # Residual-style sum (not a concatenation): interaction * (1 + gate)
        interaction_attention = interaction + attention

        output = self.linear(interaction_attention)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also drop the batch axis for a single-sample batch and produce a
        # 0-d tensor that is silently broadcast against the target in the loss.
        return output.squeeze(-1)


In [None]:
# Hyperparameters
num_users = len(le_student.classes_)    # one embedding row per encoded student
num_courses = len(le_course.classes_)   # one embedding row per encoded course
embedding_dim = 10
num_epochs = 10
batch_size = 64
learning_rate = 0.001
random_seed = 42  # same value as the train/test split's random_state

# Seed torch's global RNG before anything stochastic is created: it drives
# both the initial embedding / linear weights and the DataLoader shuffle
# order, so without it every Restart & Run All gives different numbers.
torch.manual_seed(random_seed)

# Create DataLoader for training (reshuffled every epoch)
train_dataset = TensorDataset(train_user, train_course, train_grade)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Instantiate the model, define loss function and optimizer.
# nn.MSELoss() uses its default reduction, i.e. it returns the batch mean.
model = AttentionalFactorizationMachine(num_users, num_courses, embedding_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [None]:
# Set the print interval
print_interval = 10  # Adjust the interval as needed

# Training loop with performance monitoring.
# Switch (back) to training mode so re-running this cell after the evaluation
# cell, which calls model.eval(), behaves the same as a first run.
model.train()
for epoch in range(num_epochs):
    # Sum of squared-error loss over all samples seen this epoch
    total_loss = 0.0

    for batch_index, batch in enumerate(train_dataloader):
        user, course, grade = batch
        optimizer.zero_grad()

        # Align the prediction shape with the targets so a batch of size 1
        # can never end up as a 0-d tensor broadcast against the target.
        output = model(user, course).reshape(grade.shape)
        loss = criterion(output, grade)
        loss.backward()
        optimizer.step()

        # criterion returns the *mean* loss of the batch; weight it by the
        # batch size so that dividing by the dataset size below yields a true
        # per-sample average (the last batch may be smaller than the others).
        total_loss += loss.item() * grade.size(0)

        if batch_index % print_interval == 0:
            print(f'Epoch {epoch}, Batch {batch_index}, Loss: {loss.item()}')

    # Print average (per-sample) loss at the end of each epoch
    avg_loss = total_loss / len(train_dataloader.dataset)
    print(f'Epoch {epoch}, Average Loss: {avg_loss}')


Epoch 0, Batch 0, Loss: 1.2062864303588867
Epoch 0, Batch 10, Loss: 1.1519439220428467
Epoch 0, Batch 20, Loss: 1.2703150510787964
Epoch 0, Batch 30, Loss: 1.0147649049758911
Epoch 0, Batch 40, Loss: 1.2423255443572998
Epoch 0, Batch 50, Loss: 0.7098916172981262
Epoch 0, Batch 60, Loss: 0.6395025849342346
Epoch 0, Batch 70, Loss: 0.7679346203804016
Epoch 0, Batch 80, Loss: 0.7215143442153931
Epoch 0, Batch 90, Loss: 0.6791014075279236
Epoch 0, Batch 100, Loss: 0.6706181764602661
Epoch 0, Batch 110, Loss: 0.5120199918746948
Epoch 0, Batch 120, Loss: 0.49761661887168884
Epoch 0, Batch 130, Loss: 0.5822432041168213
Epoch 0, Batch 140, Loss: 0.4195896089076996
Epoch 0, Batch 150, Loss: 0.4313325881958008
Epoch 0, Batch 160, Loss: 0.5568811297416687
Epoch 0, Batch 170, Loss: 0.47009938955307007
Epoch 0, Batch 180, Loss: 0.38351234793663025
Epoch 0, Batch 190, Loss: 0.3767416179180145
Epoch 0, Batch 200, Loss: 0.34226834774017334
Epoch 0, Batch 210, Loss: 0.3706035017967224
Epoch 0, Batch 22

In [None]:
# Bundle the held-out (student, course, grade) tensors into a dataset.
test_dataset = TensorDataset(test_user, test_course, test_grade)

# Iterate over it in fixed order; shuffling is pointless for evaluation.
test_dataloader = DataLoader(
    dataset=test_dataset,
    shuffle=False,
    batch_size=batch_size,
)


In [None]:
# Evaluation on the test dataset
model.eval()
# Sum of squared errors over every test sample (not a sum of batch means)
total_test_loss = 0.0
num_test_samples = len(test_dataset)

# No gradients are needed for evaluation
with torch.no_grad():
    for batch in test_dataloader:
        user, course, grade = batch
        # Align the prediction shape with the targets so a final batch of
        # size 1 cannot end up as a 0-d tensor broadcast against the target.
        output = model(user, course).reshape(grade.shape)
        loss = criterion(output, grade)
        # criterion returns the batch *mean*; multiply by the batch size so
        # the division by num_test_samples below gives the real per-sample
        # MSE. Dividing a sum of batch means by the sample count would
        # under-report the error by roughly a factor of batch_size.
        total_test_loss += loss.item() * grade.size(0)

# MSE is expressed on the scaled grade range (grades were divided by 10)
test_mse = total_test_loss / num_test_samples
print(f'Mean Squared Error (MSE) on the test dataset: {test_mse}')


Mean Squared Error (MSE) on the test dataset: 0.0002814433951916469


In [None]:
def recommend_courses(model, user_and_courses, top_k):
    """Return up to ``top_k`` course labels with the highest predicted score.

    Every course known to the module-level ``le_course`` encoder is scored for
    the given student; courses listed in ``input_courses`` are excluded and
    the best remaining ones are returned.

    Args:
        model: Model called as ``model(user_ids, course_ids)`` with two 1-D
            LongTensors; must yield one score per (user, course) pair.
        user_and_courses: ``(input_user, input_courses)`` tuple.
            ``input_user`` is used directly as the student embedding index.
            ``input_courses`` is a dict keyed by course label; only the keys
            are used (to exclude those courses) - the grade values are not
            passed to the model.
        top_k: Maximum number of recommendations to return.

    Returns:
        list: Course labels ordered best first, never containing a course
        from ``input_courses``. Shorter than ``top_k`` when fewer eligible
        courses exist; empty when none remain or ``top_k <= 0``.
    """
    input_user, input_courses = user_and_courses

    # NOTE(review): input_user indexes the embedding table as-is, so it
    # presumably must be the label-encoded student id rather than the raw id
    # from the CSV - confirm against the caller.
    num_courses = len(le_course.classes_)

    # Score every encoded course for this one student
    all_course_ids = torch.arange(num_courses)
    user_ids = torch.full_like(all_course_ids, fill_value=int(input_user))

    # Pure inference: skip autograd bookkeeping. reshape(-1) guarantees a 1-D
    # result even if the model collapses a single-course batch to a 0-d tensor.
    with torch.no_grad():
        predictions = model(user_ids, all_course_ids).reshape(-1)

    # Mask out courses the student already has. Labels unknown to the encoder
    # are skipped instead of letting le_course.transform raise ValueError:
    # they can never be recommended, so there is nothing to exclude.
    index_of = {label: idx for idx, label in enumerate(le_course.classes_)}
    taken = sorted({index_of[course] for course in input_courses if course in index_of})
    if taken:
        predictions[torch.tensor(taken, dtype=torch.long)] = float('-inf')

    # Never ask topk for more entries than there are unmasked courses,
    # otherwise masked (-inf) courses would be selected.
    num_recommendations = max(0, min(top_k, num_courses - len(taken)))
    if num_recommendations == 0:
        return []

    top_indices = torch.topk(predictions, num_recommendations).indices

    # Map the top indices back to the original course labels
    top_course_ids = le_course.inverse_transform(top_indices.numpy())

    # Defensive final filter on the label level
    return [course_id for course_id in top_course_ids if course_id not in input_courses]


In [None]:
# Demo: recommend courses for one student given the courses already taken.
# input_user is passed to the model as-is; replace it with the student to query.
input_user = 123

# Courses the student has already taken (label -> grade). The labels should
# be ones that occur in the training data; extend the mapping as needed.
input_courses = {
    'CHEMISTRY LABORATORY': 8,
    'GENERAL CHEMISTRY': 7,
    'ELECTRICAL SCIENCES': 3,
    'ADDITIVE MANUFACTURING': 1,
    'PRACTICE SCHOOL I': 10,
    'PHYSICS LABORATORY': 5,
}

# How many recommendations to request
top_k = 5

# Bundle the query the way recommend_courses expects it and run it
user_and_courses = (input_user, input_courses)
recommended_courses = recommend_courses(
    model,
    user_and_courses,
    top_k,
)

print(f"Top {top_k} recommended courses for the student based on previous grades: {recommended_courses}")


Top 5 recommended courses for the student based on previous grades: ['PRACTICE SCHOOL II', 'DESIGN PROJECT', 'BIOLOGY LABORATORY', 'STUDY PROJECT', 'THESIS']
