<a href="https://colab.research.google.com/github/ifuu16/LearningResourcesRecommender/blob/main/LearningResourcesRecommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Import the necessary libraries
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [4]:

def preprocess_learning_resources(file_path):
    """
    Preprocess the learning resources dataset.

    Args:
        file_path (str): Path to the CSV file containing the dataset.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    # Load the dataset
    resources_data = pd.read_csv(file_path)

     # Display dataset information before changes
    print("Before changes:")
    resources_data.info()

    # Rename the columns
    resources_data.rename(index=str, columns={
        "Resource ID": "ID",
        "Category/Subject": "subject",
        "Tags/Keywords for the course": "tags",
        "Title of the course": "title",
        "Difficulty Level": "difficulty_level",
        "URL/Link to the course": "link"
    }, inplace=True)


   #convert text to lowercase and uppercase to ensure consistency

    resources_data['subject'] = resources_data['subject'].str.lower()
    resources_data['difficulty_level'] = resources_data['difficulty_level'].str.capitalize()
    resources_data['tags'] = resources_data['tags'].str.lower()


    # Remove duplicate rows based on 'Title' and 'Category' columns. We have 300 rows but only 14 unique values hence we need to drop unique rows
    resources_data.drop_duplicates(subset=['title'], inplace=True)






    # Changing the ID datatype from object to int and then dropping it
    #resources_data["ID"] = resources_data["ID"].apply(lambda x: int(x.replace('R', '')))

    # Drop the ID column
    resources_data.drop(columns=["ID"], inplace=True)\


    # Display dataset information after changes
    print("After changes:")
    resources_data.info()


    return resources_data


file_path = "ML_Learning_Dataset.csv"
preprocessed_resources = preprocess_learning_resources(file_path)


Before changes:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 7 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   Resource ID                   300 non-null    object
 1   Title of the course           300 non-null    object
 2   Description about the course  300 non-null    object
 3   Category/Subject              300 non-null    object
 4   Difficulty Level              300 non-null    object
 5   Tags/Keywords for the course  300 non-null    object
 6   URL/Link to the course        300 non-null    object
dtypes: object(7)
memory usage: 16.5+ KB
After changes:
<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 0 to 14
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   title                         15 non-null     object
 1   Description about the cour

In [5]:
#Print the 5 first rows
preprocessed_resources.head()

Unnamed: 0,title,Description about the course,subject,difficulty_level,tags,link
0,Introduction to Linear Algebra,"Learn the fundamentals of linear algebra, incl...",maths,Beginner,"linear algebra, vectors, matrices",https://example.com/intro-to-linear-algebra
1,Machine Learning Basics,An introductory course on machine learning cov...,machine learning,Beginner,"machine learning, ai, algorithms",https://example.com/ml-basics
2,Statistics for Data Science,Comprehensive course on statistics tailored fo...,maths,Medium,"statistics, data science, analysis",https://example.com/stats-for-data-science
3,Advanced Data Visualization,Explore advanced techniques and tools for effe...,data visualization,Medium,"data visualization, charts, graphs",https://example.com/advanced-data-viz
4,Python Programming Fundamentals,Start your programming journey with Python and...,programming,Beginner,"python, programming, coding",https://example.com/python-fundamentals


In [6]:

#Let's create a class for the dataset
class ResourcesDataset:
  def __init__(self,resources):
    self.resources = resources

    #Encode the categorical features
    self.category_encoders = {}
    categorical_cols = ['subject', 'difficulty_level','link']

    for col in categorical_cols:
      le = LabelEncoder()
      self.resources[f'{col}_encoded'] = le.fit_transform(self.resources[col])
      self.category_encoders[col] = le

    #prepare feature matrix
    self.feature_columns = [
          'subject_encoded',
          'difficulty_level_encoded',
          'link_encoded'
      ]



      #Add one-hot encoded tags
    self.resources['tags_list'] = self.resources['tags'].str.split(',')

      # One-hot encoding for tags
    unique_tags = set(tag for tags in self.resources['tags_list'] for tag in tags)
    for tag in unique_tags:
        self.resources[f'tag_{tag}'] = self.resources['tags_list'].apply(lambda x: 1 if tag in x else 0)
        self.feature_columns.append(f'tag_{tag}')

    # Prepare feature tensor
    self.features = torch.tensor(
      self.resources[self.feature_columns].values,
        dtype=torch.float32
        )

      # Normalize features
    self.scaler = StandardScaler()
    self.features = torch.tensor(
        self.scaler.fit_transform(self.features),
        dtype=torch.float32
        )






In [7]:
class ResourceRecommenderModel(nn.Module):
    def __init__(self, input_dim, embedding_dim=64):
        """
        Neural network for resource recommendation

        Args:
            input_dim (int): Number of input features
            embedding_dim (int): Dimension of embedding layer
        """
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, embedding_dim),
            nn.ReLU()
        )

    def forward(self, x):
        """Forward pass of the model"""
        return self.network(x)

    def recommend(self, input_resource, dataset, top_k=3):
        """
        Generate recommendations based on input resource

        Args:
            input_resource (str): Title of input resource
            dataset (LearningResourceDataset): Dataset object
            top_k (int): Number of recommendations to generate

        Returns:
            DataFrame with top recommendations
        """
        # Find input resource index
        resource_index = dataset.resources[
            dataset.resources['title'] == input_resource
        ].index[0]

        resource_index = int(resource_index)



        # Get embeddings
        with torch.no_grad():
            input_embedding = self.forward(
                dataset.features[resource_index].unsqueeze(0)
            )
            all_embeddings = self.forward(dataset.features)

            # Calculate cosine similarity
            similarities = nn.functional.cosine_similarity(
                input_embedding,
                all_embeddings
            ).squeeze()

        # Sort and get top recommendations
        top_indices = similarities.argsort(descending=True)[1:top_k+1]

        return dataset.resources.iloc[top_indices]


In [8]:
#Import tabulate to visualize result
from tabulate import tabulate

def main():
    # define dataset
    resources = preprocessed_resources
    dataset = ResourcesDataset(resources)

    # Initialize model
    model = ResourceRecommenderModel(input_dim=len(dataset.feature_columns))

    # Training (simplified for demonstration)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Simple training loop
    for epoch in range(100):
        embeddings = model(dataset.features)
        loss = criterion(embeddings, embeddings)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Generate recommendations
    input_resource = 'Introduction to Linear Algebra'
    recommendations = model.recommend(input_resource, dataset)

    # Display recommendations in a tabulated format
    print(f"\nRecommendations for '{input_resource}':")
    print(tabulate(
        recommendations[['title', 'subject', 'difficulty_level','link']],
        headers=['Title', 'Subject Category', 'Difficulty Level','Link'],
        tablefmt='fancy_grid'
    ))


if __name__ == '__main__':
    main()



Recommendations for 'Introduction to Linear Algebra':
╒════╤═══════════════════════════════════╤════════════════════╤════════════════════╤═══════════════════════════════════════════════════╕
│    │ Title                             │ Subject Category   │ Difficulty Level   │ Link                                              │
╞════╪═══════════════════════════════════╪════════════════════╪════════════════════╪═══════════════════════════════════════════════════╡
│  1 │ Machine Learning Basics           │ machine learning   │ Beginner           │ https://example.com/ml-basics                     │
├────┼───────────────────────────────────┼────────────────────┼────────────────────┼───────────────────────────────────────────────────┤
│  6 │ Neural Networks and Deep Learning │ machine learning   │ Medium             │ https://example.com/deep-learning-with-tensorflow │
├────┼───────────────────────────────────┼────────────────────┼────────────────────┼───────────────────────────────────────