In [1]:
import pandas as pd

In [2]:
import joblib

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

Getting Your Data Ready

In [4]:
# Toy dataset, written one row per (student, topic) attempt and then
# transposed into the column-oriented dict that pd.DataFrame expects.
column_names = (
    'student_id',
    'topic',
    'difficulty',
    'score',
    'time_spent_minutes',
    'liked_topic',
    'recommended_next_topic',
)
records = [
    (1, 'Algebra', 'Easy', 90, 15, True, 'Geometry'),
    (1, 'Calculus', 'Hard', 60, 45, False, 'Algebra'),
    (2, 'Geometry', 'Medium', 85, 20, True, 'Calculus'),
    (2, 'Algebra', 'Easy', 95, 10, True, 'Trigonometry'),
    (3, 'Calculus', 'Hard', 50, 60, False, 'Algebra'),
    (3, 'Geometry', 'Medium', 88, 22, True, 'Trigonometry'),
    (4, 'Trigonometry', 'Medium', 75, 30, True, 'Calculus'),
    (4, 'Algebra', 'Easy', 92, 12, True, 'Geometry'),
    (5, 'Calculus', 'Hard', 55, 50, False, 'Algebra'),
    (5, 'Trigonometry', 'Hard', 65, 40, False, 'Calculus'),
]
# zip(*records) turns the rows into columns; each column becomes a plain list.
data = {name: list(values) for name, values in zip(column_names, zip(*records))}

In [5]:
# Each key of `data` becomes a column; the rows get a default 0..n-1 index.
df = pd.DataFrame(data=data)

Save the created data to a CSV file

In [6]:
# Persist the dataset (index column omitted) and show the first five rows.
csv_path = 'student_learning_data.csv'
df.to_csv(csv_path, index=False)

preview = df.head()
print("Successfully created and saved the dataset as 'student_learning_data.csv'")
print("\nHere's a look at our data:")
print(preview)

Successfully created and saved the dataset as 'student_learning_data.csv'

Here's a look at our data:
   student_id     topic difficulty  score  time_spent_minutes  liked_topic  \
0           1   Algebra       Easy     90                  15         True   
1           1  Calculus       Hard     60                  45        False   
2           2  Geometry     Medium     85                  20         True   
3           2   Algebra       Easy     95                  10         True   
4           3  Calculus       Hard     50                  60        False   

  recommended_next_topic  
0               Geometry  
1                Algebra  
2               Calculus  
3           Trigonometry  
4                Algebra  


In [8]:
# Learn the distinct difficulty levels and replace each one with its integer
# code, stored alongside the original text column.
label_encoder = LabelEncoder()
difficulty_codes = label_encoder.fit_transform(df['difficulty'])
df['difficulty_encoded'] = difficulty_codes

In [9]:
# Give every column its own encoder. Calling fit_transform() on one shared
# LabelEncoder overwrites its learned classes each time, so afterwards it can
# only encode/decode the last column it saw. With separate encoders, each one
# stays usable for transform() / inverse_transform() on its own column
# (e.g. next_topic_encoder.inverse_transform(...) to turn a predicted code
# back into a topic name).
# The integer codes produced are the same as before: LabelEncoder numbers the
# classes in sorted order, independently for every fit.
topic_encoder = LabelEncoder()
next_topic_encoder = LabelEncoder()
liked_encoder = LabelEncoder()

df['topic_encoded'] = topic_encoder.fit_transform(df['topic'])
df['recommended_next_topic_encoded'] = next_topic_encoder.fit_transform(df['recommended_next_topic'])
df['liked_topic_encoded'] = liked_encoder.fit_transform(df['liked_topic'])

print("\nOur data after encoding text into numbers:")
print(df.head())


Our data after encoding text into numbers:
   student_id     topic difficulty  score  time_spent_minutes  liked_topic  \
0           1   Algebra       Easy     90                  15         True   
1           1  Calculus       Hard     60                  45        False   
2           2  Geometry     Medium     85                  20         True   
3           2   Algebra       Easy     95                  10         True   
4           3  Calculus       Hard     50                  60        False   

  recommended_next_topic  difficulty_encoded  topic_encoded  \
0               Geometry                   0              0   
1                Algebra                   1              1   
2               Calculus                   2              2   
3           Trigonometry                   0              0   
4                Algebra                   1              1   

   recommended_next_topic_encoded  liked_topic_encoded  
0                               2                  

Building a Smart Recommendation Engine

In [10]:
# Model inputs: the encoded categorical columns plus the two numeric measurements.
features = [
    'topic_encoded',
    'difficulty_encoded',
    'score',
    'time_spent_minutes',
    'liked_topic_encoded',
]
# Prediction target: the integer code of the topic to recommend next.
label = 'recommended_next_topic_encoded'

# X is the feature matrix (DataFrame), y the target vector (Series).
X, y = df[features], df[label]

Split the data into training and test sets

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Use the K-Nearest Neighbors (K-NN) algorithm

In [12]:
# k-NN classifies by distance between rows, so the features must share a
# scale. Unscaled, `score` (50-95) and `time_spent_minutes` (10-60) swamp the
# small integer codes (0-3) of the encoded columns, which then barely
# influence which neighbours are chosen.
# Putting StandardScaler in a pipeline means the scaling statistics are
# learned during fit() on the training data only and are re-applied
# automatically on every predict(), including after the model is saved and
# reloaded. `knn` keeps the same fit()/predict() interface.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=3))

In [13]:
# Fit the classifier on the training split only; the test split stays unseen.
knn.fit(X=X_train, y=y_train)
print("\nModel training is complete!")


Model training is complete!


Evaluating the Model on the Test Set

In [14]:
# Predicted target codes for the held-out rows.
y_pred = knn.predict(X=X_test)

In [15]:
# Fraction of held-out rows whose predicted code equals the true code,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 0.00%


Confusion Matrix

In [16]:
# Rows are true classes, columns are predicted classes. With no explicit
# `labels`, only classes that occur in y_test or y_pred get a row/column
# (in sorted order), so the matrix can be smaller than the full class count.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("\nConfusion Matrix:", conf_matrix, sep="\n")


Confusion Matrix:
[[0 2]
 [0 0]]


Saving the Trained Model for Reuse

In [17]:
# Serialize the fitted model to disk so it can be reloaded without retraining.
joblib.dump(value=knn, filename='recommendation_model.joblib')
print("\nModel saved as 'recommendation_model.joblib'")


Model saved as 'recommendation_model.joblib'


Loading our trained model

In [18]:
# Restore the model written by the previous step.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
loaded_model = joblib.load(filename='recommendation_model.joblib')

Enter a New Student's Data

In [20]:
# One-row input for the model. The columns must carry the same names, in the
# same order, as the `features` list used for training, and categorical
# values must already be given as their integer codes.
new_student_features = {
    'topic_encoded': [0],          # Corresponds to 'Algebra'
    'difficulty_encoded': [0],     # Corresponds to 'Easy'
    'score': [95],
    'time_spent_minutes': [13],
    'liked_topic_encoded': [1],    # Corresponds to 'True'
}
new_student_data = pd.DataFrame(new_student_features)

In [21]:
# Array with one predicted target code per input row (a single row here).
prediction_encoded = loaded_model.predict(X=new_student_data)

Map the encoded prediction back to its original topic name

In [23]:
# Build a code -> topic-name lookup from the paired encoded/original columns,
# then translate the model's single predicted code back into text.
topic_mapping = {
    code: topic
    for code, topic in zip(df['recommended_next_topic_encoded'], df['recommended_next_topic'])
}
predicted_code = prediction_encoded[0]
predicted_topic = topic_mapping[predicted_code]

In [24]:
# Report the decoded recommendation, followed by the closing message.
print(
    f"\nNew recommendation for the student is: {predicted_topic}",
    "\nVoila! Your recommendation system suggested a next step based on the student's data! ",
    sep="\n",
)


New recommendation for the student is: Geometry

Voila! Your recommendation system suggested a next step based on the student's data! 
