In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise.model_selection.split import train_test_split
from surprise import Dataset, Reader , KNNBasic
themes= pd.read_csv("themes.csv")
users = pd.read_csv("users.csv")
themes.head()

Unnamed: 0,themesid,titel,keywords
0,1,CONCEPTION D’UN SYSTÈME D’INFORMATION \nET DÉ...,IL | Architecture de l'information | Conceptio...
1,2,TRAITEMENT DE L'INFORMATION \nGÉOGRAPHIQUE DAN...,IL | Collecte de données | Traitement de donné...
2,3,TECHNIQUES DE GÉNÉRATION DES SCRIPTS DE \nRECO...,IL | Analyse de code source | Interprétation s...
3,4,RÉALISATION D'UNE APPLICATION MOBILE POUR LA\n...,IL | Développement d'applications mobiles | Sé...
4,5,CONCEPTION ET DÉVELOPPEMENT DU MODULE RECRUTEM...,IL | Collecte des besoins | Développement web ...


In [37]:
df = themes.merge(users , how ="left" , on ="themesid")
columns_titles = ["themesid","userid","titel","keywords","liked"]
df=df.reindex(columns=columns_titles)
df.dropna()

Unnamed: 0,themesid,userid,titel,keywords,liked
0,1,1.0,CONCEPTION D’UN SYSTÈME D’INFORMATION \nET DÉ...,IL | Architecture de l'information | Conceptio...,1.0
1,1,3.0,CONCEPTION D’UN SYSTÈME D’INFORMATION \nET DÉ...,IL | Architecture de l'information | Conceptio...,0.0
4,4,2.0,RÉALISATION D'UNE APPLICATION MOBILE POUR LA\n...,IL | Développement d'applications mobiles | Sé...,1.0
11,11,3.0,CONCEPTION D'UN DATAWAREHOUSE ET MISE EN PLACE...,BIG DATA | Datawarehouse | Modélisation de don...,1.0
12,11,1.0,CONCEPTION D'UN DATAWAREHOUSE ET MISE EN PLACE...,BIG DATA | Datawarehouse | Modélisation de don...,1.0
13,12,1.0,TECHNIQUES DE MACHINE LEARNING POUR LA \nDÉTEC...,BIG DATA | Machine Learning | Réseaux de neuro...,1.0
14,12,3.0,TECHNIQUES DE MACHINE LEARNING POUR LA \nDÉTEC...,BIG DATA | Machine Learning | Réseaux de neuro...,0.0
15,13,1.0,PROPOSITION ET CONCEPTION D'UNE SOLUTION \nDMV...,GTR | Réseau | Sécurité de l'information | Int...,0.0
16,13,2.0,PROPOSITION ET CONCEPTION D'UNE SOLUTION \nDMV...,GTR | Réseau | Sécurité de l'information | Int...,1.0
17,14,1.0,CONCEPTION ET RÉALISATION D'UNE PLATE-FORME DE...,GTR | Robotique | Technologie de suivi solair...,1.0


In [38]:
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(df[['userid', 'themesid','liked']], reader)
trainset, testset = train_test_split(dataset, test_size=0.20)
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f56d804b6a0>

In [42]:
# Make hybrid recommendations

tfidf = TfidfVectorizer(stop_words='english')
post_features = tfidf.fit_transform(df['titel'])
# Train a content-based filtering model
post_similarity = cosine_similarity(post_features) 

user_id = 1
n_recommendations = 10
liked_posts = df[df['userid'] == user_id]['themesid'].tolist() #post li darlhom like deja 
liked_posts_sim = post_similarity[liked_posts, :] #hna ydir similarity matrix bin li darlhom like w lokhrin 
cf_predictions = model.test([(user_id, themesid, 1) for themesid in liked_posts])
cf_predictions = {pred.iid: pred.est for pred in cf_predictions} #ydirlk collaborative filtring based user previosly ratings
cb_predictions = liked_posts_sim.sum(axis=0) 
cb_predictions = {idx: score for idx, score in enumerate(cb_predictions)} #generate a theme based collaborative filtring 
hybrid_predictions = {}
for themesid in cb_predictions: #and we merge theme here 
    if themesid not in cf_predictions:
        hybrid_predictions[themesid] = cb_predictions[themesid]
    else:
        hybrid_predictions[themesid] = 0.5 * cf_predictions[themesid] + 0.5 * cb_predictions[themesid]

In [43]:
print(hybrid_predictions)

{0: 1.5349144154627772, 1: 1.2674572077313886, 2: 0.24400400354995883, 3: 0.37564135521104586, 4: 0.8781762848600668, 5: 0.7797315558092672, 6: 0.2575339437080197, 7: 0.3256416286575408, 8: 0.7593298547758845, 9: 0.5411056587764804, 10: 0.1605788814309074, 11: 1.7898351752853006, 12: 1.7898351752853006, 13: 3.6843390113997962, 14: 1.6843390113997965, 15: 1.7638017424646246, 16: 1.7638017424646246, 17: 0.6326018720198046, 18: 0.6326018720198046, 19: 1.8197147290446831, 20: 1.4098573645223416, 21: 0.18472008659200997, 22: 0.8915488042096966, 23: 0.8915488042096966, 24: 0.2894385630486889, 25: 0.6342936252198229, 26: 0.9079938749479125, 27: 1.3106018244791982, 28: 1.5349144154627772, 29: 0.9907374698207809, 30: 0.23182840321809275, 31: 0.4845407105053745, 32: 0.476716706417448, 33: 0.8766464577324009, 34: 0.1120947425712181, 35: 0.305850370857914, 36: 0.4439448075264291, 37: 0.9363999314464393, 38: 1.0132261505901485, 39: 0.4329892785854838, 40: 1.1490343701152173, 41: 0.5031110225841361,