In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
import time

In [5]:
# Load the two raw MovieLens tables in one pass:
# `ratings` comes from rating.csv and `movies` from movie.csv.
ratings, movies = map(
    pd.read_csv,
    ('MovieLens20MDataset/rating.csv', 'MovieLens20MDataset/movie.csv'),
)

In [None]:
# --- Build the boolean user x movie "liked" matrix and the movie -> genre table ---

# Drop movies that carry no genre information. `.copy()` detaches the result
# from the frame it was sliced from, so the column assignment further down
# writes into an independent frame instead of a slice (which is what makes
# pandas emit SettingWithCopyWarning).
movies = movies[movies['genres'] != "(no genres listed)"].copy()

# Keep only ratings strictly above 3 and flag each of them with 1.
# `.assign` returns a new frame, so nothing is written into a filtered slice.
bitmap_df = (
    ratings.loc[ratings['rating'] > 3, ['userId', 'movieId', 'rating']]
    .assign(rating=1)
)

# Restrict to "active" users: those with at least 100 rows left after the
# rating filter above.
user_movie_counts = bitmap_df['userId'].value_counts()
active_users = user_movie_counts[user_movie_counts >= 100].index
bitmap_df = bitmap_df[bitmap_df['userId'].isin(active_users)]

# pivot() refuses duplicate (index, column) pairs, so de-duplicate first.
bitmap_df = bitmap_df.drop_duplicates(subset=['userId', 'movieId'])

# One row per user, one column per movie. After the pivot a cell is 1 where the
# pair exists and NaN otherwise, so `notna()` yields the same boolean matrix as
# filling with 0 and casting to bool.
user_movie_matrix = bitmap_df.pivot(index='userId', columns='movieId', values='rating').notna()

# Split the pipe-separated genre string into a list of genres. The isinstance
# guard leaves values that are not strings untouched, so re-running this cell
# after `genres` already holds lists does not raise.
movies['genres'] = movies['genres'].apply(
    lambda genres: genres.split('|') if isinstance(genres, str) else genres
)

# Long format: one row per (movieId, genre) pair.
movie_genres = movies.explode('genres')[['movieId', 'genres']]

In [3]:
# Inspect the pivoted matrix: userId as the index, movieId as the columns, boolean cells.
user_movie_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,131231,131239,131241,131243,131248,131250,131252,131254,131256,131262
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
11,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138474,True,True,False,False,True,True,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
138475,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
138477,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
138486,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [4]:
# Inspect the exploded genre table: one row per (movieId, genre) pair.
movie_genres

Unnamed: 0,movieId,genres
0,1,Adventure
0,1,Animation
0,1,Children
0,1,Comedy
0,1,Fantasy
...,...,...
27274,131256,Comedy
27275,131258,Adventure
27277,131262,Adventure
27277,131262,Fantasy


In [17]:
# Persist the (movieId, genres) table for later reuse.
# index=False: after explode() the index is just the repeated row label of the
# source movie, so writing it would only add a spurious "Unnamed: 0" column
# when the file is read back with pd.read_csv.
movie_genres.to_csv("derivatives/movie_genres.csv", index=False)

In [5]:
# Mine frequent itemsets from the boolean user x movie matrix with FP-Growth.
# min_support=0.2 is the support threshold handed to mlxtend; use_colnames=True
# makes the itemsets carry the matrix's column labels (movieIds) rather than
# column positions.
frequent_itemsets = fpgrowth(user_movie_matrix, min_support=0.2, use_colnames=True)

In [6]:
# Inspect the mined itemsets together with their support values.
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.679252,(296)
1,0.664935,(318)
2,0.656564,(593)
3,0.637369,(260)
4,0.635002,(1198)
...,...,...
31095,0.204653,"(1968, 50)"
31096,0.202604,"(1968, 1210)"
31097,0.201362,"(1968, 1)"
31098,0.230458,"(1968, 593)"


In [7]:
# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a threshold of 1.2.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

In [8]:
# Inspect the generated rules (antecedents, consequents and their quality metrics).
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(296, 318)",(593),0.506091,0.656564,0.408123,0.806422,1.228246,0.075842,1.774150,0.376245
1,"(593, 318)",(296),0.497546,0.679252,0.408123,0.820270,1.207609,0.070163,1.784615,0.342155
2,(296),"(593, 318)",0.679252,0.497546,0.408123,0.600841,1.207609,0.070163,1.258782,0.535988
3,(593),"(296, 318)",0.656564,0.506091,0.408123,0.621604,1.228246,0.075842,1.305271,0.541093
4,"(593, 2571)",(296),0.473415,0.679252,0.389620,0.822999,1.211625,0.068052,1.812122,0.331689
...,...,...,...,...,...,...,...,...,...,...
367363,(1197),(1968),0.466978,0.321787,0.208665,0.446841,1.388623,0.058398,1.226073,0.525049
367364,"(1968, 1196)",(260),0.226648,0.637369,0.202200,0.892129,1.399707,0.057741,3.361725,0.369256
367365,"(1968, 260)",(1196),0.222838,0.629950,0.202200,0.907383,1.440405,0.061823,3.995501,0.393419
367366,(1196),"(1968, 260)",0.629950,0.222838,0.202200,0.320977,1.440405,0.061823,1.144530,0.826242


In [9]:
# Persist the rules so later cells can reload them instead of re-mining.
# index=False keeps the positional index out of the file, which would otherwise
# come back as an extra "Unnamed: 0" column on pd.read_csv.
# NOTE(review): the antecedents/consequents cells are written as text, so a
# reader of this file has to parse them back into collections.
rules.to_csv("rules/kisisel_tur_rules.csv", index=False)

In [10]:
# Pick a sample user and collect every movieId that user has a rating row for
# (regardless of the rating value), then display the list.
user_id = 1
user_movies = ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist()
user_movies

[2,
 29,
 32,
 47,
 50,
 112,
 151,
 223,
 253,
 260,
 293,
 296,
 318,
 337,
 367,
 541,
 589,
 593,
 653,
 919,
 924,
 1009,
 1036,
 1079,
 1080,
 1089,
 1090,
 1097,
 1136,
 1193,
 1196,
 1198,
 1200,
 1201,
 1208,
 1214,
 1215,
 1217,
 1219,
 1222,
 1240,
 1243,
 1246,
 1249,
 1258,
 1259,
 1261,
 1262,
 1266,
 1278,
 1291,
 1304,
 1321,
 1333,
 1348,
 1350,
 1358,
 1370,
 1374,
 1387,
 1525,
 1584,
 1750,
 1848,
 1920,
 1967,
 1994,
 1997,
 2021,
 2100,
 2118,
 2138,
 2140,
 2143,
 2173,
 2174,
 2193,
 2194,
 2253,
 2288,
 2291,
 2542,
 2628,
 2644,
 2648,
 2664,
 2683,
 2692,
 2716,
 2761,
 2762,
 2804,
 2872,
 2918,
 2944,
 2947,
 2959,
 2968,
 3000,
 3030,
 3037,
 3081,
 3153,
 3265,
 3438,
 3476,
 3479,
 3489,
 3499,
 3889,
 3932,
 3996,
 3997,
 4011,
 4027,
 4105,
 4128,
 4133,
 4226,
 4306,
 4446,
 4467,
 4571,
 4720,
 4754,
 4878,
 4896,
 4911,
 4915,
 4941,
 4980,
 4993,
 5026,
 5039,
 5040,
 5146,
 5171,
 5540,
 5679,
 5797,
 5816,
 5898,
 5952,
 5999,
 6093,
 6242,
 6333

In [10]:
# Reload the persisted artefacts: the mined rules and the movie/genre table.
kisisel_tur_df = pd.read_csv("rules/kisisel_tur_rules.csv")
movie_genres = pd.read_csv("derivatives/movie_genres.csv")

# Distinct genre labels found in the table.
genre_list = movie_genres['genres'].unique().tolist()

# One row per genre, holding the list of movieIds tagged with that genre.
genre_movies_df = (
    movie_genres
    .groupby('genres')['movieId']
    .apply(list)
    .reset_index()
)

In [None]:
# Recommend movies of one genre to one user from the in-memory `rules` frame.
user_id = 1
selected_genre = "Children"

# Every movieId the user has a rating row for. The list is kept under its
# original name; the set gives O(1) membership tests in the loops below.
user_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
seen_movies = set(user_movies)

# movieIds tagged with the selected genre, computed once here instead of
# re-filtering `movie_genres` for every item of every rule.
genre_movie_ids = set(
    movie_genres.loc[movie_genres['genres'] == selected_genre, 'movieId'].tolist()
)


def filter_rules(antecedents, consequents):
    """Tell whether a rule is usable for the current user and genre.

    Args:
        antecedents: iterable of movieIds on the left-hand side of the rule.
        consequents: iterable of movieIds on the right-hand side of the rule.

    Returns:
        True when every antecedent is in `seen_movies` and every consequent is
        in `genre_movie_ids`; False otherwise. Both sets are the snapshots
        computed in this cell for `user_id` and `selected_genre`.
    """
    movie_match = all(item in seen_movies for item in antecedents)
    genre_match = all(item in genre_movie_ids for item in consequents)
    return movie_match and genre_match


# Build the row mask with a plain loop over the two columns; an explicitly
# typed boolean Series also behaves correctly when `rules` has no rows.
rule_mask = pd.Series(
    [filter_rules(a, c) for a, c in zip(rules['antecedents'], rules['consequents'])],
    index=rules.index,
    dtype=bool,
)
recommended_movies = rules[rule_mask].sort_values('lift', ascending=False)

# Collect the consequent movies, leaving out anything the user already rated.
# (The previous `if consequent in user_movies: pass` never skipped anything.)
recommendations = set()
for consequent in recommended_movies['consequents']:
    recommendations.update(movie for movie in consequent if movie not in seen_movies)

print("Önerilen filmler:", recommendations)

True
True
True
True
False
True
False
False
False
True
False
True
False
True
False
True
True
True
True
False
False
True
False
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
False
False
True
False
True
True
False
False
False
True
False
True
True
True
False
False
True
False
False
True
True
False
False
True
False
False
True
False
True
False
True
True
False
True
True
False
False
True
True
False
True
True
True
True
True
True
True
True
False
False
True
True
False
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
False
False
False
False
True
True
True
True
True
False
False
False
True
True
False
False
True
True
True
False
False
True
False
False
False
True
True
False
False
True
False
False
True
False
True
False
True
True
False
True
True
False
False
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True

In [13]:
# Recommend movies of one genre to one user from the rules reloaded from CSV.

# User preferences
user_id = 1
selected_genre = "Children"

# Every movieId the user has a rating row for. The list is kept under its
# original name; the set gives O(1) membership tests below.
user_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
seen_movies = set(user_movies)

# The CSV round-trip turns the antecedents/consequents cells into text, so parse
# them back into Python objects once. The emptiness check avoids an IndexError
# from `.iloc[0]` when the rules file has no rows.
# NOTE(review): eval() executes whatever expression the cell contains. That is
# tolerable only because this file is produced by this notebook; do not point
# this cell at a rules file from an untrusted source.
if not kisisel_tur_df.empty and isinstance(kisisel_tur_df['antecedents'].iloc[0], str):
    kisisel_tur_df['antecedents'] = kisisel_tur_df['antecedents'].apply(eval)
    kisisel_tur_df['consequents'] = kisisel_tur_df['consequents'].apply(eval)

# movieIds tagged with the selected genre, computed once here instead of
# re-filtering `movie_genres` for every item of every rule.
genre_movie_ids = set(
    movie_genres.loc[movie_genres['genres'] == selected_genre, 'movieId'].tolist()
)


def filter_rules(antecedents, consequents):
    """Tell whether a rule is usable for the current user and genre.

    Args:
        antecedents: iterable of movieIds on the left-hand side of the rule.
        consequents: iterable of movieIds on the right-hand side of the rule.

    Returns:
        True when every antecedent is in `seen_movies` and every consequent is
        in `genre_movie_ids`; False otherwise. Both sets are the snapshots
        computed in this cell for `user_id` and `selected_genre`.
    """
    movie_match = all(item in seen_movies for item in antecedents)
    genre_match = all(item in genre_movie_ids for item in consequents)
    return movie_match and genre_match


# Filter the rules. An explicitly typed boolean Series is used as the mask so
# that an empty rules frame is handled as well.
rule_mask = pd.Series(
    [
        filter_rules(a, c)
        for a, c in zip(kisisel_tur_df['antecedents'], kisisel_tur_df['consequents'])
    ],
    index=kisisel_tur_df.index,
    dtype=bool,
)
recommended_movies = kisisel_tur_df[rule_mask].sort_values('lift', ascending=False)

# Collect the consequent movies the user has not rated yet.
recommendations = set()
for consequent in recommended_movies['consequents']:
    recommendations.update(movie for movie in consequent if movie not in seen_movies)

# Translate movieIds into titles; the result follows the row order of `movies`.
recommended_movie_names = movies[movies['movieId'].isin(recommendations)]['title'].tolist()
print("Önerilen filmler:", recommended_movie_names)

Önerilen filmler: ['Toy Story (1995)', 'Lion King, The (1994)', 'Aladdin (1992)', 'Willy Wonka & the Chocolate Factory (1971)', 'Who Framed Roger Rabbit? (1988)', 'Toy Story 2 (1999)', 'Monsters, Inc. (2001)', 'Finding Nemo (2003)', 'Shrek 2 (2004)']
