In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth

In [3]:
# Tunable thresholds for building the transaction ("basket") matrix.
LIKED_RATING_THRESHOLD = 3   # ratings strictly above this value count as "liked"
MIN_LIKED_MOVIES = 100       # users with fewer liked movies are dropped

ratings = pd.read_csv('MovieLens20MDataset/rating.csv')
movies = pd.read_csv('MovieLens20MDataset/movie.csv')

# Drop movies without genre information. The explicit .copy() makes `movies`
# an independent frame, so the column assignment further down does not raise
# SettingWithCopyWarning.
movies = movies[movies['genres'] != "(no genres listed)"].copy()

# Keep only "liked" (user, movie) pairs and replace the rating value by the
# constant 1, which acts as a presence flag for the pivot below.
# Selecting rows and columns in one .loc call avoids copying the full ratings
# table first, and .assign() builds the new column without chained assignment.
# Duplicates are removed *before* counting so that the activity filter counts
# distinct movies per user (pivot would also reject duplicate pairs).
bitmap_df = (
    ratings.loc[ratings['rating'] > LIKED_RATING_THRESHOLD, ['userId', 'movieId']]
    .drop_duplicates(subset=['userId', 'movieId'])
    .assign(rating=1)
)

# Restrict to active users: those with at least MIN_LIKED_MOVIES liked movies.
user_movie_counts = bitmap_df['userId'].value_counts()
active_users = user_movie_counts[user_movie_counts >= MIN_LIKED_MOVIES].index
bitmap_df = bitmap_df[bitmap_df['userId'].isin(active_users)]

# One row per user, one column per movie. After the pivot a cell is 1 where the
# user liked the movie and NaN otherwise, so .notna() yields the same boolean
# matrix as .fillna(0).astype(bool) without an intermediate dense float copy.
user_movie_matrix = bitmap_df.pivot(index='userId', columns='movieId', values='rating').notna()

# Split the pipe-separated genre string into a list and emit one row per
# (movieId, genre) pair. `.str.split` leaves missing values untouched instead
# of raising like a Python-level `x.split` would.
movies['genres'] = movies['genres'].str.split('|')
movie_genres = movies.explode('genres')[['movieId', 'genres']]

In [38]:
# Display the boolean matrix produced by the pivot (rows: userId, columns: movieId).
user_movie_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,131231,131239,131241,131243,131248,131250,131252,131254,131256,131262
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
11,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
14,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138474,True,True,False,False,True,True,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
138475,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
138477,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
138486,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [39]:
# Display the exploded genre table: one row per (movieId, genre) pair.
movie_genres

Unnamed: 0,movieId,genres
0,1,Adventure
0,1,Animation
0,1,Children
0,1,Comedy
0,1,Fantasy
...,...,...
27274,131256,Comedy
27275,131258,Adventure
27277,131262,Adventure
27277,131262,Fantasy


In [40]:
# Mine frequent movie itemsets with FP-Growth; an itemset is kept when its
# support in the user/movie matrix is at least 0.2. Column labels (movieIds)
# are used as item names instead of positional indices.
frequent_itemsets = fpgrowth(
    user_movie_matrix,
    min_support=0.2,
    use_colnames=True,
)

In [46]:
# Turn the frequent itemsets into association rules, filtered on the lift
# metric with a minimum threshold of 1.4.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.4,
)

In [47]:
# Display the generated association rules (antecedents, consequents and their metrics).
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(296, 260, 318)","(593, 2571)",0.353943,0.473415,0.239753,0.677377,1.430831,0.072191,1.632200,0.466066
1,"(593, 2571, 318)","(296, 260)",0.370742,0.458867,0.239753,0.646683,1.409303,0.069631,1.531580,0.461543
2,"(296, 260)","(593, 2571, 318)",0.458867,0.370742,0.239753,0.522489,1.409303,0.069631,1.317785,0.536707
3,"(593, 2571)","(296, 260, 318)",0.473415,0.353943,0.239753,0.506433,1.430831,0.072191,1.308954,0.571808
4,"(296, 1198)","(260, 318)",0.457857,0.447523,0.291825,0.637372,1.424221,0.086924,1.523537,0.549416
...,...,...,...,...,...,...,...,...,...,...
216853,(5989),"(2571, 356)",0.323808,0.462302,0.212851,0.657336,1.421876,0.063154,1.569171,0.438786
216854,(1968),(2918),0.321787,0.386647,0.215766,0.670524,1.734201,0.091348,1.861601,0.624237
216855,(2918),(1968),0.386647,0.321787,0.215766,0.558044,1.734201,0.091348,1.534571,0.690248
216856,"(1968, 260)",(1196),0.222838,0.629950,0.202200,0.907383,1.440405,0.061823,3.995501,0.393419


In [1]:
# NOTE: this cell reads `rules` and `movie_genres`, which are created by the
# cells above. Executed on a fresh kernel it fails with
# "NameError: name 'rules' is not defined", so run the notebook top to bottom.
user_movies = [1]         # movieIds known for the user; a rule must be fully covered by them
selected_genre = "Crime"  # recommended movies must all carry this genre

# Index the genres by movieId once. Each rule check then becomes a dictionary
# lookup instead of a boolean scan over the whole movie_genres frame.
genres_by_movie = {}
for movie_id, genre in zip(movie_genres['movieId'], movie_genres['genres']):
    genres_by_movie.setdefault(movie_id, set()).add(genre)


def filter_rules(antecedents, consequents):
    """Decide whether an association rule is usable for the current user.

    Parameters
    ----------
    antecedents : iterable
        movieIds on the left-hand side of the rule.
    consequents : iterable
        movieIds on the right-hand side of the rule.

    Returns
    -------
    bool
        True when every antecedent is contained in ``user_movies`` and every
        consequent is tagged with ``selected_genre``. Both globals are read at
        call time; the genre lookup uses the ``genres_by_movie`` index built
        when this cell was executed.
    """
    # Cheap test first: most rules fail here, which skips the genre lookups.
    if not all(item in user_movies for item in antecedents):
        return False
    # A movie without any genre entry yields an empty tuple and never matches.
    return all(
        selected_genre in genres_by_movie.get(item, ())
        for item in consequents
    )


# Build the row mask from the two relevant columns directly; this avoids the
# per-row Series construction of DataFrame.apply(axis=1). Wrapping it in a
# boolean Series aligned on rules.index also keeps the selection well-defined
# when `rules` is empty.
rule_mask = pd.Series(
    [
        filter_rules(antecedents, consequents)
        for antecedents, consequents in zip(rules['antecedents'], rules['consequents'])
    ],
    index=rules.index,
    dtype=bool,
)
recommended_movies = rules.loc[rule_mask].sort_values('lift', ascending=False)

# Collect the consequents in descending-lift order. dict.fromkeys removes
# repeats while keeping the first (highest-lift) occurrence, so the ranking
# survives; a plain set would throw the sort order away. Movies already in
# user_movies are skipped so the user is not recommended something known.
ranked_recommendations = list(dict.fromkeys(
    movie_id
    for consequent in recommended_movies['consequents']
    for movie_id in sorted(consequent)  # deterministic order inside one rule
    if movie_id not in user_movies
))

# Kept as a set under its original name for any code that relies on it.
recommendations = set(ranked_recommendations)

print("Önerilen filmler:", ranked_recommendations)

NameError: name 'rules' is not defined