In [None]:
import pandas as pd

# Load data from CSV file into a DataFrame, considering only the necessary columns
ratings_df = pd.read_csv("/content/ratings.csv", usecols=["userId", "movieId", "rating", "genres", "title"])

# Get unique user and movie IDs
user_ids = ratings_df['userId'].unique()
movie_ids = ratings_df['movieId'].unique()

# Create a matrix filled with NaN values
matrix = pd.DataFrame(index=user_ids, columns=movie_ids)

# Create a matrix for movie vs genres
genres = ["Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary",
          "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance",
          "Sci-Fi", "Thriller", "War", "Western"]

movie_genre_matrix = pd.DataFrame(index=movie_ids, columns=genres)

# Fill the matrix with ratings and movie vs genres
for index, row in ratings_df.iterrows():
    user_id = row['userId']
    movie_id = row['movieId']
    rating = row['rating']
    genres_str = row['genres']

    matrix.at[user_id, movie_id] = rating

    movie_genres = genres_str.split('|')
    for genre in genres:
        if genre in movie_genres:
            movie_genre_matrix.at[movie_id, genre] = 1
        else:
            movie_genre_matrix.at[movie_id, genre] = 0

# Convert the matrix to numeric type (downcast)
matrix = matrix.astype(float)

# Fill NaN values with 0
matrix = matrix.fillna(0)

# Display the resulting matrix
print(matrix)

# Create the transpose of the matrix
transpose_matrix = matrix.transpose()

# Display the transpose matrix
print(transpose_matrix)

# Display the movie vs genres matrix
print(movie_genre_matrix)


     1       3       6       47      50      70      101     110     151     \
1       4.0     4.0     4.0     5.0     5.0     3.0     5.0     4.0     5.0   
2       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4       0.0     0.0     0.0     2.0     0.0     0.0     0.0     0.0     0.0   
5       4.0     0.0     0.0     0.0     4.0     0.0     0.0     4.0     0.0   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
606     2.5     0.0     0.0     3.0     4.5     4.0     0.0     3.5     0.0   
607     4.0     0.0     0.0     0.0     0.0     0.0     0.0     5.0     0.0   
608     2.5     2.0     0.0     4.5     4.5     3.0     0.0     4.0     0.0   
609     3.0     0.0     0.0     0.0     0.0     0.0     0.0     3.0     0.0   
610     5.0     0.0     5.0     5.0     4.0     4.0     0.0     4.5     0.0   

     157     ...  147662  148166  149011  152372  1

In [None]:
# Initialize a dictionary to store user preferences
user_preferences = {}

# Iterate through each user
for user_id in user_ids:
    # Initialize a dictionary to store genre preferences for the current user
    user_genre_preferences = {genre: 0 for genre in genres}

    # Get movies rated by the current user
    rated_movies = ratings_df[ratings_df['userId'] == user_id]

    # Check if the user has rated any movies
    if not rated_movies.empty:
        # Iterate through movies rated by the user
        for index, row in rated_movies.iterrows():
            movie_id = row['movieId']
            rating = row['rating']

            # Look up movie genres in the movie-genres matrix
            movie_genres = movie_genre_matrix.loc[movie_id]

            # Update genre preferences for the current user based on movie genres and rating
            for genre, value in movie_genres.items():
                user_genre_preferences[genre] += value * rating

        # Normalize preferences using Min-Max normalization
        max_value = max(user_genre_preferences.values())
        min_value = min(user_genre_preferences.values())
        if max_value != min_value:
            for genre in user_genre_preferences:
                user_genre_preferences[genre] = (user_genre_preferences[genre] - min_value) / (max_value - min_value)

        # Store user preferences in the dictionary
        user_preferences[user_id] = user_genre_preferences

# Convert user preferences dictionary to DataFrame for better organization
user_preferences_df = pd.DataFrame(user_preferences).T

# Display user preferences
print(user_preferences_df)


       Action  Adventure  Animation  Children's    Comedy     Crime  \
1    1.000000   0.958869   0.349614         0.0  0.912596  0.503856   
2    0.659091   0.189394   0.000000         0.0  0.424242  0.575758   
3    0.793651   0.476190   0.031746         0.0  0.142857  0.015873   
4    0.198565   0.253589   0.057416         0.0  0.873206  0.246411   
5    0.294737   0.273684   0.273684         0.0  0.547368  0.484211   
..        ...        ...        ...         ...       ...       ...   
606  0.181543   0.194781   0.059002         0.0  0.567700  0.183812   
607  0.814590   0.474164   0.060790         0.0  0.556231  0.313070   
608  0.949563   0.600103   0.176531         0.0  1.000000  0.542975   
609  0.531250   0.500000   0.046875         0.0  0.359375  0.328125   
610  1.000000   0.531561   0.138329         0.0  0.823798  0.557346   

     Documentary     Drama   Fantasy  Film-Noir    Horror   Musical   Mystery  \
1       0.000000  0.791774  0.519280   0.012853  0.151671  0.26478

In [None]:
# Initialize a dictionary to store lists of users for each genre
genre_liking_users = {genre: [] for genre in genres}

# Iterate through each genre
for genre in genres:
    # Filter users who have a preference greater than 0.65 for the current genre
    liking_users = user_preferences_df[user_preferences_df[genre] > 0.65]

    # Sort users in decreasing order of their likeness for the current genre
    sorted_users = liking_users.sort_values(by=genre, ascending=False).index.tolist()

    # Add sorted list of users to the dictionary
    genre_liking_users[genre] = sorted_users

# Display lists of users for each genre
for genre, users in genre_liking_users.items():
    print(f"Users who like {genre}: {users}")

Users who like Action: [1, 320, 272, 277, 279, 292, 299, 313, 340, 256, 344, 354, 361, 363, 366, 368, 267, 249, 380, 212, 180, 183, 184, 203, 208, 211, 213, 244, 219, 220, 228, 230, 231, 243, 376, 382, 164, 551, 515, 524, 532, 534, 540, 549, 559, 493, 561, 569, 573, 582, 592, 596, 494, 486, 393, 439, 394, 395, 407, 408, 428, 438, 441, 482, 452, 457, 463, 464, 466, 478, 176, 610, 56, 30, 7, 86, 11, 129, 91, 34, 25, 79, 96, 97, 24, 21, 112, 116, 82, 134, 139, 71, 62, 52, 48, 67, 78, 138, 119, 75, 77, 239, 370, 172, 46, 447, 69, 44, 81, 386, 304, 165, 550, 586, 166, 154, 248, 197, 608, 126, 594, 425, 63, 102, 475, 223, 402, 584, 314, 544, 196, 434, 136, 335, 522, 257, 507, 99, 580, 529, 128, 350, 15, 389, 174, 305, 576, 192, 374, 18, 399, 364, 115, 205, 94, 186, 570, 548, 353, 557, 179, 485, 274, 604, 217, 347, 72, 459, 93, 122, 135, 266, 95, 352, 400, 423, 39, 545, 234, 57, 247, 270, 160, 155, 607, 546, 471, 577, 68, 189, 3, 541, 232, 332, 120, 328, 17, 240, 492, 379, 371, 238, 528, 31, 

In [37]:
def recommend_movie(movie_name, movies_df, movie_genre_matrix, user_preferences_df):
    # Find the movie ID based on the movie name
    movie_id = movies_df[movies_df['title'] == movie_name]['movieId'].values[0]

    # Find the genres of the specified movie
    movie_genres = movie_genre_matrix.loc[movie_id]

    # Initialize a dictionary to store recommendation scores for each user
    recommendation_scores = {}

    # Iterate through each user
    for user_id, user_preferences in user_preferences_df.iterrows():
        # Initialize recommendation score for the current user
        recommendation_score = 0

        # Calculate recommendation score based on user preferences and movie genres
        for genre, value in movie_genres.items():
            if value == 1:
                recommendation_score += user_preferences[genre]

        # Store recommendation score for the current user
        recommendation_scores[user_id] = recommendation_score

    # Sort users based on recommendation score in descending order
    sorted_users = sorted(recommendation_scores.items(), key=lambda x: x[1], reverse=True)

    # Display recommended users
    recommended_users = [user_id for user_id, _ in sorted_users]
    return recommended_users

In [38]:
# Call the recommend_movie function with the necessary dataframes
recommended_users = recommend_movie("Canadian Bacon (1995)", ratings_df, movie_genre_matrix, user_preferences_df)

# Print the recommended users
print("Recommended users for 'Canadian Bacon (1995)':", recommended_users)

Recommended users for 'Canadian Bacon (1995)': [289, 471, 194, 218, 173, 516, 310, 172, 335, 278, 431, 116, 57, 38, 565, 188, 420, 92, 1, 552, 115, 136, 347, 8, 266, 63, 314, 281, 498, 328, 283, 566, 361, 333, 288, 508, 157, 147, 597, 463, 42, 384, 232, 377, 353, 453, 541, 98, 224, 68, 217, 122, 240, 488, 477, 174, 276, 369, 436, 155, 519, 483, 559, 321, 99, 409, 356, 45, 144, 51, 102, 514, 100, 274, 94, 555, 169, 608, 135, 334, 449, 287, 222, 520, 365, 128, 200, 201, 517, 179, 43, 448, 355, 31, 587, 280, 141, 381, 341, 525, 73, 574, 284, 35, 579, 255, 10, 167, 600, 479, 298, 143, 216, 327, 27, 307, 256, 293, 294, 104, 226, 181, 575, 229, 20, 543, 394, 563, 426, 509, 539, 367, 273, 594, 177, 82, 282, 89, 19, 542, 484, 538, 583, 370, 492, 159, 491, 66, 54, 412, 500, 263, 564, 138, 111, 489, 395, 304, 24, 156, 225, 58, 37, 44, 67, 120, 151, 161, 163, 236, 252, 257, 259, 270, 306, 316, 342, 350, 388, 396, 406, 456, 470, 513, 535, 545, 576, 584, 330, 36, 363, 127, 438, 397, 592, 146, 487, 

In [51]:
def user_based_recommendation(user_id, user_preferences_df, ratings_df, genre_liking_users, movies_df, threshold=3, top_n=20):
    # Initialize a set to store recommended movie IDs
    recommended_movies = set()

    # Get similar users based on genre preferences
    similar_users = set()
    for genre, users in genre_liking_users.items():
        if user_id in users:
            similar_users.update(users)

    # Remove the specified user from the set of similar users
    similar_users.remove(user_id)

    # Check ratings of similar users for highly rated movies
    for user in similar_users:
        user_ratings = ratings_df[(ratings_df['userId'] == user) & (ratings_df['rating'] > threshold)]
        recommended_movies.update(user_ratings['movieId'].tolist())

    # Remove movies already rated by the specified user
    user_ratings = ratings_df[(ratings_df['userId'] == user_id) & (ratings_df['rating'] > threshold)]
    recommended_movies -= set(user_ratings['movieId'].tolist())

    # Limit the number of recommended movies to top_n
    recommended_movies = list(recommended_movies)[:top_n]

    # Print title and ID of each recommended movie
    for movie_id in recommended_movies:
        title = movies_df[movies_df['movieId'] == movie_id]['title'].values[0]
        print("Movie:", title)
        print("ID:", movie_id)
        print()

In [55]:
user_id = 1  # Specify the user ID for whom recommendations are to be generated
user_based_recommendation(user_id, user_preferences_df, ratings_df, genre_liking_users, ratings_df, top_n=20)

Movie: Jumanji (1995)
ID: 2

Movie: Brothers (Brødre) (2004)
ID: 32770

Movie: Father of the Bride Part II (1995)
ID: 5

Movie: Sabrina (1995)
ID: 7

Movie: Tom and Huck (1995)
ID: 8

Movie: Sudden Death (1995)
ID: 9

Movie: GoldenEye (1995)
ID: 10

Movie: American President, The (1995)
ID: 11

Movie: Dracula: Dead and Loving It (1995)
ID: 12

Movie: Balto (1995)
ID: 13

Movie: Nixon (1995)
ID: 14

Movie: Cutthroat Island (1995)
ID: 15

Movie: Casino (1995)
ID: 16

Movie: Sense and Sensibility (1995)
ID: 17

Movie: Four Rooms (1995)
ID: 18

Movie: Ace Ventura: When Nature Calls (1995)
ID: 19

Movie: Money Train (1995)
ID: 20

Movie: Get Shorty (1995)
ID: 21

Movie: Copycat (1995)
ID: 22

Movie: Assassins (1995)
ID: 23

