1. Data Preprocessing

In [1]:
import pandas as pd

# Read the Excel workbook into a DataFrame, then dump it to the cell output
data = pd.read_excel(io=r"C:\Users\Alee Abbas\Downloads\Newfile.xlsx")
print(data)

      show_id   type                                           title  \
0      498716  Movie                        Ferris Bueller's Day Off   
1    60000861  Movie                                 American Psycho   
2    60010488  Movie            Indiana Jones and the Temple of Doom   
3      516154  Movie                           For the Love of Benji   
4    20764666  Movie                           English Babu Desi Mem   
..        ...    ...                                             ...   
248  17687959  Movie                                    The Waterboy   
249  60001807  Movie  The Naked Gun: From the Files of Police Squad!   
250   1181550  Movie                                        Scream 2   
251  17457962  Movie                                 Raja Hindustani   
252  20712279  Movie                                          Kaalia   

             director                                               cast  \
0         John Hughes  Matthew Broderick, Alan Ruck, Mia Sa

2. Identifying Items and Users in Pandas

In [2]:
import pandas as pd
# Re-read the workbook into a DataFrame
data = pd.read_excel(io=r"C:\Users\Alee Abbas\Downloads\Newfile.xlsx")

# A "user" is one distinct combination of the attribute columns listed here
user_columns = ['director', 'cast', 'country', 'age', 'gender']
users = data.loc[:, user_columns].drop_duplicates()

# An "item" is one distinct show, keyed by the 'show_id' column
items = data['show_id'].unique()

# Report how many distinct users and items were found
print("Unique Users:", len(users.index))
print("Unique Items:", len(items))

Unique Users: 253
Unique Items: 250


3. User-Item Interaction Matrix

In [3]:
import pandas as pd
import numpy as np

# Load your dataset into a Pandas DataFrame
data = pd.read_excel(r"C:\Users\Alee Abbas\Downloads\Newfile.xlsx")

# Identify the user and item columns
user_col = 'user_id'
item_col = 'show_id'

# Map every row's user / item id to an integer position in a single pass.
# pd.factorize lists the distinct values in order of first appearance (the
# same order .unique() produces) and marks missing values with the code -1.
user_codes, unique_users = pd.factorize(data[user_col])
item_codes, unique_items = pd.factorize(data[item_col])

# Keep the lookup tables as plain NumPy arrays so they can be indexed by position
unique_users = np.asarray(unique_users)
unique_items = np.asarray(unique_items)

# Create an empty user-item interaction matrix (rows = users, columns = items)
interaction_matrix = np.zeros((len(unique_users), len(unique_items)))

# A row with a missing user or item id cannot be placed in the matrix, so it
# is skipped instead of raising an IndexError
has_both_ids = (user_codes >= 0) & (item_codes >= 0)

# Fill with binary indicator (1 for interaction, 0 for no interaction).
# One vectorised assignment replaces the per-row np.where scans.
interaction_matrix[user_codes[has_both_ids], item_codes[has_both_ids]] = 1

# Now you have the user-item interaction matrix
print(interaction_matrix)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


4. Implement user-based recommendations using cosine similarity

In [15]:
#print(data.columns)
#data.columns = data.columns.str.strip()
#item_details = data[data['show_id'] == item_id]
#print(item_details)


import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of interaction_matrix; entry
# [i, j] compares the interaction vectors of row i and row j.
user_similarity = cosine_similarity(interaction_matrix)

def get_recommendations_with_details(user_index, num_recommendations=5):
    """Recommend items a user has not interacted with, with display details.

    Relies on the notebook-level ``user_similarity``, ``interaction_matrix``,
    ``unique_items`` and ``data`` objects.

    Every item is scored by the similarity-weighted sum of all users'
    interactions with it. Items are ranked from highest to lowest score, and
    items the target user already interacted with are dropped.

    Note: items are not filtered by score, so items with a score of 0 are
    returned when fewer than ``num_recommendations`` items score above 0.

    Args:
        user_index: Row position of the target user in ``interaction_matrix``
            and ``user_similarity``.
        num_recommendations: Maximum number of items to return.

    Returns:
        A list of dicts, best first, each with the keys ``'show_id'``,
        ``'title'``, ``'release_year'``, ``'cast'``, ``'director'`` and
        ``'genre'``. The metadata comes from the first row of ``data`` whose
        ``'show_id'`` matches.
    """
    similar_users = user_similarity[user_index]
    user_interactions = interaction_matrix[user_index]

    # Calculate a recommendation score based on similar users' interactions
    recommendation_scores = np.dot(similar_users, interaction_matrix)

    # Item positions ordered from highest to lowest score
    ranked_item_indices = np.argsort(recommendation_scores)[::-1]

    # Keep only items the user has not interacted with. Indexing a boolean
    # mask by the ranked positions preserves the ranking order and avoids a
    # linear `in` scan of an array for every candidate item.
    not_interacted = user_interactions == 0
    candidate_indices = ranked_item_indices[not_interacted[ranked_item_indices]]
    top_item_ids = unique_items[candidate_indices[:num_recommendations]]

    # Retrieve detailed information for recommended items
    recommended_items_details = []
    for item_id in top_item_ids:
        # A show_id can occur on several rows; the first matching row is used
        item_details = data[data['show_id'] == item_id].iloc[0]
        recommended_items_details.append({
            'show_id': item_id,
            'title': item_details['title'],
            'release_year': item_details['release_year'],
            'cast': item_details['cast'],
            'director': item_details['director'],
            'genre': item_details['genre']  # genres are read from the 'genre' column
        })

    return recommended_items_details

# Example usage
target_user_index = 7  # Replace with the index of your target user
recommended_items_with_details = get_recommendations_with_details(target_user_index)

# Print one labelled line per field, then a blank line between recommendations
for item in recommended_items_with_details:
    for label, key in (
        ("Title:", 'title'),
        ("Release Year:", 'release_year'),
        ("Cast:", 'cast'),
        ("Director:", 'director'),
        ("Genre:", 'genre'),
    ):
        print(label, item[key])
    print()

Title: Kaalia
Release Year: 1981
Cast: Amitabh Bachchan, Asha Parekh, Parveen Babi, Amjad Khan, Pran, K.N. Singh, Sajjan, Murad, Kader Khan, Jagdeep
Director: Tinnu Anand
Genre: Action & Adventure, Dramas, International Movies

Title: Sling Blade
Release Year: 1996
Cast: Billy Bob Thornton, Dwight Yoakam, J.T. Walsh, John Ritter, Lucas Black, Natalie Canerday, James Hampton, Robert Duvall, Rick Dial, Brent Briscoe
Director: Billy Bob Thornton
Genre: Dramas, Independent Movies

Title: Magnolia
Release Year: 1999
Cast: Jeremy Blackman, Tom Cruise, Melinda Dillon, April Grace, Luis Guzmán, Philip Baker Hall, Philip Seymour Hoffman, Ricky Jay, William H. Macy, Alfred Molina, Julianne Moore, Michael Murphy, John C. Reilly, Jason Robards, Melora Walters
Director: Paul Thomas Anderson
Genre: Dramas, Independent Movies

Title: Mansfield Park
Release Year: 1999
Cast: Frances O'Connor, Embeth Davidtz, Jonny Lee Miller, Alessandro Nivola, Harold Pinter, Lindsay Duncan, Sheila Gish, James Purefoy,