<a href="https://colab.research.google.com/github/couragedike1/-Recommendation-Systems/blob/Recommendation-Systems/Content_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install required libraries
!pip install scikit-learn pandas



In [2]:
# Step 2: Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import os

In [3]:
# Step 3: Load the MovieLens 100k ratings dataset
ratings_path = 'u.data'
ratings_cols = ['user_id', 'item_id', 'rating', 'timestamp']

# If the ratings file is not in the working directory, ask the user to
# upload it through Colab's file-upload widget.
if not os.path.exists(ratings_path):
    from google.colab import files
    print("Please upload 'u.data'")
    uploaded = files.upload()

# The file is read as tab-separated with no header row; the column names
# are supplied explicitly.
ratings_df = pd.read_csv(
    ratings_path,
    sep='\t',
    header=None,
    names=ratings_cols,
)
ratings_df

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


In [4]:

# Step 4: Load movie titles and genres
# Column layout used for u.item: five descriptive fields followed by one
# flag column per genre (a value of 1 marks the genre as present).
item_cols = [
    'item_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
]

movies_path = 'u.item'
if not os.path.exists(movies_path):
    # Import here instead of relying on the ratings cell: that cell only
    # imports `files` when u.data was missing, so `files` would otherwise be
    # undefined (NameError) when u.data exists but u.item does not.
    from google.colab import files
    print("Please upload 'u.item'")
    uploaded = files.upload()

# The file is read as pipe-delimited, ISO-8859-1 encoded, with no header row.
movies_df = pd.read_csv(movies_path, sep='|', encoding='ISO-8859-1', header=None, names=item_cols)


In [5]:
# Step 5: Create a new column 'genres' by combining genre flags into a string
# Every column from position 5 onward in item_cols is a genre flag.
genre_cols = item_cols[5:]

# For each movie row, keep the labels of the flag columns equal to 1 and join
# them with single spaces (column order is preserved).
movies_df['genres'] = movies_df[genre_cols].apply(
    lambda flags: ' '.join(flags[flags == 1].index),
    axis=1,
)


In [6]:
# Step 6: Merge ratings with movie titles and genres
# Inner join on item_id: each rating row gains the title and genre string of
# the movie it refers to.
df = pd.merge(
    left=ratings_df,
    right=movies_df[['item_id', 'title', 'genres']],
    how='inner',
    on='item_id',
)

# Optional: View the combined dataset
print("Merged dataset preview:")
print(df.head())

Merged dataset preview:
   user_id  item_id  rating  timestamp                       title  \
0      196      242       3  881250949                Kolya (1996)   
1      186      302       3  891717742    L.A. Confidential (1997)   
2       22      377       1  878887116         Heavyweights (1994)   
3      244       51       2  880606923  Legends of the Fall (1994)   
4      166      346       1  886397596         Jackie Brown (1997)   

                             genres  
0                            Comedy  
1  Crime Film-Noir Mystery Thriller  
2                 Children's Comedy  
3         Drama Romance War Western  
4                       Crime Drama  


In [7]:
# Step 7: Content-Based Filtering using TF-IDF on genres
# Treat every whitespace-separated genre label as ONE token. The vectorizer's
# default token pattern splits on punctuation, turning 'Sci-Fi' into
# 'sci' + 'fi' and 'Film-Noir' into 'film' + 'noir', which gives those genres
# two perfectly correlated features and inflates their weight in similarity.
tfidf = TfidfVectorizer(stop_words='english', token_pattern=r'\S+')
tfidf_matrix = tfidf.fit_transform(movies_df['genres'])

In [8]:
# Step 8: Compute cosine similarity
# linear_kernel is a plain dot product; with the second argument omitted the
# matrix is compared against itself. This equals cosine similarity provided
# the TF-IDF rows are unit length (TfidfVectorizer's default normalisation).
cosine_sim = linear_kernel(tfidf_matrix)

In [12]:
# Step 9: Reverse mapping of movie titles to indices
# Series.drop_duplicates() compares *values* (the row positions, which are
# already unique), so it never removes repeated titles. De-duplicate on the
# index instead, keeping the first row for each title, so that looking up a
# title always yields a single integer position rather than a Series.
indices = pd.Series(movies_df.index, index=movies_df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [11]:

# Step 10: Recommendation function
def recommend(title, num_recommendations=5):
    """Return the movies whose genre profile is most similar to `title`.

    Relies on the notebook-level objects `indices` (title -> row position),
    `cosine_sim` (pairwise similarity matrix) and `movies_df`.

    Args:
        title: Exact movie title to look up in `indices`.
        num_recommendations: Maximum number of movies to return; values
            below zero are treated as zero.

    Returns:
        A DataFrame with the 'title' and 'genres' columns of the most similar
        movies, ordered from most to least similar, or a message string when
        `title` is not present in `indices`.
    """
    if title not in indices:
        return f"'{title}' not found in dataset."

    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to its first occurrence so `idx` is a single row position.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the query movie explicitly instead of dropping the first sorted
    # entry: when other movies tie with it at the top score, the stable sort
    # may put a different movie first, which would wrongly discard that movie
    # and leave the query movie in its own recommendations.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    top_n = max(num_recommendations, 0)
    movie_indices = [position for position, _ in sim_scores[:top_n]]

    return movies_df[['title', 'genres']].iloc[movie_indices]


In [14]:
# Step 11: Try it out
movie_input = 'Gone with the Wind (1939)'
# f-string so the chosen title is interpolated into the heading instead of
# printing the literal text '{movie_input}'.
print(f"Top recommendations for '{movie_input}':")
print(recommend(movie_input))

Top recommendations for '{movie_input}':
                            title             genres
285   English Patient, The (1996)  Drama Romance War
482             Casablanca (1942)  Drama Romance War
548                Rob Roy (1995)  Drama Romance War
1484   Colonel Chabert, Le (1994)  Drama Romance War
1123   Farewell to Arms, A (1932)        Romance War
