In [1]:
!wget https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip ml-100k.zip

--2025-08-07 05:28:35--  https://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2025-08-07 05:28:35 (15.8 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base  

In [23]:

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# The MovieLens files are read without a header row, so column names are
# supplied explicitly.
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)
# Only the first two fields of u.item (item id and title) are loaded.
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    usecols=[0, 1],
    names=['item_id', 'title'],
)

# Attach the movie title to every rating row.
data = ratings.merge(movies, on='item_id')

# One row per user, one column per item; cells without a rating become 0.
user_item_matrix = data.pivot_table(values='rating', index='user_id', columns='item_id').fillna(0)

# Item-to-item cosine similarity, computed on the transposed matrix so that
# each item is represented by its vector of user ratings.
item_similarity = cosine_similarity(user_item_matrix.transpose())
item_ids = user_item_matrix.columns
item_similarity_df = pd.DataFrame(item_similarity, index=item_ids, columns=item_ids)


def get_similar_movies_by_title(title, top_n=5):
    """Return the movies most similar to the one named ``title``.

    The title is matched case-insensitively against ``movies['title']``. When
    no title matches exactly, a second attempt ignores a trailing release year
    such as ``" (1995)"``, so ``"Toy Story"`` finds ``"Toy Story (1995)"``.
    If several movies match, the first one in ``movies`` is used.

    Args:
        title: Movie title to look up.
        top_n: Maximum number of similar movies to return; values below 1
            yield an empty list.

    Returns:
        A list of ``"<title> (Similarity: <score>)"`` strings ordered from
        most to least similar, or a message string when the movie cannot be
        found or has no column in ``item_similarity_df``.
    """
    query = title.lower()
    known_titles = movies['title'].str.lower()
    movie = movies[known_titles == query]
    if movie.empty:
        # Dataset titles end with a release year; retry with that suffix removed.
        yearless_titles = known_titles.str.replace(r'\s*\(\d{4}\)\s*$', '', regex=True)
        movie = movies[yearless_titles == query]
    if movie.empty:
        return f"Movie '{title}' not found!"

    movie_id = movie['item_id'].values[0]
    if movie_id not in item_similarity_df.columns:
        return f"Movie '{title}' has no ratings to compare against!"

    # Remove the query movie by label rather than assuming it sorts first:
    # another movie can tie with it at similarity 1.0, in which case slicing
    # off position 0 could discard a real neighbour and keep the movie itself.
    similar_scores = (
        item_similarity_df[movie_id]
        .drop(labels=movie_id, errors='ignore')
        .sort_values(ascending=False)
    )
    top_movies = similar_scores.head(max(top_n, 0))

    # Build the id -> title lookup once instead of scanning `movies` per result.
    title_by_id = movies.drop_duplicates('item_id').set_index('item_id')['title']
    return [
        f"{title_by_id.loc[mid]} (Similarity: {score:.2f})"
        for mid, score in top_movies.items()
    ]


def recommend_for_user(user_id, top_n=5, min_rating=4):
    """Recommend unseen movies to a user based on the movies they rated highly.

    Every movie the user rated at ``min_rating`` or above counts as "liked".
    A candidate's score is the sum of its similarities (taken from
    ``item_similarity_df``) to all liked movies. Movies the user has already
    rated are excluded from the results.

    Args:
        user_id: Row label of the user in ``user_item_matrix``.
        top_n: Maximum number of recommendations to return.
        min_rating: Lowest rating that counts as "liked". Should be above 0,
            because 0 marks an unrated movie in ``user_item_matrix``.

    Returns:
        A list of ``"<title> (Score: <score>)"`` strings ordered from highest
        to lowest score (empty when the user has no liked movies), or a
        message string when ``user_id`` is unknown.
    """
    if user_id not in user_item_matrix.index:
        return f"User ID '{user_id}' not found!"

    user_ratings = user_item_matrix.loc[user_id]
    liked_movies = user_ratings[user_ratings >= min_rating].index
    if liked_movies.empty:
        # Nothing to base a recommendation on.
        return []

    # Sum the similarity columns of all liked movies in one vectorised step.
    similar_scores = item_similarity_df[liked_movies].sum(axis=1)

    # A rating above 0 means the user has already seen the movie.
    already_rated = user_ratings[user_ratings > 0].index
    similar_scores = similar_scores.drop(labels=already_rated, errors='ignore')

    top_recs = similar_scores.sort_values(ascending=False).head(top_n)

    # Build the id -> title lookup once instead of scanning `movies` per result.
    title_by_id = movies.drop_duplicates('item_id').set_index('item_id')['title']
    return [
        f"{title_by_id.loc[mid]} (Score: {score:.2f})"
        for mid, score in top_recs.items()
    ]

# --- USER INPUT ZONE ---
def _show_results(header, output):
    """Print ``header`` and the result lines, or the message if the lookup failed.

    ``output`` is what the recommender functions return: a list of formatted
    lines on success, or a string describing why nothing could be returned.
    The header is printed only for a list, so a failed lookup is not
    introduced as if it were a recommendation.
    """
    if not isinstance(output, list):
        print(output)
        return
    print(header)
    if not output:
        print("No recommendations available.")
    for line in output:
        print(line)


print("MOVIE RECOMMENDATION SYSTEM")

print("Here are all the available movie titles:\n")
print(movies['title'].sort_values().to_string(index=False))
choice = input("Type 'movie' to get similar movies OR 'user' to get personalized recommendations: ").strip().lower()

if choice == 'movie':
    # The example includes the release year because dataset titles end with it.
    movie_title = input("Enter the movie title (e.g., Toy Story (1995)): ").strip()
    _show_results(
        f"\nBecause you watched '{movie_title}', you might like:",
        get_similar_movies_by_title(movie_title),
    )

elif choice == 'user':
    # Keep the try block limited to the int() conversion so that a ValueError
    # raised while computing recommendations is not reported as a bad user ID.
    try:
        user_id = int(input("Enter User ID (1 to 943): "))
    except ValueError:
        user_id = None
        print("Please enter a valid numeric User ID.")

    if user_id is not None:
        _show_results(
            f"\nRecommended movies for User {user_id}:",
            recommend_for_user(user_id),
        )
else:
    print("Invalid choice. Please type 'movie' or 'user'.")




MOVIE RECOMMENDATION SYSTEM
Here are all the available movie titles:

                         'Til There Was You (1997)
                                      1-900 (1994)
                             101 Dalmatians (1996)
                               12 Angry Men (1957)
                                        187 (1997)
                       2 Days in the Valley (1996)
               20,000 Leagues Under the Sea (1954)
                      2001: A Space Odyssey (1968)
       3 Ninjas: High Noon At Mega Mountain (1998)
                              39 Steps, The (1935)
                                      8 1/2 (1963)
                    8 Heads in a Duffel Bag (1997)
                                  8 Seconds (1994)
                             A Chef in Love (1996)
                              Above the Rim (1994)
                             Absolute Power (1997)
                                 Abyss, The (1989)
                 Ace Ventura: Pet Detective (1994)
            