In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [10]:
os.chdir('C:\\Users\\HP\\OneDrive\\Desktop\\DS course\\Elevate lab project\\ml-100k\\ml-100k\\')

In [13]:
# movie_recommendation_system.py

# Step 1: Data Preprocessing
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Load data
def load_data():
    ratings = pd.read_csv("u.data", sep="\t", names=["user_id", "movie_id", "rating", "timestamp"])
    movies = pd.read_csv(
        "u.item",
        sep="|",
        encoding="latin-1",
        header=None,
        names=[
            "movie_id", "title", "release_date", "video_release_date", "IMDb_URL",
            "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
            "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
            "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"
        ],
        usecols=["movie_id", "title", "Action", "Comedy", "Drama", "Romance", "Thriller"]
    )
    df = pd.merge(ratings, movies, on="movie_id")
    return df

# Step 2: Collaborative Filtering

def create_user_movie_matrix(df):
    return df.pivot_table(index='user_id', columns='title', values='rating')

def get_collaborative_recommendations(title, df, n=5):
    matrix = create_user_movie_matrix(df)
    movie_ratings = matrix.fillna(0).T
    similarity = cosine_similarity(movie_ratings)
    similarity_df = pd.DataFrame(similarity, index=movie_ratings.index, columns=movie_ratings.index)
    if title not in similarity_df:
        return []
    similar_movies = similarity_df[title].sort_values(ascending=False)[1:n+1]
    return similar_movies.index.tolist()

# Step 3: Content-Based Filtering

def get_content_based_recommendations(fav_title, df, n=5):
    genre_cols = ["Action", "Comedy", "Drama", "Romance", "Thriller"]
    genre_map = df.drop_duplicates("title")[["title"] + genre_cols]
    genre_map["genre_str"] = genre_map[genre_cols].astype(str).agg("".join, axis=1)

    vectorizer = CountVectorizer()
    genre_matrix = vectorizer.fit_transform(genre_map["genre_str"])
    similarity = cosine_similarity(genre_matrix)
    sim_df = pd.DataFrame(similarity, index=genre_map['title'], columns=genre_map['title'])

    if fav_title not in sim_df:
        return []
    return sim_df[fav_title].sort_values(ascending=False)[1:n+1].index.tolist()

# Main Function
if __name__ == "__main__":
    df = load_data()
    movie = "Star Wars (1977)"
    print("Collaborative Filtering:", get_collaborative_recommendations(movie, df))
    print("Content-Based Filtering:", get_content_based_recommendations(movie, df))


Collaborative Filtering: ['Return of the Jedi (1983)', 'Raiders of the Lost Ark (1981)', 'Empire Strikes Back, The (1980)', 'Toy Story (1995)', 'Godfather, The (1972)']
Content-Based Filtering: ['Cutthroat Island (1995)', 'Star Wars (1977)', 'Last of the Mohicans, The (1992)', 'Return of the Jedi (1983)', 'Top Gun (1986)']


In [15]:


import streamlit as st
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# --- Data Loading and Preprocessing ---
def load_data():
    ratings = pd.read_csv("u.data", sep="\t", names=["user_id", "movie_id", "rating", "timestamp"])
    movies = pd.read_csv(
        "u.item",
        sep="|",
        encoding="latin-1",
        header=None,
        names=[
            "movie_id", "title", "release_date", "video_release_date", "IMDb_URL",
            "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
            "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
            "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"
        ],
        usecols=["movie_id", "title", "Action", "Comedy", "Drama", "Romance", "Thriller"]
    )
    df = pd.merge(ratings, movies, on="movie_id")
    return df

# --- Recommendation Logic ---
def create_user_movie_matrix(df):
    return df.pivot_table(index='user_id', columns='title', values='rating')

def get_collaborative_recommendations(title, df, n=5):
    matrix = create_user_movie_matrix(df)
    movie_ratings = matrix.fillna(0).T
    similarity = cosine_similarity(movie_ratings)
    similarity_df = pd.DataFrame(similarity, index=movie_ratings.index, columns=movie_ratings.index)
    if title not in similarity_df:
        return []
    similar_movies = similarity_df[title].sort_values(ascending=False)[1:n+1]
    return similar_movies.index.tolist()

def get_content_based_recommendations(fav_title, df, n=5):
    genre_cols = ["Action", "Comedy", "Drama", "Romance", "Thriller"]
    genre_map = df.drop_duplicates("title")[["title"] + genre_cols]
    genre_map["genre_str"] = genre_map[genre_cols].astype(str).agg("".join, axis=1)
    vectorizer = CountVectorizer()
    genre_matrix = vectorizer.fit_transform(genre_map["genre_str"])
    similarity = cosine_similarity(genre_matrix)
    sim_df = pd.DataFrame(similarity, index=genre_map['title'], columns=genre_map['title'])
    if fav_title not in sim_df:
        return []
    return sim_df[fav_title].sort_values(ascending=False)[1:n+1].index.tolist()

# --- Streamlit UI ---
df = load_data()
st.title("🎬 Movie Recommendation System")

movie_list = sorted(df['title'].unique())
selected_movie = st.selectbox("Select a movie you like:", movie_list)

method = st.radio("Choose recommendation method:", ["Collaborative Filtering", "Content-Based Filtering"])

if st.button("Get Recommendations"):
    if method == "Collaborative Filtering":
        results = get_collaborative_recommendations(selected_movie, df)
    else:
        results = get_content_based_recommendations(selected_movie, df)

    if results:
        st.subheader("Top 5 Recommended Movies:")
        for i, movie in enumerate(results, 1):
            st.write(f"{i}. {movie}")
    else:
        st.warning("No recommendations found. Try another movie.")


2025-06-22 13:25:04.871 
  command:

    streamlit run C:\ProgramData\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
