In [2]:
import pandas as pd

# Read the two CSV exports into DataFrames.
# rating.csv is expected to provide: userId, movieId, rating, timestamp
ratings = pd.read_csv('/content/rating.csv')

# movie.csv is expected to provide: movieId, title, genres.
# The pipe-delimited genres string becomes a list of genre names per movie.
movies = pd.read_csv('/content/movie.csv').assign(
    genres=lambda frame: frame['genres'].str.split('|')
)


In [3]:
!pip install scikit-surprise




In [4]:
!pip uninstall numpy -y
!pip install numpy==1.26.4


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
Successfully installed numpy-1.26.4


In [5]:
from surprise import Dataset, Reader, SVD


In [6]:
# Peek at the distinct IDs in each table (long arrays are abbreviated when printed).
print(pd.unique(ratings['userId']))    # every distinct user ID
print(pd.unique(movies['movieId']))    # every distinct movie ID


[    1     2     3 ... 44384 44385 44386]
[     1      2      3 ... 131258 131260 131262]


In [9]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Fixed seed shared by the train/test split and the SVD initialisation so the
# fitted model (and the prediction printed below) is repeatable across runs.
RANDOM_STATE = 42

# Build a Surprise dataset from the userId / movieId / rating columns of `ratings`.
# rating_scale gives the lowest and highest rating value the reader should expect.
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings; the model is fitted on the remaining 80%.
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)
algo = SVD(random_state=RANDOM_STATE)
algo.fit(trainset)

# Choose a user and a movie to predict rating for
userId = 1
movieId = 50

pred = algo.predict(userId, movieId)
print(f"Predicted rating for user {userId} on movie {movieId}: {pred.est:.2f}")


Predicted rating for user 1 on movie 50: 3.92


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Flatten each movie's genre list into one space-separated string so it can be
# fed to the text vectorizer.
movies['genres_str'] = movies['genres'].map(' '.join)

# Fit TF-IDF on the genre strings (English stop words are dropped).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres_str'])

# Pairwise cosine similarity of every movie against every other movie; called
# with a single argument, the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get similar movies by cosine similarity
def get_similar_movies(movie_title, top_n=10):
    """Return the titles of the movies most similar to ``movie_title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, whose row
    and column ``i`` are taken to correspond to the ``i``-th row of ``movies``.

    Args:
        movie_title: Exact title to look up in ``movies['title']``. If several
            rows share the title, the first one is used.
        top_n: Maximum number of similar movies to return.

    Returns:
        A slice of ``movies['title']`` holding up to ``top_n`` titles, ordered
        from most to least similar. The queried movie itself is never included.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    # Use the row *position*, not the index label: cosine_sim is addressed
    # positionally, so this stays correct even if `movies` has a non-default index.
    positions = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"No movie titled {movie_title!r} found")
    idx = int(positions[0])

    # Drop the query movie explicitly. Slicing off the first sorted entry is not
    # enough: other movies can tie with it at the top similarity, and then a
    # different movie would be discarded while the query stays in the results.
    sim_scores = [(i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx]
    # sorted() is stable, so equally similar movies keep their row order.
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)

    movie_indices = [i for i, _ in sim_scores[:max(top_n, 0)]]
    return movies['title'].iloc[movie_indices]


In [4]:
def hybrid_recommendation(user_id, favorite_movie, top_n=10, alpha=0.5):
    """Recommend movie titles by blending collaborative and content-based signals.

    Every movie the user has not rated gets a collaborative score of
    ``alpha * algo.predict(user_id, movie_id).est``. Movies returned by
    ``get_similar_movies(favorite_movie, top_n=top_n * 2)`` additionally receive
    a flat content bonus of ``1 - alpha``.

    Args:
        user_id: User whose rows in ``ratings`` define the already-rated movies
            and who is passed to ``algo.predict``.
        favorite_movie: Title forwarded to ``get_similar_movies``.
        top_n: Number of titles to return.
        alpha: Weight of the collaborative score; the content bonus is weighted
            ``1 - alpha``.

    Returns:
        A list of up to ``top_n`` titles, best combined score first. Movies the
        user has already rated are never returned.

    Raises:
        Whatever ``get_similar_movies`` raises for a title it cannot find.
    """
    rated_ids = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    # Collaborative filtering: predict a rating for every movie not yet rated.
    unrated_movies = movies[~movies['movieId'].isin(rated_ids)]
    cf_scores = [
        (movie_id, algo.predict(user_id, movie_id).est)
        for movie_id in unrated_movies['movieId']
    ]
    cf_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Content-based: resolve the similar movies through the row labels of the
    # returned Series instead of re-matching on title. Titles are not guaranteed
    # to be unique, so a title match could select a different movie's movieId.
    # Assumes get_similar_movies returns a slice of movies['title'] (index kept).
    cb_titles = get_similar_movies(favorite_movie, top_n=top_n * 2)
    cb_movie_ids = movies.loc[cb_titles.index, 'movieId'].tolist()

    # Combine scores with weights
    combined_scores = {movie_id: alpha * score for movie_id, score in cf_scores}
    for movie_id in cb_movie_ids:
        # Already-rated movies are skipped so they cannot re-enter the ranking
        # through the content bonus alone.
        if movie_id in rated_ids:
            continue
        combined_scores[movie_id] = combined_scores.get(movie_id, 0) + (1 - alpha) * 1

    # Sort combined scores and get top N
    recommended = sorted(combined_scores.items(), key=lambda item: item[1], reverse=True)[:top_n]

    # Build the id -> title lookup once (first row wins for a repeated movieId)
    # rather than scanning the whole frame for every recommended movie.
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title'].to_dict()
    return [title_by_id[movie_id] for movie_id, _ in recommended]


In [5]:
!pip install streamlit




In [6]:
import streamlit as st

# Streamlit front end for the hybrid recommender. This code only renders a page
# when executed through `streamlit run`; run as a plain notebook cell it merely
# emits Streamlit's "without `streamlit run`" warning.
st.title("Hybrid Recommendation System")

user_id = st.number_input("Enter User ID", min_value=1)
favorite_movie = st.text_input("Enter Your Favorite Movie")

if st.button("Get Recommendations"):
    if not favorite_movie.strip():
        # Nothing typed yet: ask for input instead of running a lookup that must fail.
        st.warning("Please enter a movie title.")
    else:
        try:
            recommendations = hybrid_recommendation(user_id, favorite_movie)
        except IndexError:
            # The title lookup behind hybrid_recommendation takes the first matching
            # row and raises IndexError when the title matches no movie.
            st.error(f"Movie '{favorite_movie}' was not found. Please check the exact title.")
        else:
            st.write("Recommended Movies:")
            for movie in recommendations:
                st.write(movie)


2025-06-09 10:53:25.657 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-06-09 10:53:25.664 Session state does not function when running a script without `streamlit run`


In [7]:
# Read a user ID and a movie ID interactively; int() raises ValueError if the
# typed text is not a whole number.
user_id = int(input("Enter user ID: "))
movie_id = int(input("Enter movie ID: "))
# Generate and print recommendations
# TODO: not implemented — this cell only collects the two IDs; nothing in it
# calls the recommender or prints a result yet.


Enter user ID: 1
Enter movie ID: 2


In [11]:
!pip install streamlit pyngrok




In [12]:
# Register the ngrok authtoken without embedding the secret in the notebook.
# The token is read from the NGROK_AUTHTOKEN environment variable, or asked for
# with a hidden prompt when that variable is not set.
# NOTE: any token that was ever saved inside a shared notebook should be revoked
# in the ngrok dashboard and replaced with a new one.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [13]:
from pyngrok import ngrok
from pathlib import Path

# Port shared by the Streamlit server and the ngrok tunnel. Passing it explicitly
# keeps the two in sync; otherwise Streamlit may pick another port when its
# default is busy and the tunnel would forward to nothing.
STREAMLIT_PORT = 8501
APP_PATH = Path("app.py")

# The server is started in the background, where a missing script would fail
# silently, so check for it up front.
if not APP_PATH.is_file():
    raise FileNotFoundError(
        f"{APP_PATH} not found: save the Streamlit app to this file before launching it"
    )

get_ipython().system_raw(f'streamlit run {APP_PATH} --server.port {STREAMLIT_PORT} &')
public_url = ngrok.connect(STREAMLIT_PORT)

print(public_url)



NgrokTunnel: "https://e5da-34-80-30-251.ngrok-free.app" -> "http://localhost:8501"
