In [None]:
!pip install streamlit
!pip install pyngrok





In [None]:
# NOTE(review): redundant — streamlit is already installed by the first cell of this notebook.
!pip install streamlit



In [None]:
%%writefile app.py
# Hybrid Movie Recommender with sentiment & content-based filtering

import streamlit as st
import pandas as pd
import numpy as np
import torch
import ast
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import nltk

# One-time resources
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so expensive setup is wrapped in st.cache_resource to run once
# per server process instead of once per rerun.
@st.cache_resource
def download_nltk_resources():
    """Fetch the VADER lexicon once per server process and return nltk's success flag."""
    return nltk.download("vader_lexicon", quiet=True)


@st.cache_resource
def load_sentiment_pipeline():
    """Build the default Hugging Face sentiment-analysis pipeline once per server process."""
    return pipeline("sentiment-analysis")


download_nltk_resources()

# Sentiment analysis pipeline (called by predict_sentiment)
sentiment_pipeline = load_sentiment_pipeline()

# Load data
@st.cache_data
def load_data():
    """Read the movies and credits CSV files from the current working directory.

    Returns:
        tuple: ``(movies, credits)`` as pandas DataFrames, in that order.
    """
    movies_path, credits_path = "tmdb_5000_movies.csv", "tmdb_5000_credits.csv"
    movies_frame = pd.read_csv(movies_path)
    credits_frame = pd.read_csv(credits_path)
    return movies_frame, credits_frame

movies, credits = load_data()

# Replace missing text with empty strings so the concatenation below never yields NaN
movies["overview"] = movies["overview"].fillna("")
movies["genres"] = movies["genres"].fillna("")

# Combined text (overview followed by genres) that is later fed to the embedding model
movies = movies.assign(description=movies["overview"] + " " + movies["genres"])

# Load embedding model
@st.cache_resource
def load_embedding_model():
    """Create the sentence-embedding model used to embed movie descriptions.

    Decorated with ``st.cache_resource`` so the model object is built once and
    then reused across script reruns.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model


embedder = load_embedding_model()

# Compute cosine similarity matrix
@st.cache_data
def build_similarity_matrix():
    """Embed every movie description and return their pairwise cosine similarities.

    Returns:
        The square matrix produced by ``cosine_similarity``; entry ``[i, j]``
        compares the descriptions at positions ``i`` and ``j`` of ``movies``.
    """
    descriptions = list(movies["description"])
    description_vectors = embedder.encode(descriptions, show_progress_bar=True)
    return cosine_similarity(description_vectors)


cosine_sim = build_similarity_matrix()

# Sentiment prediction function
def predict_sentiment(text):
    """Classify ``text`` with the module-level ``sentiment_pipeline``.

    Args:
        text: Text to classify. Blank or non-string input is never sent to
            the model.

    Returns:
        str: The pipeline's label upper-cased, or ``"NEUTRAL"`` when there is
        nothing to classify or the pipeline call fails.
    """
    if not isinstance(text, str) or not text.strip():
        return "NEUTRAL"
    try:
        # truncation=True clips over-long text to the model's maximum length
        # instead of letting the call fail.
        result = sentiment_pipeline(text, truncation=True)[0]
        return str(result["label"]).upper()
    except Exception:
        # Best-effort by design: a failed prediction degrades to NEUTRAL, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return "NEUTRAL"

# Safe recommendation function
def recommend(movie_title, sentiment_filter=None, top_n=10):
    """Return up to ``top_n`` movies most similar to ``movie_title``.

    The seed movie is resolved by an exact, case-insensitive title match. If
    there is none, the first title containing ``movie_title`` as a literal
    (non-regex) substring is used.

    Args:
        movie_title: Title, or part of a title, of the seed movie.
        sentiment_filter: Optional label. When truthy and not ``"All"``, only
            candidates whose overview is classified with that label
            (compared upper-cased) are kept.
        top_n: Maximum number of rows to return.

    Returns:
        pandas.DataFrame: Rows of ``movies`` ordered by descending similarity,
        restricted to whichever of ``title``, ``genres``, ``overview`` and
        ``poster_path`` exist. If no title matches, a one-row frame whose
        ``title`` holds a "No match found" message.
    """
    titles = movies["title"].fillna("").astype(str)
    query = str(movie_title).strip()

    matches = np.array([], dtype=int)
    if query:
        # Prefer an exact match so that "Alien" cannot resolve to "Aliens".
        matches = np.flatnonzero((titles.str.lower() == query.lower()).to_numpy())
        if len(matches) == 0:
            # regex=False: characters such as ( ) ? * in a title are matched literally.
            matches = np.flatnonzero(
                titles.str.contains(query, case=False, regex=False).to_numpy()
            )
    if len(matches) == 0:
        return pd.DataFrame([{"title": f"No match found for '{movie_title}'"}])

    # Positional index, so it is valid for both cosine_sim and movies.iloc
    # regardless of how the DataFrame is indexed.
    seed_pos = int(matches[0])

    scores = np.asarray(cosine_sim[seed_pos])
    # Stable sort keeps the original row order among equal scores.
    ranked = np.argsort(-scores, kind="stable")
    # Remove the seed by identity; it is not guaranteed to be ranked first
    # when another row has an identical description.
    ranked = ranked[ranked != seed_pos]
    # Over-fetch so the sentiment filter still has rows to choose from.
    candidates = ranked[: top_n + 19]

    recs = movies.iloc[candidates].copy()

    if sentiment_filter and sentiment_filter != "All":
        wanted_label = sentiment_filter.upper()
        labels = recs["overview"].apply(predict_sentiment)
        recs = recs[labels == wanted_label]

    # Limit to top_n after the sentiment filter
    recs = recs.head(top_n)

    available_columns = [
        col for col in ("title", "genres", "overview", "poster_path") if col in recs.columns
    ]
    return recs[available_columns]

# Streamlit UI
# Page chrome
# NOTE(review): Streamlit documents set_page_config as needing to be the first
# Streamlit command on a page; cached loaders run earlier in this script —
# confirm this ordering works on the deployed Streamlit version.
st.set_page_config(layout="wide", page_title="🎬 Hybrid Movie Recommender")
st.title("🎬 Hybrid Movie Recommender System")
st.markdown("Get personalized recommendations based on **genre**, **overview**, and **sentiment**.")

# User inputs: seed movie, optional sentiment label, and number of results
title_options = sorted(movies["title"].dropna().unique())
sentiment_options = ["All", "POSITIVE", "NEGATIVE", "NEUTRAL"]
movie_choice = st.selectbox("Search Movie", title_options)
sentiment = st.selectbox("Filter by Sentiment (optional)", sentiment_options)
top_n = st.slider("Number of recommendations:", min_value=5, max_value=20, value=10)

# Recommend button
if st.button("Recommend"):
    st.subheader(f"Top {top_n} recommendations similar to *{movie_choice}*:")
    results = recommend(movie_choice, sentiment_filter=sentiment, top_n=top_n)

    if results.empty:
        st.warning("No recommendations found.")
    else:
        # One two-column row per recommendation: poster on the left, text on the right.
        for _, row in results.iterrows():
            poster_col, text_col = st.columns([1, 4])
            with poster_col:
                poster_path = row.get("poster_path")
                # A missing column yields None and a missing value yields NaN;
                # neither is a usable path.
                if isinstance(poster_path, str) and poster_path:
                    # Strip any leading slash so the URL never contains "//".
                    st.image(
                        f"https://image.tmdb.org/t/p/w500/{poster_path.lstrip('/')}",
                        width=150,
                    )
                else:
                    st.text("🎬 No image")
            with text_col:
                st.markdown(f"**{row['title']}**")
                st.markdown(f"_Genres_: {row.get('genres', 'N/A')}")
                overview = row.get("overview", "N/A")
                if not isinstance(overview, str):
                    # Slicing a non-string (e.g. NaN) would raise TypeError.
                    overview = "N/A"
                # Only add an ellipsis when the text was actually cut.
                snippet = overview[:300] + ("..." if len(overview) > 300 else "")
                st.markdown(f"_Overview_: {snippet}")

# Raw data preview
with st.expander("🔍 Show Raw Movie Data"):
    # First 20 rows of the three text columns the recommender works with
    preview_columns = ["title", "genres", "overview"]
    st.dataframe(movies.loc[:, preview_columns].head(20))

# Visual Analytics
def _split_genres(raw):
    """Return the genre names contained in one ``genres`` cell.

    Accepts either a list-of-dicts literal such as
    ``[{"id": 28, "name": "Action"}]`` or a plain comma-separated string.
    Blank entries are dropped.
    NOTE(review): presumably the TMDB export stores genres in the list-of-dicts
    form — verify against the CSV.
    """
    if not isinstance(raw, str) or not raw.strip():
        return []
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
        return [str(item["name"]) for item in parsed if item.get("name")]
    return [part.strip() for part in raw.split(",") if part.strip()]


@st.cache_data
def _sample_sentiment_counts(sample_size=100, seed=42):
    """Classify a reproducible random sample of non-empty overviews and count the labels.

    Cached because the body of an expander runs on every script rerun, and each
    run would otherwise repeat one model inference per sampled overview.
    """
    overviews = movies["overview"].dropna()
    overviews = overviews[overviews.str.strip() != ""]
    if overviews.empty:
        return pd.Series(dtype="int64")
    # Never ask for more rows than exist; Series.sample would raise.
    sample = overviews.sample(min(sample_size, len(overviews)), random_state=seed)
    return sample.apply(predict_sentiment).value_counts()


with st.expander("📊 Genre & Sentiment Breakdown"):
    genre_counts = (
        movies["genres"].dropna().map(_split_genres).explode().dropna().value_counts().head(10)
    )
    st.subheader("Top Genres")
    st.bar_chart(genre_counts)

    sentiment_counts = _sample_sentiment_counts()
    st.subheader(f"Sentiment Breakdown (Sample of {int(sentiment_counts.sum())} Overviews)")
    st.bar_chart(sentiment_counts)


Overwriting app.py


In [None]:
!pip install pyngrok --quiet
from pyngrok import ngrok

# Replace this string with your actual token
ngrok.set_auth_token("2wfgCH3dyPyzAi3WeW5xl0cGw2N_6Dwk3uzRre4AjgqCZKq2m")


In [None]:
# Stop the ngrok process managed by pyngrok, if any, before a new tunnel is opened.
ngrok.kill()

In [None]:
!streamlit run app.py &>/content/log.txt &

In [None]:
!pip install pyngrok --quiet
from pyngrok import ngrok

# Set your ngrok auth token (you must have an ngrok account)
ngrok.set_auth_token("2wfgCH3dyPyzAi3WeW5xl0cGw2N_6Dwk3uzRre4AjgqCZKq2m")  # Replace with your actual token

# Correct way to open a tunnel to port 8501 for Streamlit
public_url = ngrok.connect(addr=8501)
print(f"🚀 Public URL: {public_url}")


🚀 Public URL: NgrokTunnel: "https://614cce67cfe6.ngrok-free.app" -> "http://localhost:8501"
