<a href="https://colab.research.google.com/github/haseebashraf5656/Netflix-Hook-Generator/blob/main/Netflix_Hook_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Path of the raw CSV inside the runtime.
file_path = "/content/netflix_titles.csv"

# Read the raw data, report its size, and preview the first rows.
df = pd.read_csv(file_path)
print(
    f"Original dataset loaded: {len(df)} rows, {len(df.columns)} columns",
    "\nFirst 5 rows:",
    sep="\n",
)
display(df.head())

In [None]:
# Work on a copy so the raw frame loaded above stays untouched and this cell
# can be re-run safely.
netflix = df.copy()
print("Missing values before cleaning:")
print(netflix.isnull().sum())

# Trim stray whitespace from the free-text columns.
# Titles keep their original capitalization: str.title() treats an apostrophe
# as a word boundary ("Schitt's Creek" -> "Schitt'S Creek"), and searching
# goes through the lower-cased `title_lower` column anyway.
netflix['date_added'] = netflix['date_added'].str.strip()
netflix['title'] = netflix['title'].str.strip()
netflix['description'] = netflix['description'].str.strip()

# Replace missing values with explicit placeholders. `description` is included
# because later code calls string methods on it, and the summary printed below
# expects zero missing descriptions. The placeholders are non-empty on purpose:
# an empty string would be read back as NaN after a CSV round trip.
netflix = netflix.fillna({
    'director': 'Unknown',
    'cast': 'Unknown',
    'country': 'Unknown',
    'date_added': 'Unknown',
    'rating': 'Not Rated',
    'duration': 'Unknown',
    'description': 'No description available',
})

# Convert release_year to integer; unparseable or missing years become 0.
netflix['release_year'] = (
    pd.to_numeric(netflix['release_year'], errors='coerce').fillna(0).astype(int)
)

# Remove rows that repeat a show_id, keeping the first occurrence.
netflix = netflix.drop_duplicates(subset=['show_id'], keep='first')

# Lower-cased title used as the case-insensitive search key.
# Punctuation is NOT stripped here.
netflix['title_lower'] = netflix['title'].str.lower()

print(f"\nCleaning complete! Final dataset: {netflix.shape[0]} titles")
print(f"Missing descriptions: {netflix['description'].isnull().sum()} (should be 0)")

netflix[['title', 'type', 'release_year', 'rating', 'description']].head(10)

In [None]:
# Persist the cleaned dataset to the current working directory.
netflix.to_csv('cleaned_netflix_titles.csv', index=False)

# `files` is Colab's browser-download helper. It was used here without ever
# being imported, which raises NameError on a fresh kernel. Import it in this
# cell and skip the download step when the notebook runs outside Colab.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download('cleaned_netflix_titles.csv')
    print("Cleaned file saved and downloaded!")
else:
    print("Cleaned file saved to cleaned_netflix_titles.csv (browser download is only available in Colab).")

In [6]:
import pandas as pd
import difflib  # For fuzzy matching

In [None]:
# Load the cleaned dataset. The uploaded copy under sample_data is preferred;
# if it is absent, fall back to the file the export cell writes to the working
# directory, so a fresh "Run All" does not stop with FileNotFoundError.
try:
    df = pd.read_csv('/content/sample_data/cleaned_netflix_titles.csv')
except FileNotFoundError:
    df = pd.read_csv('cleaned_netflix_titles.csv')

df.head()

In [None]:
# Keep the first row for every normalized title, then index those rows by
# `title_lower` so a lookup returns a {column: value} dict for that title.
df_unique = df.drop_duplicates(subset='title_lower', keep='first')
title_dict = df_unique.set_index('title_lower').to_dict(orient='index')

print(
    f"Total unique titles (after removing duplicates): {len(title_dict)}",
    f"Original rows: {len(df)}",
    sep="\n",
)

# Alternative names users may type, mapped to the lower-cased dataset title
# (expand as needed).
aliases = {
    **dict.fromkeys(('money heist', 'house of paper'), 'la casa de papel'),
    'squid games': 'squid game',
}

In [11]:

def get_engaging_description(user_title):
    """Build a promotional one-liner for the title the user typed.

    The input is lower-cased, stripped, and mapped through `aliases`. It is
    then looked up in `title_dict` first exactly and, failing that, through
    difflib fuzzy matching (best single candidate, similarity >= 0.8).

    Args:
        user_title: Title text as entered by the user.

    Returns:
        One of three message shapes, which the UI callback relies on:
        - "Get hooked on '<title>': <description>! <hook>" for an exact match,
        - "Closest match: '<title>'. Get hooked: <description>! <hook>" for a
          fuzzy match,
        - "Sorry, '<input>' not found ..." when nothing matches.
    """
    user_title_lower = user_title.lower().strip()
    matched_title = aliases.get(user_title_lower, user_title_lower)

    row = title_dict.get(matched_title)
    if row is not None:
        prefix = f"Get hooked on '{row['title']}': "
    else:
        # difflib only accepts strings; a missing title read back from CSV
        # would appear as a NaN key and must not reach the matcher.
        candidates = [key for key in title_dict if isinstance(key, str)]
        close_matches = difflib.get_close_matches(matched_title, candidates, n=1, cutoff=0.8)
        if not close_matches:
            return f"Sorry, '{user_title}' not found in the dataset. Try a different name or check spelling!"
        row = title_dict[close_matches[0]]
        prefix = f"Closest match: '{row['title']}'. Get hooked: "

    # Missing values come back from read_csv as float NaN, not str. Guard them
    # so the string calls below cannot raise AttributeError.
    original_desc = row['description']
    if not isinstance(original_desc, str) or not original_desc.strip():
        original_desc = "No description available"
    genres = row['listed_in']
    if not isinstance(genres, str):
        genres = ''

    # Drop the trailing period so the sentence can end with "!" instead.
    return f"{prefix}{original_desc.rstrip('.')}! {get_hook_phrase(genres)}"

In [10]:
def get_hook_phrase(genres):
    """Return a short teaser sentence chosen from a genre string.

    Keywords are matched case-insensitively as substrings, in priority order:
    action, drama, comedy, horror. The first hit wins.

    The comedy keyword is the stem 'comed' rather than 'comedy', so plural
    labels such as "Comedies" match too. The literal 'comedy' is not a
    substring of "comedies", so those labels previously fell through to the
    generic phrase.

    Args:
        genres: Genre text, e.g. a comma-separated list of genre labels.
            Non-string values (such as NaN for a missing cell) are accepted.

    Returns:
        The teaser sentence for the first matching genre, or a generic one
        when nothing matches or `genres` is not a string.
    """
    default_phrase = "An unforgettable adventure awaits."
    if not isinstance(genres, str):
        return default_phrase

    genres_lower = genres.lower()
    hooks = (
        ('action', "Packed with thrills and edge-of-your-seat excitement."),
        ('drama', "An emotional rollercoaster you won't forget."),
        ('comed', "Laugh-out-loud fun for everyone."),
        ('horror', "Chills and scares that will keep you up at night."),
    )
    for keyword, phrase in hooks:
        if keyword in genres_lower:
            return phrase
    return default_phrase

In [12]:
# Import ipywidgets for GUI
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

In [13]:
# Header
# The popcorn emoji is written as an HTML numeric entity (U+1F37F) so it
# survives any re-encoding of the notebook file; the previous literal had
# been corrupted into mojibake.
header = widgets.HTML(
    value="<h2 style='color: #E50914; text-align: center;'>&#127871; Netflix Movie/TV Show Hook Generator</h2>"
          "<p style='text-align: center;'>Type a movie or show name (e.g., Squid Game, Money Heist, Inception) and get an exciting description!</p>"
)

# Input box
text_input = widgets.Text(
    value='',
    placeholder='Enter movie or TV show name...',
    description='Search:',
    layout=widgets.Layout(width='70%'),
    style={'description_width': 'initial'}
)

# Search button
button = widgets.Button(
    description='Get Hook!',
    button_style='danger',  # Netflix red
    tooltip='Click to search',
    icon='search'
)

# Output area where the click handler renders its result
output = widgets.Output(layout=widgets.Layout(margin='20px 0 0 0'))

# Arrange vertically
ui = widgets.VBox([header, text_input, button, output])

In [14]:
def on_button_click(b):
    """Handle a click on the search button and render the result in `output`.

    Reads the query from `text_input`, asks `get_engaging_description` for the
    message, and shows either:
    - a warning when the query is empty,
    - an orange notice for "not found" and "closest match" messages, or
    - a dark card with title, type, year, rating, genres, and hook text for
      an exact match.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    import html  # stdlib; escapes user/dataset text before it is put in HTML

    with output:
        clear_output()
        user_title = text_input.value.strip()
        if not user_title:
            # U+26A0 U+FE0F warning sign, written as escapes so the character
            # survives re-encoding of the notebook file.
            print("\u26a0\ufe0f Please enter a movie or TV show name!")
            return

        result = get_engaging_description(user_title)
        notice = f"<p style='font-size:18px; color:orange;'><b>{html.escape(result)}</b></p>"

        # Classify by message prefix, not by substring. A title or description
        # that merely contains the word "Sorry" is still a successful match.
        if result.startswith(("Sorry,", "Closest match:")):
            display(HTML(notice))
            return

        # Only exact matches reach this point, so the alias-resolved key is
        # looked up directly; no second fuzzy search is needed.
        user_lower = user_title.lower()
        row = title_dict.get(aliases.get(user_lower, user_lower))
        if row is None:
            display(HTML(notice))
            return

        # Remove the known lead-in instead of splitting on the first ": ",
        # which would cut inside titles that themselves contain ": ".
        prefix = f"Get hooked on '{row['title']}': "
        hook_text = result[len(prefix):] if result.startswith(prefix) else result

        title, mtype, year, rating, genres = (
            html.escape(str(row[key]))
            for key in ('title', 'type', 'release_year', 'rating', 'listed_in')
        )

        # Display nicely (clapper-board emoji and bullets as HTML entities).
        display(HTML(f"""
        <div style='background:#141414; padding:20px; border-radius:10px; color:white;'>
            <h3 style='color:#E50914; margin-top:0;'>&#127916; {title}</h3>
            <p><b>{mtype}</b> &bull; {year} &bull; {rating}</p>
            <p><i>Genres:</i> {genres}</p>
            <hr style='border-color:#333;'>
            <p style='font-size:18px; line-height:1.6;'><b>{html.escape(hook_text)}</b></p>
        </div>
        """))

# Attach the function to button click
button.on_click(on_button_click)

In [15]:
# Run this cell to render the assembled widget UI below it.
display(ui)

VBox(children=(HTML(value="<h2 style='color: #E50914; text-align: center;'>üçø Netflix Movie/TV Show Hook Genera‚Ä¶