In [11]:
# ==========================================
# STEP 1: UPLOAD & EXTRACT ANY ZIP FILE
# ==========================================

from google.colab import files
import zipfile
import os

# Upload one or more files through the Colab file picker. The returned
# mapping is iterated by file name below.
uploaded = files.upload()

# Directory the archives are unpacked into and listed from.
extract_dir = "/content/"

# Extract every uploaded zip archive. The extension check is
# case-insensitive so that e.g. "MOVIES.ZIP" is not silently skipped.
zip_names = [name for name in uploaded.keys() if name.lower().endswith(".zip")]
for filename in zip_names:
    print(f"📦 Extracting {filename} ...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    print("✅ Extraction complete")

if not zip_names:
    # Make the "nothing happened" case visible instead of ending silently.
    print("No .zip file found among the uploaded files; nothing extracted.")

# List files after extraction (kept as the last expression so the notebook
# displays the directory listing).
print("\n📂 Files in /content:")
os.listdir(extract_dir)


Saving movies.zip to movies.zip
📦 Extracting movies.zip ...
✅ Extraction complete

📂 Files in /content:


['.config',
 'movies.zip',
 'movies_metadata.csv',
 'links.csv',
 'links_small.csv',
 'ratings.csv',
 'credits.csv',
 'ratings_small.csv',
 'keywords.csv',
 'sample_data']

In [17]:
# ==========================================
# MOVIE RECOMMENDATION SYSTEM (UI)
# GENRE + LANGUAGE + SPECIFIC INDUSTRY
# DATASET: TMDB (movies_metadata.csv)
# ==========================================

import pandas as pd
import ast
from ipywidgets import Dropdown, Button, Output, VBox

# -------------------------------
# 1. Load Dataset
# -------------------------------
# Only these columns are used by the rest of the cell, so parse just them
# instead of loading every column of the metadata file and discarding most.
required_columns = ['title', 'genres', 'vote_average', 'release_date',
                    'original_language', 'production_countries']

df = pd.read_csv("/content/movies_metadata.csv",
                 usecols=required_columns, low_memory=False)

# `usecols` keeps the file's own column order, so re-select to get the order
# listed above; then drop rows that are missing any required field.
df = df[required_columns]
df = df.dropna()

# -------------------------------
# 2. Language Mapping (ISO → Full Name)
# -------------------------------
# Language code -> display name. Codes that are not listed here end up
# labelled 'Other' by the fillna() below.
language_map = dict(
    en='English', hi='Hindi', fr='French', es='Spanish', de='German',
    it='Italian', pt='Portuguese', ru='Russian', ja='Japanese',
    ko='Korean', zh='Chinese', ta='Tamil', te='Telugu',
    ml='Malayalam', kn='Kannada', bn='Bengali', mr='Marathi',
)

df['language'] = (
    df['original_language']
    .map(language_map)
    .fillna('Other')
)

# -------------------------------
# 3. Extract Genres
# -------------------------------
def extract_genres(genres):
    """Return the genre names stored in a stringified list of dicts.

    Parameters
    ----------
    genres : str
        Text of a Python literal such as "[{'id': 18, 'name': 'Drama'}]".

    Returns
    -------
    list
        The 'name' value of each entry, in order. An empty list is returned
        when the value cannot be parsed or does not have that shape.
    """
    try:
        return [g['name'] for g in ast.literal_eval(genres)]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Swallow only "bad data" errors. A bare `except:` would also trap
        # KeyboardInterrupt/SystemExit, so the cell could not be interrupted.
        return []

# Parse each row's genres text into a plain list of genre names
df['genre_list'] = df['genres'].map(extract_genres)

# -------------------------------
# 4. Derive Specific Industry
# -------------------------------
# Industry label for productions that list India, keyed by display language.
_INDIAN_INDUSTRY_BY_LANGUAGE = {
    'Hindi': 'Bollywood',
    'Telugu': 'Tollywood',
    'Tamil': 'Kollywood',
    'Malayalam': 'Mollywood',
    'Kannada': 'Sandalwood',
    'Marathi': 'Marathi Cinema',
    'Bengali': 'Bengali Cinema',
}

# Countries whose productions are grouped under 'European Cinema'.
_EUROPEAN_COUNTRIES = (
    'France', 'Germany', 'Italy', 'Spain', 'United Kingdom',
    'Sweden', 'Norway', 'Denmark', 'Netherlands',
)


def get_industry(countries, language):
    """Classify a movie into a film-industry label.

    Rules are checked in priority order, so a co-production is labelled by
    the first rule that matches: USA, then India, Japan, South Korea, and
    finally the listed European countries.

    Parameters
    ----------
    countries : str
        Text of a Python literal holding a list of dicts that each have a
        'name' key, e.g. "[{'iso_3166_1': 'IN', 'name': 'India'}]".
    language : str
        Display name of the movie's language (e.g. 'Hindi'); only used to
        pick the specific industry for Indian productions.

    Returns
    -------
    str
        The industry label; 'Other International' when no rule matches
        (including an empty country list) and 'Unknown' when `countries`
        cannot be parsed or does not have the expected shape.
    """
    try:
        country_names = [c['name'] for c in ast.literal_eval(countries)]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Only malformed data maps to 'Unknown'. The try block is limited to
        # parsing so that unrelated errors (and KeyboardInterrupt) are not
        # hidden the way a bare `except:` around the whole body hid them.
        return 'Unknown'

    # Hollywood
    if 'United States of America' in country_names:
        return 'Hollywood'

    # Indian film industries, split by language
    if 'India' in country_names:
        return _INDIAN_INDUSTRY_BY_LANGUAGE.get(language, 'Indian Regional Cinema')

    # Japan
    if 'Japan' in country_names:
        return 'Japanese Cinema'

    # South Korea
    if 'South Korea' in country_names:
        return 'Korean Cinema'

    # Europe
    if any(c in country_names for c in _EUROPEAN_COUNTRIES):
        return 'European Cinema'

    return 'Other International'

# Classify every movie. Walking the two needed columns in lockstep avoids
# building a Series for each row, which is what `df.apply(..., axis=1)` does,
# and yields the same labels in the same row order.
df['industry'] = [
    get_industry(countries, language)
    for countries, language in zip(df['production_countries'], df['language'])
]

# -------------------------------
# 5. Explode Genres
# -------------------------------
# One row per (movie, genre) pair
movies_exploded = df.explode(column='genre_list')

# Distinct genre names in alphabetical order, used as the dropdown choices;
# rows without a genre are ignored.
genre_options = sorted(set(movies_exploded['genre_list'].dropna()))

# -------------------------------
# 6. Recommendation Function
# -------------------------------
def recommend_movies(genre, top_n=20, language=None, industry=None):
    """Return the highest-rated movies of a genre.

    Reads the module-level ``movies_exploded`` frame (one row per
    movie/genre pair).

    Parameters
    ----------
    genre : str
        Genre name, matched exactly against the ``genre_list`` column.
    top_n : int, default 20
        Maximum number of rows to return.
    language : str, optional
        When given, keep only rows whose ``language`` equals this value.
        ``None`` (the default) applies no language filter.
    industry : str, optional
        When given, keep only rows whose ``industry`` equals this value.
        ``None`` (the default) applies no industry filter.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` matching rows, sorted by ``vote_average`` in
        descending order, with the columns title, genre_list, language,
        industry, vote_average and release_date.
    """
    mask = movies_exploded['genre_list'] == genre
    if language is not None:
        mask = mask & (movies_exploded['language'] == language)
    if industry is not None:
        mask = mask & (movies_exploded['industry'] == industry)

    result = movies_exploded[mask].sort_values(by='vote_average', ascending=False)

    return result[[
        'title',
        'genre_list',
        'language',
        'industry',
        'vote_average',
        'release_date'
    ]].head(top_n)

# -------------------------------
# 7. USER INTERFACE (Google Colab)
# -------------------------------
# Area in which the recommendation table is rendered
output = Output()

# Genre picker, populated from the genres found in the dataset
genre_dropdown = Dropdown(
    description='Select Genre:',
    options=genre_options,
    style=dict(description_width='initial'),
)

# Button the user presses to request recommendations
recommend_button = Button(button_style='success', description='Recommend Movies')

def on_click(b):
    """Show recommendations for the genre currently selected in the dropdown.

    Parameters
    ----------
    b : Button
        The widget that fired the click; not used.
    """
    with output:
        # wait=True defers the clear until the new table is available, so the
        # previous results are replaced in place instead of flickering away.
        output.clear_output(wait=True)
        display(recommend_movies(genre_dropdown.value))

recommend_button.on_click(on_click)

# Last expression of the cell, so the notebook renders the assembled UI
VBox([genre_dropdown, recommend_button, output])


VBox(children=(Dropdown(description='Select Genre:', options=('Action', 'Adventure', 'Animation', 'Comedy', 'C…