In [1]:
import os

import pandas as pd
import requests

In [2]:
# TMDb API key.
# Prefer the TMDB_API_KEY environment variable so the credential does not have
# to live in the notebook; the literal below is kept only as a fallback so
# existing runs keep working.
# NOTE(review): this fallback key is committed in plain text — rotate it and
# drop the default once TMDB_API_KEY is set wherever the notebook runs.
api_key = os.environ.get("TMDB_API_KEY", "013b31dd3339a724725d88524cfb37ba")

In [None]:

def _build_movie_record(details_response, weather_columns, weather_type):
    """Flatten one TMDb movie-details payload into a single dataset row (a dict)."""
    movie_info = {
        "title": details_response.get("title"),
        "overview": details_response.get("overview", "No overview available"),
        "release_date": details_response.get("release_date", "No release date"),
        "runtime": details_response.get("runtime"),
        "genres": [genre["name"] for genre in details_response.get("genres", [])],
        "status": details_response.get("status"),
        "original_language": details_response.get("original_language"),
        "tagline": details_response.get("tagline"),
        "popularity": details_response.get("popularity"),
        "vote_average": details_response.get("vote_average"),
        "vote_count": details_response.get("vote_count"),
        "production_companies": [company["name"] for company in details_response.get("production_companies", [])],
        "production_countries": [country["name"] for country in details_response.get("production_countries", [])],
        "budget": details_response.get("budget"),
        "revenue": details_response.get("revenue"),
        "poster": f"https://image.tmdb.org/t/p/w500{details_response.get('poster_path')}" if details_response.get("poster_path") else None,
        # The loop variable is deliberately not called `keyword`, to avoid any
        # confusion with the search keyword handled by the caller.
        "keywords": [kw["name"] for kw in details_response.get("keywords", {}).get("keywords", [])]
    }

    credits = details_response.get("credits", {})

    # Cast: keep only the first three cast entries, as name/character pairs.
    # .get() is used so an entry without a character name does not abort the run.
    movie_info["cast"] = [
        {"name": member.get("name"), "character": member.get("character")}
        for member in credits.get("cast", [])[:3]
    ]

    # Crew: only director, producer, and cinematographer are kept. When several
    # crew members share a job, the last one listed overwrites the earlier ones.
    # A column is simply absent from the record when no crew member has that job.
    job_to_column = {
        "Director": "director",
        "Producer": "producer",
        "Director of Photography": "cinematographer",
    }
    for member in credits.get("crew", []):
        column = job_to_column.get(member.get("job"))
        if column is not None:
            movie_info[column] = member.get("name")

    # Weather columns: 1 for the specified weather type, 0 for all others.
    for weather in weather_columns:
        movie_info[weather] = 1 if weather == weather_type else 0

    return movie_info


def add_movies_by_keyword_to_dataset(keyword, current_df=None, weather_type=""):
    """Fetch the TMDb movies tagged with ``keyword`` and append them to a dataset.

    The keyword text is searched on TMDb and the first search result's ID is
    used. Every page of movies for that keyword ID is then walked, full details
    (with credits and keywords appended) are requested for each movie, and one
    row per movie is appended to the dataset.

    Args:
        keyword: Keyword text to search for on TMDb.
        current_df: Existing dataset to extend. When None, an empty DataFrame
            with the expected columns is created first.
        weather_type: Weather label for these movies. The matching weather
            column is set to 1 and the others to 0; with the default "" all
            weather columns are 0.

    Returns:
        A DataFrame holding the rows of ``current_df`` followed by the newly
        fetched movies. When the keyword search yields no results,
        ``current_df`` is returned as-is.

    Notes:
        Reads the module-level ``api_key`` and performs one HTTP request per
        movie, so a popular keyword can take a long time. Progress is printed.
    """
    # Define the list of all weather columns
    weather_columns = ["Clear Sky", "Few Clouds", "Scattered Clouds", "Broken Clouds",
                       "Shower Rain", "Rain", "Thunderstorm", "Snow", "Mist"]

    # Initialize the dataset if it's None
    if current_df is None:
        current_df = pd.DataFrame(columns=[
            "title", "overview", "release_date", "runtime",
            "genres", "status", "original_language", "tagline", "popularity",
            "vote_average", "vote_count", "cast", "director", "producer",
            "cinematographer", "poster", "keywords", "production_companies",
            "production_countries", "budget", "revenue"
        ] + weather_columns)

    # Step 1: Search for the keyword to get the keyword ID.
    # Query values go through `params` so requests URL-encodes them; keywords
    # containing reserved characters (&, #, +, ...) would otherwise corrupt the URL.
    keyword_response = requests.get(
        'https://api.themoviedb.org/3/search/keyword',
        params={'api_key': api_key, 'query': keyword},
    ).json()

    # Check if the keyword exists. `.get` also covers error payloads that carry
    # no 'results' key at all, which previously raised KeyError.
    keyword_matches = keyword_response.get('results')
    if not keyword_matches:
        print(f"No results found for keyword '{keyword}'.")
        return current_df

    keyword_id = keyword_matches[0]['id']
    print(f"Found keyword '{keyword}' with ID: {keyword_id}")

    # Step 2: Use the keyword ID to get associated movies (with pagination)
    page = 1
    movie_data = []

    while True:
        movie_response = requests.get(
            f'https://api.themoviedb.org/3/keyword/{keyword_id}/movies',
            params={'api_key': api_key, 'page': page},
        ).json()

        # Stop at the first page that has no 'results' key or an empty list.
        if not movie_response.get('results'):
            break

        # Fetch full details for each movie on the current page
        for movie in movie_response['results']:
            details_response = requests.get(
                f"https://api.themoviedb.org/3/movie/{movie['id']}",
                params={'api_key': api_key, 'append_to_response': 'credits,keywords'},
            ).json()
            movie_data.append(
                _build_movie_record(details_response, weather_columns, weather_type)
            )

        # Print progress and move to the next page
        print(f"Processed page {page} for keyword '{keyword}' under weather type '{weather_type}'.")
        page += 1

    # Convert movie_data list to a DataFrame and append to current_df
    new_movies_df = pd.DataFrame(movie_data)
    updated_df = pd.concat([current_df, new_movies_df], ignore_index=True)
    print(f"Added {len(new_movies_df)} movies with keyword '{keyword}' under weather type '{weather_type}'.")

    return updated_df


In [None]:
# Search terms grouped by weather condition.
# The dict keys are the same nine labels that add_movies_by_keyword_to_dataset
# uses as its 0/1 weather columns; each list holds the terms that are looked up
# as TMDb keywords for that weather type (the first entry repeats the label itself).
# Some terms appear under more than one weather type (e.g. "Reflective", "Cozy"),
# so the same keyword can be fetched again under a different weather label.
weather_keywords2 = {
    "Clear Sky": [
        "Clear Sky", "Uplifting", "Light-hearted", "Inspirational", "Joyful", "Adventurous",
        "Happy", "Cheerful", "Sunny", "Free-spirited", "Positive", "Bright",
        "Fun", "Playful", "Energetic", "Optimistic", "Joyous", "Carefree", 
        "Exhilarating", "Wholesome", "Feel-good", "Empowering", "High-energy", 
        "Dream-chasing", "Fun-filled"
    ],
    "Few Clouds": [
        "Few Clouds", "Playful", "Relaxed", "Nostalgic", "Romantic", "Warm", "Cozy",
        "Soothing", "Leisurely", "Charming", "Heartwarming", "Sweet", "Dreamy",
        "Sentimental", "Delightful", "Calming", "Endearing", "Soft-hearted", 
        "Tender", "Whimsical", "Lighthearted", "Comforting stories", "Dreamlike", 
        "Affectionate", "Amusing", "Chilled-out"
    ],
    "Scattered Clouds": [
        "Scattered Clouds", "Reflective", "Curious", "Intense", "Melancholic", "Thoughtful",
        "Brooding", "Intriguing", "Pensive", "Questioning", "Somber", "Mellow",
        "Deep", "Insightful", "Soulful", "Wistful", "Philosophical", "Poignant", 
        "Introspective journey", "Contemplative", "Bittersweet", "Quiet strength", 
        "Life lessons", "Inner journey", "Mood-driven", "Calmly intense"
    ],
    "Broken Clouds": [
        "Broken Clouds", "Mysterious", "Suspenseful", "Reflective", "Dark", "Intense",
        "Haunting", "Eerie", "Moody", "Complex", "Shadowy", "Gripping",
        "Unsettling", "Enigmatic", "Foreboding", "Unpredictable", "Intrigue", 
        "Atmospheric tension", "Sinister", "Psychological", "Mind-bending", 
        "Chilling", "Darkly intense", "Surreal", "Strange happenings", "Edge of seat"
    ],
    "Shower Rain": [
        "Shower Rain", "Cozy", "Introspective", "Emotional", "Comforting", "Reflective",
        "Meditative", "Contemplative", "Heartfelt", "Warm", "Sentimental", "Quiet",
        "Reassuring", "Tranquil", "Peaceful", "Gentle", "Soul-soothing", 
        "Intimate moments", "Personal growth", "Tender connections", "Reflective warmth", 
        "Gentle stories", "Warm nostalgia", "Softly dramatic", "Comforting escape", 
        "Simple yet profound", "Delicate storytelling"
    ],
    "Rain": [
        "Rain", "Nostalgic", "Melancholic", "Reflective", "Introspective", "Calm",
        "Serene", "Meditative", "Peaceful", "Thought-provoking", "Somber", "Moody",
        "Soft", "Quiet", "Pensive", "Healing", "Deep contemplation", "Solemn beauty", 
        "Pensive reflection", "Sad yet hopeful", "Soothing sadness", "Inner peace", 
        "Emotional release", "Softly moving", "Heartache", "Life's reflections", 
        "Gentle sadness", "Releasing burdens"
    ],
    "Thunderstorm": [
        "Thunderstorm", "Suspenseful", "Exciting", "Dark", "Adventurous", "Intense",
        "Thrilling", "Dangerous", "Bold", "Fierce", "Dynamic", "Powerful",
        "Gripping", "Dramatic", "Raw", "Electrifying", "High stakes", "Adrenaline rush", 
        "Fearless pursuits", "Shocking revelations", "Unpredictable", "Stormy emotions", 
        "Battle scenes", "Action-packed", "Survival stories", "Strong-willed heroes", 
        "Chaotic situations", "Riveting suspense"
    ],
    "Snow": [
        "Snow", "Cozy", "Nostalgic", "Introspective", "Comforting", "Reflective",
        "Peaceful", "Calm", "Serene", "Heartwarming", "Magical", "Pure",
        "Soft", "Innocent", "Blissful", "Quiet", "Wintry magic", "Simple joys", 
        "Serenity", "Innocence", "Warm hugs", "Winter’s wonder", "Nostalgic memories", 
        "Childlike wonder", "Gentle reflections", "Family warmth", "Magical stories", 
        "Softly lit", "Seasonal joy", "Finding home"
    ],
    "Mist": [
        "Mist", "Mysterious", "Suspenseful", "Dark", "Eerie", "Thought-provoking",
        "Foggy", "Enigmatic", "Creepy", "Haunting", "Vague", "Surreal",
        "Strange", "Bewildering", "Unsettling", "Cryptic", "Unsettling calm", 
        "Otherworldly", "Dreamlike", "Mind-bending", "Haunting suspense", 
        "Unanswered questions", "Psychological tension", "Eerie landscapes", 
        "Unearthly quiet", "Spooky tales", "Shadowy figures", "Intangible fears", 
        "Lost in fog"
    ]
}


In [9]:
# Start with no dataset: passing None to add_movies_by_keyword_to_dataset makes
# it create an empty DataFrame with the expected columns on its first call.
# Assign an existing DataFrame here instead to extend it.
current_df = None

In [None]:
# Loop through each weather type and its keywords, growing `current_df`.
# NOTE(review): this cell used to iterate `weather_keywords`, a name no cell in
# this notebook defines (NameError on Restart & Run All). `weather_keywords2` is
# the only keyword mapping defined here, so it is used instead — confirm that
# this is the intended mapping for this dataset.
for weather_type, keywords in weather_keywords2.items():
    print(f"Processing weather type: {weather_type}")

    # For each keyword associated with the weather type, call the function
    for keyword in keywords:
        print(f"Searching for movies with keyword: '{keyword}' under weather type '{weather_type}'")

        # Call the function and update the dataset with movies associated with each keyword
        current_df = add_movies_by_keyword_to_dataset(keyword, current_df, weather_type)
        print("\nCurrent dataset head:")
        print(current_df.head())

        print("\nCurrent dataset info:")
        # DataFrame.info() prints its own report and returns None, so wrapping
        # it in print() only added a stray "None" line after each report.
        current_df.info()

In [None]:
# Preview the first rows of the accumulated dataset as plain text.
print("Final dataset:")
print(current_df.head())

In [12]:
# Optionally, save the final dataset to a CSV file (the row index is not written).
# NOTE(review): several columns hold Python lists/dicts (genres, cast, keywords,
# production_companies, production_countries); CSV stores them as their string
# form, so they come back as plain strings when the file is read again.
current_df.to_csv("movies_with_weather_associations.csv", index=False)

In [16]:
# Reload the saved dataset from disk, replacing the in-memory `current_df`.
# NOTE(review): the recorded output of this cell is a ParserError ("Buffer
# overflow caught") raised by pandas' default C tokenizer, so as saved this
# reload did not succeed. Presumably free-text fields such as `overview`
# contain stray line-break characters — TODO confirm; passing engine="python"
# to read_csv is a common workaround to try.
current_df = pd.read_csv("movies_with_weather_associations.csv", encoding="utf-8-sig")


ParserError: Error tokenizing data. C error: Buffer overflow caught - possible malformed input file.


In [None]:
# Build a second dataset, `current_df2`, from the weather_keywords2 mapping.
# The accumulator must exist before the loop: the original cell read
# `current_df2` on the first iteration without ever assigning it (NameError on
# a fresh kernel). None makes the function create a fresh, empty dataset;
# assign an existing DataFrame here instead to extend it.
current_df2 = None

for weather_type, keywords in weather_keywords2.items():
    print(f"Processing weather type: {weather_type}")

    # For each keyword associated with the weather type, call the function
    for keyword in keywords:
        print(f"Searching for movies with keyword: '{keyword}' under weather type '{weather_type}'")

        # Call the function and update the dataset with movies associated with each keyword
        current_df2 = add_movies_by_keyword_to_dataset(keyword, current_df2, weather_type)

        # Report on the frame being built here (`current_df2`); the original
        # printed the unrelated `current_df`, so the progress output never changed.
        print("\nCurrent dataset head:")
        print(current_df2.head())

        print("\nCurrent dataset info:")
        # DataFrame.info() prints its own report and returns None, so it is not
        # wrapped in print() (that only added a stray "None" line).
        current_df2.info()

In [None]:
# Save the dataset built from weather_keywords2 to its own CSV file (the row
# index is not written).
current_df2.to_csv("updated_dataset.csv", index=False)
