### Data Loading & Cleaning

In [5]:
import pandas as pd
import warnings
import sys
from pathlib import Path

# Make the project's `lib` directory importable. It is located next to the
# current working directory's parent (i.e. <cwd>/../lib), resolved to an
# absolute path so later imports do not depend on the working directory.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
_lib_dir = str(Path().resolve().parent / 'lib')
if _lib_dir not in sys.path:
    sys.path.append(_lib_dir)

# Project-local cleaning helpers (defined in lib/imdb_data_cleaning).
from imdb_data_cleaning import clean_movie_data, map_genres_to_emotions, extract_unique_emotions

# Display options and warnings
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# Lookup table: genre name -> list of emotion labels associated with it.
# Passed to map_genres_to_emotions() below. Seven distinct emotion labels are
# used across the table: Down, Excited, Happy, Inspired, Relaxed, Scared, Sweet.
genre_to_emotion = {
    "Action": ["Excited"],
    "Adventure": ["Happy", "Excited"],
    "Animation": ["Happy", "Relaxed"],
    "Biography": ["Inspired", "Down"],
    "Comedy": ["Happy", "Relaxed"],
    "Crime": ["Scared", "Excited"],
    "Documentary": ["Inspired"],
    "Drama": ["Down", "Sweet"],
    "Family": ["Happy", "Relaxed"],
    "Fantasy": ["Excited", "Happy"],
    "Film-Noir": ["Scared"],
    "History": ["Inspired", "Down"],
    "Horror": ["Scared"],
    "Music": ["Happy", "Inspired"],
    "Musical": ["Happy", "Relaxed"],
    "Mystery": ["Scared", "Excited"],
    "Romance": ["Sweet", "Happy"],
    "Sci-Fi": ["Excited", "Down"],
    "Sport": ["Inspired"],
    "Thriller": ["Excited", "Scared"],
    "War": ["Down", "Inspired"],
    "Western": ["Excited"],
}

# Load the raw IMDb CSV through the project's cleaning helper.
df = clean_movie_data('../data/imdb.csv')

# Derive each movie's emotions from its genres using the genre_to_emotion table.
df = map_genres_to_emotions(df, genre_to_emotion)

# Collect the distinct emotion labels present in the mapped data.
emotions = extract_unique_emotions(df)

# Show the unique emotions. The helper returns a set, whose iteration order
# changes between kernel sessions; printing a sorted list keeps the saved
# notebook output stable across runs. `emotions` itself is left untouched.
print(sorted(emotions))

{'Down', 'Sweet', 'Relaxed', 'Scared', 'Excited', 'Happy', 'Inspired'}


In [None]:
# Persist the cleaned frame. The Exploratory Analysis section loads
# '../data/imdb_clean.csv', so a fresh "Restart & Run All" needs this file to
# be written here rather than relying on a copy left over from an earlier run.
# NOTE(review): list-valued columns (genre, cast, emotions) are stored in the
# CSV as their string representation and come back as plain strings on read.
df.to_csv('../data/imdb_clean.csv', index=False)

In [6]:
# Rich display of the cleaned frame with the derived `emotions` column
# (pandas truncates a long frame to its first and last rows).
df

Unnamed: 0,poster,title,year,duration,genre,rating,director,cast,description,emotions
0,https://m.media-amazon.com/images/M/MV5BYWRkZj...,The Idea of You,2023,115,"[Comedy, Drama, Romance]",6.4,Michael Showalter,"[Anne Hathaway, Nicholas Galitzine, Ella Rubin...","Solène, a 40-year-old single mom, begins an un...","[Happy, Down, Sweet, Relaxed]"
1,https://m.media-amazon.com/images/M/MV5BZGI4NT...,Kingdom of the Planet of the Apes,2023,145,"[Action, Adventure, Sci-Fi]",7.3,Wes Ball,"[Owen Teague, Freya Allan, Kevin Durand, Peter...","Many years after the reign of Caesar, a young ...","[Happy, Excited, Down]"
2,https://m.media-amazon.com/images/M/MV5BZjIyOT...,Unfrosted,2023,97,"[Biography, Comedy, History]",5.5,Jerry Seinfeld,"[Isaac Bae, Jerry Seinfeld, Chris Rickett, Rac...","In 1963 Michigan, business rivals Kellogg's an...","[Happy, Down, Inspired, Relaxed]"
3,https://m.media-amazon.com/images/M/MV5BMjA5Zj...,The Fall Guy,2023,126,"[Action, Comedy, Drama]",7.3,David Leitch,"[Ryan Gosling, Emily Blunt, Aaron Taylor-Johns...",A down-and-out stuntman must find the missing ...,"[Down, Sweet, Relaxed, Excited, Happy]"
4,https://m.media-amazon.com/images/M/MV5BNTk1MT...,Challengers,2023,131,"[Drama, Romance, Sport]",7.7,Luca Guadagnino,"[Zendaya, Mike Faist, Josh O'Connor, Darnell A...","Tashi, a former tennis prodigy turned coach, t...","[Happy, Down, Inspired, Sweet]"
...,...,...,...,...,...,...,...,...,...,...
9995,https://m.media-amazon.com/images/M/MV5BMzg5MW...,The Greatest Show on Earth,2020,152,"[Drama, Family, Romance]",6.5,Cecil B. DeMille,"[James Stewart, Charlton Heston, Betty Hutton,...","The dramatic lives of trapeze artists, a clown...","[Happy, Down, Sweet, Relaxed]"
9996,https://m.media-amazon.com/images/M/MV5BYzA0ZG...,Berserk: Ougon Jidai-hen I - Haou no Tamago,2020,76,"[Animation, Action, Adventure]",7.5,Toshiyuki Kubooka,"[Hiroaki Iwanaga, Carrie Keranen, Takahiro Sak...",A lone sellsword named Guts gets recruited int...,"[Happy, Excited, Relaxed]"
9997,https://m.media-amazon.com/images/M/MV5BM2U1Mj...,Is-slottet,2020,78,"[Mystery, Drama]",6.5,Per Blom,"[Line Storesund, Hilde Nyeggen Martinsen, Mere...",A couple of twelve-year-old Norwegian girls st...,"[Scared, Down, Sweet, Excited]"
9998,https://m.media-amazon.com/images/M/MV5BMTAwOD...,Loving Pablo,2020,123,"[Biography, Crime, Drama]",6.4,Fernando León de Aranoa,"[Javier Bardem, Penélope Cruz, Peter Sarsgaard...",A journalist strikes up a romantic relationshi...,"[Down, Sweet, Scared, Excited, Inspired]"


### Exploratory Analysis

In [11]:
import sys

# Make <cwd>/../lib importable. The path is resolved to an absolute one (the
# same way the Data Loading cell does it) so the import does not depend on the
# working directory at import time, and it is only added once so re-running
# this cell does not grow sys.path.
_lib_dir = str(Path().resolve().parent / 'lib')
if _lib_dir not in sys.path:
    sys.path.append(_lib_dir)

# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'imdb_data_analysis'" -- confirm that
# lib/ actually contains imdb_data_analysis.py (or fix the module name).
import imdb_data_analysis as eda  # project-local plotting helpers

# Load the cleaned dataset written by the Data Loading & Cleaning section.
# NOTE(review): after a CSV round trip the list-valued columns (genre, cast,
# emotions) are plain strings; presumably the eda helpers parse them -- verify.
df = pd.read_csv('../data/imdb_clean.csv')

# EDA: Distribution of Genres
eda.plot_genre_distribution(df)

# EDA: Distribution of Movie Durations
eda.plot_duration_distribution(df)

# EDA: Distribution of Release Years
eda.plot_year_distribution(df)

# EDA: Distribution of Ratings
eda.plot_rating_distribution(df)

# EDA: Distribution of Emotions
eda.plot_emotion_distribution(df)

# EDA: Crosstab of Genres and Emotions
eda.plot_genre_emotion_crosstab(df)

# EDA: Relationship between Ratings and Emotions
eda.plot_ratings_vs_emotions(df)


ModuleNotFoundError: No module named 'imdb_data_analysis'