## Set up

### Imports

In [3]:
import logging
import sys
import os
import pandas as pd


# Add the directory two levels up to the import path so the project modules
# (scripts, models) can be imported. For a notebook located directly in
# /notebooks, use sys.path.append(os.path.abspath("..")) instead.
sys.path.append(os.path.abspath("../.."))

from scripts.database import get_session, load_data_to_db
from scripts.data_cleaning import clean_data
from scripts.utils import load_config, setup_logging
from models import MortalityRate2014, ACS2017CountyData


### Config

In [None]:
# Load the project configuration and hand its log path to setup_logging.
# Any failure is reported through the logging module and then re-raised so
# that execution does not continue past this cell.
try:
    config = load_config()
    configured_log_path = config['paths']['log_path']
    setup_logging(configured_log_path)
    logging.info("Starting the data analysis project.")
except Exception as exc:
    logging.error(f"Failed to load config or setup logging: {exc}")
    raise

INFO:root:Starting the data analysis project.
Starting the data analysis project.


### Create session

In [5]:
# Obtain a database session from get_session(). A failure is logged and
# re-raised so the error is not silently swallowed.
try:
    session = get_session()
    logging.info("Database session created successfully.")
except Exception as exc:
    logging.error(f"Failed to create database session: {exc}")
    raise

INFO:root:Database session created successfully.
Database session created successfully.


### Fetch data

In [None]:
# Read the raw Netflix titles CSV and replace every missing value with an
# empty string. fillna is chained (returning a new frame) rather than applied
# with inplace=True, so the frame is never mutated after it has been bound.
netflix_df = pd.read_csv('../../data/raw/netflix_titles.csv').fillna("")

# Combine the descriptive columns into one space-separated 'text' field.
# Because missing values are already "", a missing piece contributes an empty
# string (the separating spaces are still emitted, as with plain concatenation).
text_columns = ['title', 'cast', 'country', 'listed_in', 'description']
netflix_df['text'] = netflix_df[text_columns[0]].str.cat(
    netflix_df[text_columns[1:]], sep=" "
)

# Run the project's cleaning step on the enriched frame.
cleaned_data = clean_data(netflix_df)

print(cleaned_data.head())

INFO:scripts.data_cleaning:Data cleaned. Shape: (8807, 12)
Data cleaned. Shape: (8807, 12)


  show_id     type                  title         director  \
0      s1    Movie   Dick Johnson Is Dead  Kirsten Johnson   
1      s2  TV Show          Blood & Water                    
2      s3  TV Show              Ganglands  Julien Leclercq   
3      s4  TV Show  Jailbirds New Orleans                    
4      s5  TV Show           Kota Factory                    

                                                cast        country  \
0                                                     United States   
1  Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...   South Africa   
2  Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...                  
3                                                                     
4  Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...          India   

           date_added  release_year rating   duration  \
0  September 25, 2021          2020  PG-13     90 min   
1  September 24, 2021          2021  TV-MA  2 Seasons   
2  September 24, 2021        

### Load data to database

In [None]:
# Hand the cleaned frame to the project loader, targeting the
# 'netflix_dataset' table.
load_data_to_db(
    cleaned_data,
    table_name='netflix_dataset',
)

### Exploration