## Set up

### Imports

In [None]:
import logging
import sys
import os
import pandas as pd


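# Add the directory two levels above the current working directory to sys.path
# so the project modules (scripts.*, orm_models) can be imported.
# Note: for a notebook located directly in /notebooks, use
# sys.path.append(os.path.abspath("..")) instead.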
sys.path.append(os.path.abspath("../.."))

from scripts.database import get_session, load_data_to_db
from scripts.data_cleaning import clean_data
from scripts.utils import load_config, setup_logging
from orm_models import MortalityRate2014, ACS2017CountyData


### Config

In [2]:
# Read the project configuration and initialise logging from it.
# Any failure is logged and then re-raised, so the cell errors out instead of
# leaving the notebook without a usable `config`.
try:
    config = load_config('../config/config.yaml')
    # setup_logging receives the `paths.log_path` entry of the loaded config.
    setup_logging(
        config['paths']['log_path']
    )
    logging.info("Starting the data analysis project.")
except Exception as exc:
    logging.error(f"Failed to load config or setup logging: {exc}")
    raise

INFO:root:Starting the data analysis project.
Starting the data analysis project.


### Create session

In [3]:
# Create a database session via get_session() and bind it to `session`.
# A failure is logged and re-raised so the cell fails visibly.
try:
    session = get_session()
    logging.info("Database session created successfully.")
except Exception as exc:
    logging.error(f"Failed to create database session: {exc}")
    raise

INFO:root:Database session created successfully.
Database session created successfully.


### Fetch data

In [4]:
# Read the raw car price CSV, then pass it through the project's cleaning step.
car_price_df = pd.read_csv('../data/raw/car_price_dataset.csv')
cleaned_data = clean_data(car_price_df)

# Preview the first rows. Leaving the expression as the last line of the cell
# lets Jupyter render the DataFrame through its rich display instead of print().
cleaned_data.head()

INFO:scripts.data_cleaning:Data cleaned. Shape: (10000, 10)
Data cleaned. Shape: (10000, 10)


        brand   model  year  engine_size fuel_type    transmission  mileage  \
0         Kia     Rio  2020          4.2    Diesel          Manual   289944   
1   Chevrolet  Malibu  2012          2.0    Hybrid       Automatic     5356   
2    Mercedes     GLA  2020          4.2    Diesel       Automatic   231440   
3        Audi      Q5  2023          2.0  Electric          Manual   160971   
4  Volkswagen    Golf  2003          2.6    Hybrid  Semi-Automatic   286618   

   doors  owner_count  price  
0      3            5   8501  
1      2            3  12092  
2      4            2  11171  
3      2            1  11780  
4      3            3   2867  


### Load data to database

In [5]:
# Write the cleaned frame to the `car_price_dataset` table.
# Errors are logged and re-raised, matching the error handling used by the
# config and session cells of this notebook.
# NOTE(review): re-running this cell may write the same rows again, depending
# on how load_data_to_db handles an existing table — confirm.
try:
    load_data_to_db(cleaned_data, table_name='car_price_dataset')
except Exception as exc:
    logging.error(f"Failed to load data into table car_price_dataset: {exc}")
    raise

INFO:scripts.database:Data successfully loaded into table: car_price_dataset
Data successfully loaded into table: car_price_dataset


### Exploration