In [None]:
import os
# Required Libraries
import sys

import numpy as np
from IPython.display import display


In [None]:
# Make the project's code importable by putting the source directories on sys.path.
# The relative paths are resolved against the current working directory.
# NOTE(review): the local imports use the `source` package -- confirm that it is
# actually reachable from "../src".
for _rel_dir in ("../src/utils", "../src"):
    _abs_dir = os.path.abspath(_rel_dir)
    # Skip entries that are already present so that re-running this cell does
    # not pile up duplicate sys.path entries.
    if _abs_dir not in sys.path:
        sys.path.append(_abs_dir)

In [None]:
# Local imports
from source.utils.logger import setup_logger
from source.eda_exploration import (
    load_data,
    basic_info,
    missing_values,
    distribution_analysis,
    correlation_analysis,
    detect_outliers,
)


In [None]:
# Logger setup
# Create the logger shared by every cell below. The name, log-file path and
# level are passed straight to the project's setup_logger helper.
logger = setup_logger(
    name="eda_notebook",
    log_file="../logs/eda_notebook.log",
    log_level="INFO",
)

In [None]:
# Data Loading
# Read the preprocessed dataset into `df`, which the analysis cells below use.
file_path = "../data/processed/epa_long_preprocessed.csv"
try:
    df = load_data(file_path)
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # log file keeps the full failure context (assumes `logger` is a
    # logging.Logger-compatible object -- confirm against setup_logger).
    logger.exception(f"Error loading data: {e}")
    # Nothing downstream can run without the data, so let the failure propagate.
    raise
else:
    # Kept outside the try body so that only load_data() failures are
    # reported as loading errors.
    logger.info("Data successfully loaded.")

In [None]:
# Basic Info
# Best-effort step: a failure is logged and the notebook keeps running.
try:
    basic_info_dict = basic_info(df)
    logger.info("Basic info generated successfully.")
    display(basic_info_dict)
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain message would lose (assumes `logger` is a logging.Logger-compatible
    # object -- confirm against setup_logger).
    logger.exception(f"Error generating basic info: {e}")

In [None]:
# Missing Values
# Best-effort step: a failure is logged and the notebook keeps running.
try:
    missing_values(df)
    logger.info("Missing values analyzed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain message would lose (assumes `logger` is a logging.Logger-compatible
    # object -- confirm against setup_logger).
    logger.exception(f"Error analyzing missing values: {e}")

In [None]:
# Distribution Analysis
# Best-effort step: a failure is logged and the notebook keeps running.
# `numeric_cols` is created here and reused by the correlation and outlier
# cells; if the column selection itself fails, those cells will in turn log a
# NameError for `numeric_cols`.
try:
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    logger.info(f"Numeric columns identified: {numeric_cols}")
    distribution_analysis(df, numeric_cols)
    # Success marker, matching the other analysis cells.
    logger.info("Distribution analysis completed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain message would lose (assumes `logger` is a logging.Logger-compatible
    # object -- confirm against setup_logger).
    logger.exception(f"Error in distribution analysis: {e}")

In [None]:
# Correlation Analysis
# Best-effort step: a failure is logged and the notebook keeps running.
# Relies on `numeric_cols` from the distribution-analysis cell.
try:
    correlation_analysis(df, numeric_cols)
    logger.info("Correlation analysis completed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain message would lose (assumes `logger` is a logging.Logger-compatible
    # object -- confirm against setup_logger).
    logger.exception(f"Error in correlation analysis: {e}")

In [None]:
# Outlier Detection
# Best-effort step: a failure is logged and the notebook keeps running.
# Relies on `numeric_cols` from the distribution-analysis cell.
try:
    detect_outliers(df, numeric_cols)
    logger.info("Outlier detection completed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, which a
    # plain message would lose (assumes `logger` is a logging.Logger-compatible
    # object -- confirm against setup_logger).
    logger.exception(f"Error in outlier detection: {e}")

In [None]:
# EDA notebook completed
# Final marker in the log. The analysis cells above log their own errors
# without re-raising, so this line is reached even if some of them failed.
logger.info("EDA notebook completed.")