In [None]:
# %% [markdown]
# # Exploratory Data Analysis (EDA)
#
# This notebook performs Exploratory Data Analysis (EDA) steps on your dataset and analyzes the findings.


In [None]:
# %%
import sys
import logging
import numpy as np
from pathlib import Path
from IPython.display import display

In [None]:
# %% [1] Imports
# Register the project source directory BEFORE importing from it. On a fresh
# kernel (Restart & Run All) the `source.*` imports below would otherwise run
# before section [3] has extended sys.path. The guard keeps this idempotent,
# so section [3] simply becomes a no-op.
# NOTE(review): assumes the `source` package is resolved through ../03-source
# (the same directory section [3] registers) — confirm against the repo layout.
_source_dir = str(Path("../03-source").resolve())
if _source_dir not in sys.path:
    sys.path.append(_source_dir)

from source.eda_exploration import (
    load_data,
    basic_info,
    missing_values,
    distribution_analysis,
    correlation_analysis,
    detect_outliers,
)
from source.utils.config_loader import load_config
from source.utils.logger import setup_logger

In [None]:
# %% [2] Logging Setup
# ANSI escape sequences used to colorize log output.
RESET = "\033[0m"   # switches the terminal back to its default attributes
GREEN = "\033[92m"  # switches the foreground color to green

In [None]:
# Formatter that wraps every rendered log line in the green/reset escape codes
class CustomFormatter(logging.Formatter):
    """Log formatter whose output is colored green.

    The record is rendered by ``logging.Formatter`` as usual; the resulting
    text is then prefixed with ``GREEN`` and suffixed with ``RESET``.
    """

    def format(self, record):
        """Return the formatted ``record`` surrounded by the color codes."""
        rendered = super().format(record)
        return GREEN + rendered + RESET

In [None]:
# Root logging: a single stream handler that renders records through
# CustomFormatter, with the root level set to INFO.
handler = logging.StreamHandler()
handler.setFormatter(
    CustomFormatter(fmt="%(asctime)s [%(levelname)s] %(message)s")
)
logging.basicConfig(handlers=[handler], level=logging.INFO)

In [None]:
# %% [3] Add Source to Sys Path
# Resolve the source directory to an absolute path and register it on
# sys.path exactly once, so re-running this cell adds no duplicate entry.
source_path = Path("../03-source").resolve()
if sys.path.count(str(source_path)) == 0:
    sys.path += [str(source_path)]

In [None]:
# %% [4] Import EDA Functions
# Already imported in [1]

In [None]:
# %% [5] Load Configuration
# Load the project settings and derive the directories used by the rest of
# the notebook.
CONFIG_PATH = Path("../00-config/settings.yml").resolve()
config = load_config(CONFIG_PATH)

if config is None:
    logging.error("Failed to load configuration. Terminating notebook.")
    # Raise a regular exception rather than calling sys.exit(): sys.exit() is
    # meant for terminating a process, whereas in a notebook an ordinary
    # exception stops the run with a traceback that names the offending path.
    raise RuntimeError(f"Failed to load configuration from {CONFIG_PATH}")

# "raw_dir" and "processed_dir" are required keys under "paths" (a missing key
# raises KeyError); "logs_dir" and "plots_dir" fall back to the defaults below.
RAW_DIR = Path(config["paths"]["raw_dir"]).resolve()
PROCESSED_DIR = Path(config["paths"]["processed_dir"]).resolve()
LOG_DIR = Path(config["paths"].get("logs_dir", "../04-logs")).resolve()
PLOTS_DIR = Path(config["paths"].get("plots_dir", "../06-plots")).resolve()

In [None]:
# Setup logger
# Notebook logger: the level is read from the config's "logging" -> "level"
# entry (upper-cased, "INFO" when absent) and the log file path is placed
# under LOG_DIR.
logger = setup_logger(
    log_level=config.get("logging", {}).get("level", "INFO").upper(),
    log_file=LOG_DIR / "eda_notebook.log",
    name="eda_notebook",
)

logger.info("=== EDA Notebook Initialized ===")

In [None]:
# %% [6] Load Data
# Read the preprocessed CSV from PROCESSED_DIR. Every later section needs
# `df`, so a failure here is logged and then re-raised to stop the run.
try:
    file_path = PROCESSED_DIR / "epa_long_preprocessed.csv"
    df = load_data(file_path)
except Exception as exc:
    logger.error(f"Error loading data: {exc}")
    raise
else:
    logger.info("Data successfully loaded.")

In [None]:
# %% [7] Basic Info
# Run basic_info on the loaded frame and show its result inline.
# Best-effort: a failure is logged and the notebook carries on.
try:
    basic_info_dict = basic_info(df)
    logger.info("Basic info generated successfully.")
    display(basic_info_dict)
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # failing call can be located from the log instead of only its message.
    logger.exception(f"Error generating basic info: {e}")

In [None]:
# %% [8] Missing Values Analysis
# Run the missing-values analysis without saving its output.
# Best-effort: a failure is logged and the notebook carries on.
try:
    missing_values(df, save=False)  # set save=True and provide save_path if needed
    logger.info("Missing values analyzed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # failing call can be located from the log instead of only its message.
    logger.exception(f"Error analyzing missing values: {e}")

In [None]:
# %% [9] Distribution Analysis
# Collect the numeric columns of `df` and pass them to distribution_analysis.
# `numeric_cols` is reused by sections [10] and [11].
# Best-effort: a failure is logged and the notebook carries on.
try:
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    logger.info(f"Numeric columns identified: {numeric_cols}")
    distribution_analysis(df, numeric_cols, save=False)  # set save=True and provide save_dir if needed
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # failing call can be located from the log instead of only its message.
    logger.exception(f"Error in distribution analysis: {e}")

In [None]:
# %% [10] Correlation Analysis
# Run correlation_analysis over the numeric columns without saving output.
# `numeric_cols` is assigned in section [9]; if that assignment never ran, the
# call below raises NameError, which is caught and logged here like any other
# failure.
try:
    correlation_analysis(df, numeric_cols, save=False)  # set save=True and provide save_dir if needed
    logger.info("Correlation analysis completed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # failing call can be located from the log instead of only its message.
    logger.exception(f"Error in correlation analysis: {e}")

In [None]:
# %% [11] Outlier Detection
# Run detect_outliers over the numeric columns without saving output.
# `numeric_cols` is assigned in section [9]; if that assignment never ran, the
# call below raises NameError, which is caught and logged here like any other
# failure.
try:
    detect_outliers(df, numeric_cols, save=False)  # set save=True and provide save_dir if needed
    logger.info("Outlier detection completed successfully.")
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # failing call can be located from the log instead of only its message.
    logger.exception(f"Error in outlier detection: {e}")

# %% [12] EDA Notebook Completed
# Final marker in the log; it is emitted even if earlier best-effort sections
# logged errors.
logger.info("=== EDA Notebook Completed ===")