In [None]:
# %% [markdown]
# # Feature Engineering Analysis
#
# Bu notebook, `feature_engineering` modülünü kullanarak veri setiniz üzerinde özellik mühendisliği adımlarını gerçekleştirir ve sonuçları analiz eder.
#
# **Adımlar:**
# 1. Veri Yükleme
# 2. Özellik Ölçeklendirme
# 3. Etkileşim Terimleri Oluşturma
# 4. Özellik Mühendisliği Sonrası Analiz
# 5. Sonuçları Kaydetme

In [None]:
# %%
import sys
import logging
import pandas as pd
from pathlib import Path
from IPython.display import display

In [None]:
# %% [1] Imports
# Register the project source directory on sys.path *before* importing from it.
# Without this, the imports below run ahead of any path setup on a fresh
# kernel (Restart & Run All) and cannot be resolved.
# NOTE(review): presumably the `source` package is reachable via ../03-source
# relative to the notebook's working directory -- confirm the layout.
_project_source_dir = str(Path("../03-source").resolve())
if _project_source_dir not in sys.path:
    # Idempotent: re-running the cell never adds a duplicate entry.
    sys.path.append(_project_source_dir)

# Project-local helpers used throughout the notebook.
from source.feature_engineering import (
    scale_features,
    create_interaction_terms,
)
from source.utils.config_loader import load_config
from source.utils.logger import setup_logger

In [None]:

# %% [2] Logging Setup
# ANSI escape sequences: GREEN switches the terminal text to green,
# RESET restores the default rendering afterwards.
GREEN, RESET = "\033[92m", "\033[0m"

In [None]:
# Log formatter that colors every emitted line green.
class CustomFormatter(logging.Formatter):
    """Formatter that wraps each rendered record in the GREEN/RESET codes."""

    def format(self, record):
        """Return the standard formatted message surrounded by GREEN and RESET."""
        rendered = super().format(record)
        return GREEN + rendered + RESET

In [None]:

# Send root-logger output through a stream handler that uses CustomFormatter,
# with INFO as the root level.
handler = logging.StreamHandler()
handler.setFormatter(
    CustomFormatter(fmt="%(asctime)s [%(levelname)s] %(message)s")
)
logging.basicConfig(handlers=[handler], level=logging.INFO)

In [None]:

# %% [3] Add Source to Sys Path
# Resolve the source directory and register it once; a repeated run of this
# cell leaves sys.path unchanged.
source_path = Path("../03-source").resolve()
if sys.path.count(str(source_path)) == 0:
    sys.path.append(str(source_path))


In [None]:
# %% [5] Load Configuration
# Read the project settings file; its path is resolved against the current
# working directory.
CONFIG_PATH = Path("../00-config/settings.yml").resolve()
config = load_config(CONFIG_PATH)

# A None result is treated as a failed load: log it and stop the notebook.
if config is None:
    logging.error("Failed to load configuration. Terminating notebook.")
    sys.exit(1)

# Directory constants. raw_dir and processed_dir are required keys, while the
# log and plot directories fall back to sibling folders when not configured.
_paths_cfg = config["paths"]
RAW_DIR = Path(_paths_cfg["raw_dir"]).resolve()
PROCESSED_DIR = Path(_paths_cfg["processed_dir"]).resolve()
LOG_DIR = Path(_paths_cfg.get("logs_dir", "../04-logs")).resolve()
PLOTS_DIR = Path(_paths_cfg.get("plots_dir", "../06-plots")).resolve()

In [None]:
# Build the notebook's named logger. The log file goes under LOG_DIR and the
# level comes from the optional "logging.level" setting (default "INFO").
_notebook_log_file = LOG_DIR / "feature_engineering_notebook.log"
_notebook_log_level = config.get("logging", {}).get("level", "INFO").upper()

logger = setup_logger(
    name="feature_engineering_notebook",
    log_file=_notebook_log_file,
    log_level=_notebook_log_level,
)

logger.info("=== Feature Engineering Notebook Initialized ===")

In [None]:

# %% [6] Load Data
# Read the preprocessed CSV from PROCESSED_DIR and preview its first rows.
# Any failure is logged and then re-raised so the run stops here.
try:
    file_path = PROCESSED_DIR / "epa_long_preprocessed.csv"
    df = pd.read_csv(file_path)
    logger.info(f"Data successfully loaded from {file_path}")
    display(df.head())
except Exception as load_error:
    logger.error(f"Error loading data: {load_error}")
    raise

In [None]:

# %% [7] Feature Scaling
# Scale the selected columns with the "standard" method and preview them.
# Failures are logged and re-raised.
try:
    # For example, if the columns 'feature1' and 'feature2' exist.
    # These column names could instead be read from your config file.
    features_to_scale = ["feature1", "feature2"]
    logger.info(f"Scaling features: {features_to_scale} using 'standard' method")
    df_scaled = scale_features(
        df,
        cols=features_to_scale,
        method="standard",
    )
    display(df_scaled[features_to_scale].head())
except Exception as scaling_error:
    logger.error(f"Error in feature scaling: {scaling_error}")
    raise

In [None]:
# %% [8] Create Interaction Term
# Combine 'feature1' and 'feature2' with the "multiply" operation, store the
# result as a new column on df_scaled, and preview the three columns together.
# Failures are logged and re-raised.
try:
    logger.info("Creating interaction term between 'feature1' and 'feature2'")
    interaction_term = create_interaction_terms(
        df_scaled,
        col1="feature1",
        col2="feature2",
        operation="multiply",
    )
    df_scaled["feature1_feature2_multiply"] = interaction_term
    display(
        df_scaled[["feature1", "feature2", "feature1_feature2_multiply"]].head()
    )
except Exception as interaction_error:
    logger.error(f"Error in creating interaction term: {interaction_error}")
    raise

In [None]:

# %% [9] Post-Feature-Engineering Analysis
# Summarize the engineered frame: shape, column names, dtypes and per-column
# missing-value counts. A failure here is only logged (not re-raised), so the
# cells that follow still run.
try:
    logger.info("Generating basic info after feature engineering")
    basic_info_dict = {
        "Shape": df_scaled.shape,
        "Columns": df_scaled.columns.tolist(),
        "Data Types": df_scaled.dtypes.to_dict(),
        "Missing Values": df_scaled.isna().sum().to_dict(),
    }
    display(basic_info_dict)
except Exception as summary_error:
    logger.error(
        f"Error generating basic info after feature engineering: {summary_error}"
    )


In [None]:
# %% [10] Save the Engineered Features
# Write the engineered frame to PROCESSED_DIR as CSV without the index column.
# Failures are logged and re-raised.
try:
    output_path = PROCESSED_DIR / "epa_features.csv"
    df_scaled.to_csv(output_path, index=False)
    logger.info(f"Feature engineered data saved to {output_path}")
except Exception as save_error:
    logger.error(f"Error saving feature engineered data: {save_error}")
    raise

# %% [11] Feature Engineering Notebook Completed
# Final marker in the log: every step above finished.
logger.info("=== Feature Engineering Notebook Completed ===")