In [1]:
import os

In [2]:
%pwd

'd:\\New Projects\\Customer Churn Prediction - End to End (ML)\\Customer-Churn-Prediction\\research'

In [3]:
# Move from the notebook's `research/` folder up to the project root so that
# relative paths such as config/config.yaml resolve.
# Guarded on the current folder name so that re-running this cell does not
# climb one directory higher each time (the unguarded call is not idempotent).
# NOTE(review): assumes the notebook lives in a folder named `research`, as the
# recorded %pwd output shows; adjust the guard if that folder is renamed.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
%pwd

'd:\\New Projects\\Customer Churn Prediction - End to End (ML)\\Customer-Churn-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataPreprocessingConfig:
    """Immutable settings for the data-preprocessing stage.

    Attributes:
        root_dir: Directory that receives the stage's output files.
        data_path: Location of the raw CSV file that the stage reads.
    """

    root_dir: Path
    data_path: Path

In [6]:
from CustomerChurn.constants import *
from CustomerChurn.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects.

    The three YAML files (config, params, schema) are read once at
    construction time and kept on the instance as ``config``, ``params`` and
    ``schema``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # The loaded config is accessed by attribute; `artifacts_root` is the
        # top-level output directory shared by all stages.
        create_directories([self.config.artifacts_root])

    def get_data_preprocessing_config(self) -> DataPreprocessingConfig:
        """Build the configuration object for the data-preprocessing stage.

        Ensures the stage's ``root_dir`` exists before returning.

        Returns:
            A ``DataPreprocessingConfig`` whose fields are real ``Path``
            objects, matching the dataclass's declared field types.
        """
        stage = self.config.data_preprocessing

        create_directories([stage.root_dir])

        # Values read from YAML are not Path objects; convert them so the
        # dataclass actually holds what its annotations promise.
        return DataPreprocessingConfig(
            root_dir=Path(stage.root_dir),
            data_path=Path(stage.data_path),
        )

In [8]:
import pandas as pd
from CustomerChurn import logger

In [9]:
class DataPreprocessing:
    """Cleans the raw customer data and writes the result to disk."""

    def __init__(self, config: DataPreprocessingConfig):
        """Store the stage configuration.

        Args:
            config: Supplies ``data_path`` (raw CSV to read) and ``root_dir``
                (directory where ``cleaned_data.csv`` is written).
        """
        self.config = config

    def data_cleaning(self) -> pd.DataFrame:
        """Clean the raw CSV and save it as ``cleaned_data.csv``.

        Steps:
            1. Read the CSV at ``config.data_path``.
            2. Drop rows whose ``TotalCharges`` is blank (empty or
               whitespace-only text).
            3. Convert ``TotalCharges`` to float.
            4. Drop the ``customerID`` column.
            5. Write the result, without the index, to
               ``<root_dir>/cleaned_data.csv``.

        Returns:
            The cleaned DataFrame. Surviving rows keep their original index
            labels.

        Raises:
            Exception: Any error from reading, cleaning or writing is logged
                and re-raised unchanged.
        """
        try:
            raw = pd.read_csv(self.config.data_path)

            # Treat any empty / whitespace-only cell as blank, not just the
            # exact single-space string, so stray "" or "  " values cannot
            # break the float conversion below. Real NaN values become the
            # text "nan" here and are therefore kept.
            is_blank = raw['TotalCharges'].astype(str).str.strip().eq("")

            # Build a new frame instead of mutating in place; boolean .loc
            # selection keeps the surviving rows' original index labels.
            data = (
                raw.loc[~is_blank]
                .drop(columns=['customerID'])
                .assign(TotalCharges=lambda df: df['TotalCharges'].astype('float'))
            )

            cleaned_data_path = os.path.join(self.config.root_dir, "cleaned_data.csv")
            data.to_csv(cleaned_data_path, index=False)
            logger.info(f"Cleaned data saved at {cleaned_data_path}")

            return data

        except Exception as e:
            logger.exception(e)
            # Bare `raise` re-raises the active exception with its original
            # traceback intact.
            raise

In [10]:
# Run the data-preprocessing stage end to end: load the YAML configuration,
# build the stage config, then clean the raw data and write it to disk.
# Exceptions are left to propagate as-is; wrapping these calls in a
# try/except that only re-raises adds nothing.
config = ConfigurationManager()
data_preprocessing_config = config.get_data_preprocessing_config()
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
# Assign the result so the cell does not dump the whole DataFrame as output.
cleaned_data = data_preprocessing.data_cleaning()

[2024-09-19 20:03:24,225] 32 CustomerChurnLogger - common - INFO - yaml file: config\config.yaml loaded successfully
[2024-09-19 20:03:24,228] 32 CustomerChurnLogger - common - INFO - yaml file: params.yaml loaded successfully
[2024-09-19 20:03:24,232] 32 CustomerChurnLogger - common - INFO - yaml file: schema.yaml loaded successfully
[2024-09-19 20:03:24,234] 52 CustomerChurnLogger - common - INFO - Created directory at: artifacts
[2024-09-19 20:03:24,236] 52 CustomerChurnLogger - common - INFO - Created directory at: artifacts/data_preprocessing
[2024-09-19 20:03:24,440] 14 CustomerChurnLogger - 2394257494 - INFO - Cleaned data saved at artifacts/data_preprocessing\cleaned_data.csv
