In [2]:
import os 

In [3]:
# Move up to the project root so that `src.` imports and relative config paths
# resolve. The guard keeps the cell idempotent: an unconditional chdir("../")
# climbs one more directory every time the cell is re-run.
# Assumes the project root is the directory that contains `src/`.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
# Show the current working directory to confirm where the notebook is running from.
%pwd

'c:\\Users\\zeyne\\Containerized-Bank-Customer-Churn-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path
from typing import Any

In [18]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    Attributes:
        root_dir: Directory that receives the stage outputs; DataTransformation
            writes ``preprocessor.pkl``, ``train.csv`` and ``test.csv`` here.
        data_path: CSV file that DataTransformation reads as its input.
        parameters: Free-form stage parameters keyed by name. Not read by the
            code visible in this notebook.
    """

    root_dir: Path
    data_path: Path
    # `Any` (typing) is the correct "any value" type; the builtin `any` is a function.
    parameters: dict[str, Any]
   
   

In [7]:
from src.Containerized_Bank_Customed_Churn_Prediction.constants import *
from src.Containerized_Bank_Customed_Churn_Prediction.utils.common import read_yaml,create_directories

In [19]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage configuration objects.

    On construction the config, params and schema files are read with
    ``read_yaml`` and the directory named by ``config.artifacts_root`` is
    created through ``create_directories``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        # The loaded objects are accessed by attribute below, so read_yaml is
        # expected to return an attribute-accessible mapping.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation settings and create its output directory.

        Returns:
            A DataTransformationConfig populated from the ``data_transformation``
            section of the main configuration file.
        """
        config = self.config.data_transformation

        create_directories([config.root_dir])

        # Coerce to Path so the values match the types the dataclass declares;
        # YAML delivers them as plain strings.
        return DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path),
            parameters=config.parameters,
        )



In [9]:
import os 
from src.Containerized_Bank_Customed_Churn_Prediction import logger
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier



In [10]:
import joblib

In [22]:
class DataTransformation:
    """Data-transformation stage for the churn dataset.

    Separates features from the target, builds the (unfitted) preprocessing
    transformer, and writes a stratified train/test split to ``config.root_dir``.
    """

    def __init__(self, config: "DataTransformationConfig"):
        """Store the stage configuration.

        Args:
            config: Object exposing ``root_dir`` and ``data_path``. The
                annotation is quoted so this class can be defined even when
                the configuration class has not been defined yet.
        """
        self.config = config

    def prep_for_pipeline(self):
        """Load the input CSV and separate features from the target.

        Returns:
            Tuple ``(X, num_cols, cat_cols, y)``: ``X`` is the frame without the
            ``churn`` and ``customer_id`` columns, ``num_cols`` / ``cat_cols``
            are the numeric and object-dtype column labels of ``X``, and ``y``
            is the ``churn`` column.

        Raises:
            KeyError: If ``churn`` or ``customer_id`` is missing from the CSV.
        """
        data = pd.read_csv(self.config.data_path)

        y = data['churn']
        # drop() already returns a new frame, so neither a defensive copy nor
        # in-place mutation is needed.
        X = data.drop(columns=['churn', 'customer_id'])

        # 'number' matches every numeric dtype. Listing only int64/float64
        # skipped e.g. uint64 or int32 columns, which the ColumnTransformer
        # (default remainder='drop') would then discard silently.
        num_cols = X.select_dtypes(include='number').columns
        cat_cols = X.select_dtypes(include=['object']).columns

        return X, num_cols, cat_cols, y

    def num_cat_pipelines(self, num_cols, cat_cols):
        """Build the column-wise preprocessor and pickle it under ``root_dir``.

        Numeric columns are mean-imputed then standardized; categorical columns
        are imputed with the most frequent value then one-hot encoded, ignoring
        categories unseen at fit time.

        Args:
            num_cols: Labels of the numeric feature columns.
            cat_cols: Labels of the categorical feature columns.

        Returns:
            The unfitted ``ColumnTransformer``.
        """
        num_pipe = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('StandardScaler', StandardScaler()),
        ])

        cat_pipe = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('one-hot', OneHotEncoder(handle_unknown='ignore')),
        ])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', num_pipe, num_cols),
                ('cat', cat_pipe, cat_cols),
            ]
        )

        # NOTE(review): the transformer is pickled before any fit() call, so
        # preprocessor.pkl holds only the pipeline definition and must be
        # fitted on training data after loading.
        os.makedirs(self.config.root_dir, exist_ok=True)
        joblib.dump(preprocessor, os.path.join(self.config.root_dir, "preprocessor.pkl"))

        return preprocessor

    def train_test_split(self, X, y, test_size=0.2, random_state=42):
        """Create a stratified split and write ``train.csv`` / ``test.csv``.

        Args:
            X: Feature frame.
            y: Target column; also used for stratification.
            test_size: Share of rows assigned to the test set.
            random_state: Seed that makes the split reproducible.

        Returns:
            Tuple ``(train, test)`` of the frames written to disk, each holding
            the features with the target appended as the last column.
        """
        # Inside a method body the bare name resolves to the module-level
        # sklearn function, not to this method of the same name.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )

        train = pd.concat([X_train, y_train], axis=1)
        test = pd.concat([X_test, y_test], axis=1)

        # Ensure the destination exists even when the config was built by hand.
        os.makedirs(self.config.root_dir, exist_ok=True)
        train.to_csv(os.path.join(self.config.root_dir, 'train.csv'), index=False)
        test.to_csv(os.path.join(self.config.root_dir, 'test.csv'), index=False)

        logger.info("Split işlemi yapıldı")
        print(train.shape)
        print(test.shape)

        return train, test

            





In [23]:
# Run the data-transformation stage end to end: load the configuration, split
# features from the target, build and pickle the preprocessor, then write the
# train/test CSV files.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    X, num_cols, cat_cols, y = data_transformation.prep_for_pipeline()
    preprocessor = data_transformation.num_cat_pipelines(num_cols, cat_cols)
    data_transformation.train_test_split(X, y)
except Exception:
    # Record the failure in the project log, then re-raise with the original
    # traceback intact (a bare `raise` adds no extra frame, unlike `raise e`).
    logger.exception("Data transformation stage failed")
    raise

[2025-08-06 22:22:02,840:INFO: common: yaml file:config\config.yaml loaded successfully]
[2025-08-06 22:22:02,842:INFO: common: yaml file:params.yaml loaded successfully]
[2025-08-06 22:22:02,846:INFO: common: yaml file:schema.yaml loaded successfully]
[2025-08-06 22:22:02,846:INFO: common: created directory at:artifacts]
[2025-08-06 22:22:02,849:INFO: common: created directory at:artifacts/data_transformation]
[2025-08-06 22:22:02,919:INFO: 2274172490: Split işlemi yapıldı]
(8000, 11)
(2000, 11)
