In [1]:
import os

In [2]:
%pwd

'c:\\Users\\SACHIN\\PycharmProjects\\Obesity-Risk-\\notebook'

In [3]:
os.chdir('../')

In [4]:
%pwd

'c:\\Users\\SACHIN\\PycharmProjects\\Obesity-Risk-'

In [5]:
from dataclasses import dataclass
import os
from pathlib import Path

@dataclass(frozen= True)
class DataTransformationConfig:
    root : Path
    train_set : Path
    test_set: Path
    preprocessing_obj: Path


In [6]:
from src.ObesityRisk.constants import *
from src.ObesityRisk.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationsManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_file])

    
    def get_transformation_config(self)-> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root])
        create_directories([config.train_set])
        create_directories([config.test_set])
        #create_directories([config.preprocessing_obj])

        base_model_config = DataTransformationConfig(
            root = Path(config.root),
            train_set = Path(config.train_set),
            test_set = Path(config.test_set),
            preprocessing_obj = Path(config.preprocessing_obj)
        )

        return base_model_config

In [8]:
import os
import sys

import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

from ObesityRisk.utils.common import *
from ObesityRisk import logger
from ObesityRisk.constants import *

In [9]:

class PrepareTransformation:
    def __init__(self, config: DataTransformationConfig):
        self.config = config

    def data_split(self,
                csv_file_path = CSV_FILE_PATH,
                train_set_path = TRAIN_SET_PATH,
                test_set_path = TEST_SET_PATH,
                ):      
         
        self.csv = read_csv(csv_file_path)
        self.train = train_set_path
        self.test = test_set_path
        
        train_df, test_df = train_test_split(self.csv, test_size=0.2, random_state=42)

        train_df.to_csv(self.train)
        test_df.to_csv(self.test)
        logger.info(f'{train_df} and {test_df} split completed')

        return self.train, self.test


    def get_data_transformation_object(self,
                    csv_file_path = CSV_FILE_PATH):

        self.csv = read_csv(csv_file_path)
        numeric_features = self.csv.select_dtypes(include = [int, float]).columns.drop(['id'])
        categorical_features = self.csv.select_dtypes(include = object).columns.drop(['NObeyesdad'])


        numeric_pipeline = Pipeline(
            steps = [
                ('impiter', SimpleImputer(strategy = 'mean')),
                ('scaler', StandardScaler())
                ]
            )

        categorical_pipeline = Pipeline(
            steps = [
                ('imputer', SimpleImputer(strategy = 'most_frequent')),
                ('onehot', OneHotEncoder(handle_unknown = 'ignore')),
                ('scaler', StandardScaler(with_mean = False))
                ]
            )

        logger.info(f'numeric columns: {numeric_features} ')
        logger.info(f'categorical columns: {categorical_features} ')


        preprocessor = ColumnTransformer(
            [
                ('num_pipeline',numeric_pipeline, numeric_features),
                ('cat_pipeline',categorical_pipeline, categorical_features),
            ]
            )

        return preprocessor


    def initiate_data_transformation(self,
                                train_data, test_data, 
                                preprocessing_obj = PREPROCESSOR_PATH,
                                ):

        train_data = read_csv(TRAIN_SET_PATH)
        test_data = read_csv(TEST_SET_PATH)
        self.preprocessing_obj = preprocessing_obj

        logger.info(f"loading {train_data} and {test_data}")

        target_feature = ["NObeyesdad"]

        input_train_features = train_data.iloc[:,1:-1]
        target_input_train_feature = train_data['NObeyesdad']

        input_test_features = test_data.iloc[:,1:-1]
        traget_input_test_feture = test_data['NObeyesdad']

        logger.info(f'loading preprocessing object')

        preprocessing_obj = self.get_data_transformation_object()

        logger.info(f'applying preprocessing on {input_train_features} and {input_test_features}')

        input_train_array = preprocessing_obj.fit_transform(input_train_features)
        input_test_array = preprocessing_obj.transform(input_test_features)

        train_array = np.c_[input_train_array, np.array(target_input_train_feature)]
        test_array = np.c_[input_test_array, np.array(traget_input_test_feture)]

        logger.info("Saving the prerocessing objest")

        """save_pickle(path = self.preprocessing_obj,
        data = preprocessing_obj) """

        return train_array, test_array


config = ConfigurationsManager()
base_model_config = config.get_transformation_config()
data_transformation = PrepareTransformation(config=base_model_config)
train_data, test_data = data_transformation.data_split()
data_transformation.initiate_data_transformation(train_data, test_data)

[2024-03-01 13:49:27,601: INFO: common: yaml file Config\Config.yaml loaded successfully]
[2024-03-01 13:49:27,606: INFO: common: yaml file params.yaml loaded successfully]
[2024-03-01 13:49:27,610: INFO: common: creating directory at artifacts]
[2024-03-01 13:49:27,612: INFO: common: creating directory at artifacts/data_transformation]
[2024-03-01 13:49:27,614: INFO: common: creating directory at artifacts/data_transformation/train_set]
[2024-03-01 13:49:27,617: INFO: common: creating directory at artifacts/data_transformation/test_set]
[2024-03-01 13:49:28,347: INFO: 995533174:           id  Gender        Age    Height      Weight  \
9958    9958    Male  17.000000  1.770000   97.000000   
7841    7841    Male  22.667596  1.753389   54.877111   
9293    9293    Male  21.838323  1.819867  122.024954   
15209  15209  Female  41.000000  1.580000   80.000000   
16515  16515    Male  23.000000  1.800000   95.000000   
...      ...     ...        ...       ...         ...   
11284  11284  

(array([[-1.19937194293628, 0.8008164212166569, 0.34352742257698393, ...,
         0.0, 0.0, 'Obesity_Type_I'],
        [-0.21319639748705266, 0.6102426172010293, -1.2566015711622052,
         ..., 2.512337998171704, 0.0, 'Insufficient_Weight'],
        [-0.3574919247443444, 1.3729279066689062, 1.2941543955367325, ...,
         2.512337998171704, 0.0, 'Obesity_Type_II'],
        ...,
        [-0.5033621741036197, -0.9200956612795341, -0.7581002653633301,
         ..., 2.512337998171704, 0.0, 'Overweight_Level_I'],
        [0.36665003693720544, -0.5744791513782161, 0.9110431198983275,
         ..., 2.512337998171704, 0.0, 'Obesity_Type_III'],
        [2.454316026335855, 0.850493416664713, 0.7613411859746818, ...,
         0.0, 0.0, 'Obesity_Type_I']], dtype=object),
 array([[0.36665003693720544, -0.5853438429923757, 0.9107900874145558,
         ..., 2.512337998171704, 0.0, 'Obesity_Type_III'],
        [-1.025369500728115, 0.571361476883831, -0.3022543255259587, ...,
         2.512337998

In [10]:
try:
    config = ConfigurationsManager()
    base_model_config = config.get_transformation_config()
    data_transformation = PrepareTransformation(config=base_model_config)
    train_data, test_data = data_transformation.data_split()
    data_transformation.initiate_data_transformation(train_data, test_data)
except Exception as e:
    raise e

[2024-02-29 21:24:02,773: INFO: common: yaml file Config\Config.yaml loaded successfully]
[2024-02-29 21:24:02,780: INFO: common: yaml file params.yaml loaded successfully]
[2024-02-29 21:24:02,782: INFO: common: creating directory at artifacts]
[2024-02-29 21:24:02,786: INFO: common: creating directory at artifacts/data_transformation]
[2024-02-29 21:24:02,787: INFO: common: creating directory at artifacts/data_transformation/train_set]
[2024-02-29 21:24:02,790: INFO: common: creating directory at artifacts/data_transformation/test_set]


[2024-02-29 21:24:03,470: INFO: 1504628871:           id  Gender        Age    Height      Weight  \
9958    9958    Male  17.000000  1.770000   97.000000   
7841    7841    Male  22.667596  1.753389   54.877111   
9293    9293    Male  21.838323  1.819867  122.024954   
15209  15209  Female  41.000000  1.580000   80.000000   
16515  16515    Male  23.000000  1.800000   95.000000   
...      ...     ...        ...       ...         ...   
11284  11284  Female  18.000000  1.722461   80.442775   
11964  11964    Male  22.000000  1.750000   95.000000   
5390    5390    Male  21.000000  1.620000   68.000000   
860      860  Female  26.000000  1.650125  111.939671   
15795  15795    Male  37.997912  1.774330  107.998815   

      family_history_with_overweight FAVC      FCVC       NCP       CAEC  \
9958                             yes  yes  3.000000  3.000000     Always   
7841                             yes  yes  2.000000  4.000000  Sometimes   
9293                             yes  yes  