In [2]:
import os

In [3]:
# Switch to the project root so the `heart` package imports and relative paths resolve.
# Guarded so that re-running this cell does not climb one directory further each time.
# NOTE(review): assumes the project root is the directory that contains the `heart`
# package folder - confirm against the repository layout.
if not os.path.isdir("heart"):
    os.chdir("../")

In [4]:
# Display the current working directory to check the result of the chdir cell.
os.getcwd()

'/home/ajana/Code/MLProjects/Heart-Disease-Detection'

In [5]:
from heart.components.data_ingesion import DataIngestion
from heart.entity.artifact_entity import DataIngestionArtifact
from heart.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig
from heart.constant.training_pipeline import TARGET_COLUMN,SCHEMA_FILE_PATH
from heart.utils import read_yaml_file



In [30]:
# Build the pipeline-level configuration using the constructor's defaults.
training_config = TrainingPipelineConfig()

In [31]:
# Create the data-ingestion configuration from the pipeline-level configuration.
data_config = DataIngestionConfig(training_pipeline_config=training_config)

In [32]:
# Pass the config by keyword, matching how TrainingPipeline.start_data_ingestion
# constructs DataIngestion, so the call does not depend on parameter order.
ingestion = DataIngestion(data_ingestion_config=data_config)

In [33]:
# Run the ingestion component's feature-store read step.
# NOTE(review): the return value is not stored and no output is shown for this cell -
# confirm whether it is called here only for its side effects.
ingestion.read_data_from_feature_store()

In [38]:
# pandas is first imported in a later cell, so import it here to keep this cell
# runnable on a fresh kernel (Restart & Run All).
import pandas as pd

# Load the CSV at the configured feature-store path.
dframe = pd.read_csv(data_config.feature_store_file_path)

In [42]:
# Show the column labels of the frame loaded from the feature store.
dframe.columns

Index(['age', 'gender', 'impluse', 'pressurehight', 'pressurelow', 'glucose',
       'kcm', 'troponin', 'class'],
      dtype='object')

In [44]:
# Read the feature-store CSV through the ingestion config rather than a hard-coded
# absolute path: the previous literal pointed into one user's home directory and one
# timestamped artifact run, so it could not be reproduced on another machine or run.
df = pd.read_csv(data_config.feature_store_file_path)

In [46]:
# `columns` is an attribute holding a pandas Index, not a method; calling it raises
# "TypeError: 'Index' object is not callable".
df.columns

In [6]:
# Load the schema file located at SCHEMA_FILE_PATH; the result is indexed by string
# keys in the cells below.
_schema_config = read_yaml_file(file_path=SCHEMA_FILE_PATH)


In [9]:
# Pull the categorical column list out of the schema (key is singular: "categorical_column").
cat = _schema_config["categorical_column"]

In [10]:
# Display the categorical column list read from the schema.
cat

['gender']

In [12]:
import os,sys
import pandas as pd
import numpy as np

from imblearn.combine import SMOTETomek
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.compose import ColumnTransformer
from heart.exception import HeartException
from heart.logger import logging
from heart.constant.training_pipeline import TARGET_COLUMN,SCHEMA_FILE_PATH
from heart.entity.artifact_entity import (DataValidationArtifact,
                                          DataTrasformationArtifact)
from heart.entity.config_entity import DataTransformationConfig

from heart.ml.model.estimator import TargetValueMapping
from heart.utils import save_numpy_array_data, save_object
from heart.utils import read_yaml_file


class DataTransformation:
    """Prepare the validated train/test CSV files for model training.

    The stage imputes and scales the feature columns, maps the target labels
    through ``TargetValueMapping``, rebalances the training split with
    SMOTETomek, and saves the fitted preprocessor plus both resulting arrays to
    the paths given by the transformation config.
    """

    def __init__(self,
                 data_transformation_config: DataTransformationConfig,
                 data_validation_artifact: DataValidationArtifact):
        """Store the stage inputs and load the column schema.

        Args:
            data_transformation_config: Provides the output paths
                (``transformed_object_file_path``, ``transformed_train_file_path``,
                ``transformed_test_file_path``).
            data_validation_artifact: Provides ``valid_train_file_path`` and
                ``valid_test_file_path``.

        Raises:
            HeartException: If reading the schema file fails.
        """
        try:
            self.data_transformation_config = data_transformation_config
            self.data_validation_artifact = data_validation_artifact
            self._schema_config = read_yaml_file(file_path=SCHEMA_FILE_PATH)
        except Exception as e:
            raise HeartException(e, sys)

    @staticmethod
    def read_data(file_path: str) -> pd.DataFrame:
        """Read a CSV file into a DataFrame.

        Raises:
            HeartException: If the file cannot be read or parsed.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise HeartException(e, sys)

    @staticmethod
    def _split_features_and_target(frame: pd.DataFrame, target_mapping: dict):
        """Return (feature frame, mapped target series) for one dataset split."""
        features = frame.drop(columns=[TARGET_COLUMN])
        target = frame[TARGET_COLUMN].replace(target_mapping)
        return features, target

    def get_data_transformer_object(self) -> ColumnTransformer:
        """Build the (unfitted) preprocessing transformer described by the schema.

        Numerical columns are median-imputed and scaled with ``RobustScaler``;
        categorical columns are imputed with the most frequent value only (this
        pipeline contains no encoding step).

        Returns:
            A ``ColumnTransformer`` with a ``'num'`` and a ``'cat'`` branch.

        Raises:
            HeartException: If a schema key is missing or construction fails.
        """
        try:
            schema = self._schema_config

            # No trailing comma: a trailing comma here would wrap the list in a
            # 1-tuple, which is not a valid column selection.
            numerical_cols = schema["numerical_columns"]

            # An earlier notebook cell reads this list under the singular key
            # "categorical_column", so accept either spelling.
            # NOTE(review): settle on one key name in the schema file.
            categorical_key = (
                "categorical_columns"
                if "categorical_columns" in schema
                else "categorical_column"
            )
            categorical_cols = schema[categorical_key]

            numerical_transformer = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='median')),
                ('scaler', RobustScaler())
            ])

            categorical_transformer = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='most_frequent'))
            ])
            logging.info(f"Categorical columns: {categorical_cols}")
            logging.info(f"Numerical columns: {numerical_cols}")

            preprocessor = ColumnTransformer(
                transformers=[
                    ('num', numerical_transformer, numerical_cols),
                    ('cat', categorical_transformer, categorical_cols)
                ])

            return preprocessor
        except Exception as e:
            raise HeartException(e, sys)

    def initiate_data_transformation(self) -> DataTrasformationArtifact:
        """Run the transformation stage end to end.

        Reads the validated train/test CSVs, fits the preprocessor on the train
        features and applies it to both splits, resamples the training split
        with SMOTETomek, appends the target as the last column of each array,
        and saves the preprocessor object and both arrays.

        Returns:
            A ``DataTrasformationArtifact`` holding the three output paths.

        Raises:
            HeartException: If any step fails.
        """
        try:
            logging.info("Initiating data transformation")
            # Must be created before use below; it is fitted on the train split.
            preprocessor_obj = self.get_data_transformer_object()
            logging.info("Data transformation object created")

            train_df = DataTransformation.read_data(self.data_validation_artifact.valid_train_file_path)
            test_df = DataTransformation.read_data(self.data_validation_artifact.valid_test_file_path)

            # The mapping dict is what gets passed to Series.replace; the result
            # stays a Series so it can be resampled and stacked below.
            # NOTE(review): assumes TargetValueMapping().to_dict() returns a
            # {raw label: encoded value} dict - confirm in heart.ml.model.estimator.
            target_mapping = TargetValueMapping().to_dict()

            input_feature_train_df, target_feature_train_df = self._split_features_and_target(
                train_df, target_mapping
            )
            logging.info("Got train features and test features of Training dataset")
            input_feature_test_df, target_feature_test_df = self._split_features_and_target(
                test_df, target_mapping
            )
            logging.info("Got train features and test features of Testing dataset")

            logging.info(
                "Applying preprocessing object on training dataframe and testing dataframe"
            )

            input_feature_train_arr = preprocessor_obj.fit_transform(input_feature_train_df)
            logging.info(
                "Used the preprocessor object to fit transform the train features"
            )
            # transform only: the test split must not influence the fitted statistics.
            input_feature_test_arr = preprocessor_obj.transform(input_feature_test_df)
            logging.info("Used the preprocessor object to transform the test features")

            logging.info("Applying SMOTETomek on Training dataset")
            smt = SMOTETomek(sampling_strategy="minority", random_state=42)
            # Only the training split is resampled; the test split is left as is.
            input_feature_train_arr, target_feature_train_df = smt.fit_resample(
                input_feature_train_arr, target_feature_train_df
            )
            logging.info("Applied SMOTETomek on training dataset")

            logging.info("Created train array and test array")

            # Append the target as the final column of each feature matrix.
            train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]
            logging.info("Data transformation completed")

            save_object(
                file_path=self.data_transformation_config.transformed_object_file_path,
                obj=preprocessor_obj
            )

            logging.info("Saving train array and test array")
            save_numpy_array_data(
                file_path=self.data_transformation_config.transformed_train_file_path,
                array=train_arr
            )
            save_numpy_array_data(
                file_path=self.data_transformation_config.transformed_test_file_path,
                array=test_arr
            )

            logging.info("Saved train array and test array")
            logging.info("Creating DataTransformationArtifact")
            data_transformation_artifact = DataTrasformationArtifact(
                transformed_object_file_path=self.data_transformation_config.transformed_object_file_path,
                transformed_train_file_path=self.data_transformation_config.transformed_train_file_path,
                transformed_test_file_path=self.data_transformation_config.transformed_test_file_path,
            )
            return data_transformation_artifact
        except Exception as e:
            # Raise (not merely construct) so failures are not turned into a silent None.
            raise HeartException(e, sys)

In [13]:
from heart.exception import HeartException
from heart.logger import logging
from heart.components.data_ingesion import DataIngestion
from heart.components.data_validation import DataValidation
from heart.components.data_transformation import DataTransformation
from heart.entity.artifact_entity import DataIngestionArtifact,DataValidationArtifact,DataTrasformationArtifact
from heart.entity.config_entity import TrainingPipelineConfig,DataIngestionConfig,DataValidationConfig,DataTransformationConfig
import sys


class TrainingPipeline:
    """Runs the training stages in order: ingestion, validation, transformation.

    Each stage receives its own config object, all derived from one shared
    ``TrainingPipelineConfig``, and hands its artifact to the next stage.
    """

    def __init__(self, training_config: TrainingPipelineConfig):
        """Create the per-stage configuration objects.

        Args:
            training_config: Pipeline-level configuration passed to every stage config.
        """
        self.training_config = training_config
        self.data_ingestion_config = DataIngestionConfig(training_pipeline_config=self.training_config)
        self.data_validation_config = DataValidationConfig(training_pipeline_config=self.training_config)
        self.data_transformation_config = DataTransformationConfig(training_pipeline_config=self.training_config)

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """Run the data-ingestion stage and return its artifact.

        Raises:
            HeartException: If the stage fails.
        """
        try:
            data_ingestion_config = self.data_ingestion_config
            data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
            data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
            logging.info(
                "Exited the start_data_ingestion method of TrainPipeline class"
            )
            return data_ingestion_artifact

        except Exception as e:
            raise HeartException(e, sys)

    def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
        """Run the data-validation stage on the ingestion output.

        Args:
            data_ingestion_artifact: Artifact returned by ``start_data_ingestion``.

        Raises:
            HeartException: If the stage fails.
        """
        try:
            data_validation_config = self.data_validation_config
            data_validation = DataValidation(data_ingestion_artifact=data_ingestion_artifact,
                                             data_validation_config=data_validation_config)
            data_validation_artifact = data_validation.initiate_data_validation()
            logging.info(
                "Exited the start_data_validation method of TrainPipeline class"
            )
            return data_validation_artifact
        except Exception as e:
            raise HeartException(e, sys)

    def start_data_transformation(self, data_validation_artifact: DataValidationArtifact) -> DataTrasformationArtifact:
        """Run the data-transformation stage on the validation output.

        Args:
            data_validation_artifact: Artifact returned by ``start_data_validation``.

        Raises:
            HeartException: If the stage fails.
        """
        try:
            data_transformation_config = self.data_transformation_config
            # NOTE(review): in this notebook `DataTransformation` is imported from
            # heart.components.data_transformation in this cell, so the packaged
            # implementation is the one used here.
            data_tranformation = DataTransformation(data_transformation_config=data_transformation_config,
                                                    data_validation_artifact=data_validation_artifact)
            data_tranformation_artifact = data_tranformation.initiate_data_transformation()
            logging.info(
                "Exited the start_data_transformation method of TrainPipeline class"
            )
            return data_tranformation_artifact
        except Exception as e:
            # Raise (not merely construct) the exception; otherwise a failure
            # would be swallowed and None returned to the caller.
            raise HeartException(e, sys)

    def start(self):
        """Execute every implemented stage in sequence.

        Raises:
            HeartException: If any stage fails.
        """
        try:
            data_ingestion_artifact = self.start_data_ingestion()
            data_validation_artifact = self.start_data_validation(data_ingestion_artifact=data_ingestion_artifact)
            # TODO: feed this artifact to the model trainer and model evaluation
            # stages once those are implemented.
            data_transformation_artifact = self.start_data_transformation(data_validation_artifact=data_validation_artifact)
        except Exception as e:
            raise HeartException(e, sys)

In [14]:
from heart.pipeline.training_pipeline import TrainingPipeline
from heart.entity.config_entity import TrainingPipelineConfig

if __name__ == "__main__":
    # Build the pipeline-level configuration first; the pipeline creates
    # its per-stage configs from it.
    training_config = TrainingPipelineConfig()

    # Wire up the pipeline, then run its stages in order.
    training_pipeline = TrainingPipeline(
        training_config=training_config,
    )
    training_pipeline.start()


