In [2]:
from shipment.exception import HousingException, ShipmentException
from shipment.logger import logging
from shipment.entity.config_entity import DataTransformationConfig
from shipment.entity.artifact_entity import DataIngestionArtifact,DataValidationArtifact, \
    DataTransformationArtifact
from shipment.constant import *
from shipment.util.util import read_yaml_file,save_object,save_numpy_array_data,load_data

from sklearn import preprocessing
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

import numpy as np
import pandas as pd
import os,sys

ModuleNotFoundError: No module named 'shipment'

In [None]:

class FeatureGenerator(BaseEstimator, TransformerMixin):
    """Pipeline step reserved for date-derived shipment features.

    The step records which of the two delivery date columns are configured.
    The derived feature itself is currently disabled, so ``transform`` hands
    the data through unchanged as a NumPy array.
    """

    def __init__(self, columns):
        """
        FeatureGenerator Initialization

        columns: collection of datetime column names configured for the dataset
        Scheduled Delivery Date: column of type datetime in the dataset
        Delivered to Client Date: column of type datetime in the dataset

        Generated Feature (currently disabled in ``transform``)
        late_days_between_delivery_scheduled : subtraction of Delivered to
        Client Date and Scheduled Delivery Date

        Raises ShipmentException if ``columns`` cannot be inspected.
        """
        try:
            # BaseEstimator.get_params() / sklearn.base.clone() read every
            # constructor argument back from an attribute of the same name,
            # so the argument has to be stored, unmodified.
            self.columns = columns

            # Always define both attributes (None when the key is not
            # configured) so later code never hits an AttributeError.
            self.scheduled_delivery_date = (
                SCHEDULED_DELIVERY_DATE_KEY
                if SCHEDULED_DELIVERY_DATE_KEY in columns
                else None
            )
            self.delivered_to_client_date = (
                DELIVERED_TO_CLIENT_DATE_KEY
                if DELIVERED_TO_CLIENT_DATE_KEY in columns
                else None
            )

        except Exception as e:
            raise ShipmentException(e, sys) from e

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X, y=None):
        """
        Return ``X`` unchanged as a NumPy array.

        X: array-like or DataFrame accepted by ``pd.DataFrame``
        y: ignored, present for scikit-learn API compatibility

        Raises ShipmentException if ``X`` cannot be converted.
        """
        try:
            df = pd.DataFrame(X)
            logging.info(f"df columns: {list(df.columns)}")

            # NOTE: generation of late_days_between_delivery_scheduled is
            # disabled; the original sketch is kept below for reference.
            #scheduled = df[self.scheduled_delivery_date].apply(lambda x: pd.to_datetime(x,errors="coerce"))
            #delivered = df[self.delivered_to_client_date].apply(lambda x: pd.to_datetime(x,errors="coerce"))

            #df["late_days_between_delivery_scheduled"] = delivered - scheduled
            #df["late_days_between_delivery_scheduled"] = df['late_days_between_delivery_scheduled'].apply(lambda x:str(x).split(" ")[0]).astype('float')

            # A transformer must return the (possibly augmented) data;
            # returning nothing would feed None to the next pipeline step.
            return df.to_numpy()

        except Exception as e:
            raise ShipmentException(e, sys) from e


class DataTransformation:

    def __init__(
        self,
        data_transformation_config: DataTransformationConfig,
        data_ingestion_artifact: DataIngestionArtifact,
        data_validation_artifact: DataValidationArtifact,
    ):
        """
        Keep references to the configuration and upstream artifacts.

        data_transformation_config: settings for the transformation stage
        data_ingestion_artifact: artifact produced by the data ingestion stage
        data_validation_artifact: artifact produced by the data validation stage

        Raises ShipmentException if logging or attribute assignment fails.
        """
        try:
            # Banner marking the start of this stage in the log output.
            logging.info(f"{'>>' * 30}Data Transformation log started.{'<<' * 30} ")

            self.data_validation_artifact = data_validation_artifact
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_transformation_config = data_transformation_config

        except Exception as err:
            raise ShipmentException(err, sys) from err


    def get_data_transformer_object(self) -> ColumnTransformer:
        """
        Build the (unfitted) preprocessing ColumnTransformer.

        Column groups are read from the schema file referenced by the data
        validation artifact:
          * numerical columns   -> median imputation, FeatureGenerator,
                                   standard scaling
          * categorical columns -> most-frequent imputation, one-hot encoding,
                                   scaling without mean-centering

        Raises ShipmentException if the schema cannot be read, a schema key
        is missing, or any pipeline component fails to construct.
        """
        try:
            schema = read_yaml_file(
                file_path=self.data_validation_artifact.schema_file_path
            )

            numerical_columns = schema[DATASET_NUMERICAL_COLUMNS_KEY]
            categorical_columns = schema[DATASET_CATEGORICAL_COLUMNS_KEY]
            date_columns = schema[DATASET_DATETIME_COLUMNS_KEYS]

            # Numerical branch.
            numeric_steps = [
                ('imputer', SimpleImputer(strategy="median")),
                ('feature_generator', FeatureGenerator(columns=date_columns)),
                ('scaler', StandardScaler()),
            ]
            numeric_branch = Pipeline(steps=numeric_steps)

            # Categorical branch; with_mean=False skips centering when
            # scaling the one-hot encoder's output.
            categorical_steps = [
                ('imputer', SimpleImputer(strategy="most_frequent")),
                ('one_hot_encoder', OneHotEncoder()),
                ('scaler', StandardScaler(with_mean=False)),
            ]
            categorical_branch = Pipeline(steps=categorical_steps)

            logging.info(f"Categorical columns : {categorical_columns}")
            logging.info(f"Numerical columns : {numerical_columns}")

            # Route each column group to its own branch.
            column_transformer = ColumnTransformer([
                ('num_pipeline', numeric_branch, numerical_columns),
                ('cat_pipeline', categorical_branch, categorical_columns),
            ])
            return column_transformer

        except Exception as err:
            raise ShipmentException(err, sys) from err
