In [1]:
import os

In [2]:
%pwd

'/Users/harendrakumar/Documents/Demand_forecast/research'

In [3]:
# Move up one directory level so the working directory becomes the parent of
# the folder this notebook was started in (see the %pwd outputs around this cell).
# NOTE: this is a relative move and therefore not idempotent - re-running this
# cell without restarting the kernel climbs one more level each time.
os.chdir("../")
%pwd

'/Users/harendrakumar/Documents/Demand_forecast'

In [4]:
from dataclasses import dataclass
from pathlib import Path

In [5]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage."""
    # Directory into which train.csv and test.csv are written.
    root_dir: Path
    # CSV file that is read as the input data set.
    data_path: Path

In [6]:
from Demand_Forecast.constants import *
from Demand_Forecast.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Load the project's YAML files and build per-stage configuration objects.

    On construction the config, schema and params YAML files are read with
    ``read_yaml`` and the location stored under ``config.artifact_roots`` is
    handed to ``create_directories``.
    """

    def __init__(self, 
                 config_filepath = CONFIG_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH) ->None:
        """Read the three YAML files and create the top-level artifacts directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            schema_filepath: Location of the schema YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifact_roots])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data-transformation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            DataTransformationConfig: ``root_dir`` and ``data_path`` taken from
            the ``data_transformation`` section of the config file.
        """
        config = self.config.data_transformation

        create_directories([config.root_dir])
        # The dataclass declares both fields as Path; wrap the raw YAML values
        # so the declared and the actual types agree.
        return DataTransformationConfig(
            root_dir=Path(config.root_dir),
            data_path=Path(config.data_path),
        )

In [8]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from Demand_Forecast import logger

[2023-10-21 15:48:43,445: INFO: utils: NumExpr defaulting to 8 threads.]


In [9]:
class DataTransforamtion:
    """Convert column dtypes and split the configured data set into train/test CSVs.

    NOTE(review): the class name is misspelled ("Transforamtion"); it is kept
    unchanged because the name is what other code refers to.
    """

    def __init__(self, config: DataTransformationConfig) ->None:
        """Keep the stage configuration; ``data_path`` and ``root_dir`` are read later."""
        self.config = config

    def convert_dtypes(self,data: pd.DataFrame, column_name: str, new_dtypes: str) ->pd.DataFrame:
        """
        Convert the data type of a specific column in the DataFrame.

        The column is replaced on ``data`` itself (the frame is modified in
        place) and that same frame is returned.

        Args:
            data (pd.DataFrame): The DataFrame containing the data.
            column_name (str): The name of the column to be converted.
            new_dtypes (str): The desired data type for the column.
                Supported values: 'str', 'int' / 'int64', 'float' / 'float64',
                'datetime'. Values that cannot be parsed as datetime become NaT.

        Returns:
            pd.DataFrame: The DataFrame with the converted column.

        Raises:
            ValueError: If ``new_dtypes`` is not one of the supported values.
        """
        if new_dtypes == "str":
            data[column_name] = data[column_name].astype(str)
        elif new_dtypes in ("int", "int64"):
            # 'int' is the spelling the documentation always advertised;
            # 'int64' is the spelling the code always accepted. Both are honoured.
            data[column_name] = data[column_name].astype("int64")
        elif new_dtypes in ("float", "float64"):
            data[column_name] = data[column_name].astype("float64")
        elif new_dtypes == "datetime":
            missing_before = int(data[column_name].isna().sum())
            data[column_name] = pd.to_datetime(data[column_name], errors='coerce')
            # errors='coerce' turns unparseable values into NaT without raising;
            # report how many were lost so the success message is not misleading.
            unparseable = int(data[column_name].isna().sum()) - missing_before
            if unparseable > 0:
                logger.warning(
                    f"{unparseable} value(s) in {column_name} could not be parsed as datetime and were set to NaT"
                )
        else:
            logger.error(f"Data type conversion of {column_name} to {new_dtypes} could not be completed")
            raise ValueError("Unsupported data type conversion")

        logger.info(f"Data type conversion of {column_name} to {new_dtypes} completed Successfully")
        return data

    def train_test_split(self) ->None:
        """Read the configured CSV, keep four columns and write train/test CSV files.

        Steps:
            1. Read ``config.data_path`` and convert the 'Date' column to datetime.
            2. Keep only 'Date', 'Product_ID', 'Demand' and 'Inventory'.
            3. Split without shuffling, using scikit-learn's default split size.
            4. Write ``train.csv`` and ``test.csv`` into ``config.root_dir`` and
               log both shapes.

        Raises:
            KeyError: If one of the four expected columns is missing.
        """
        data= pd.read_csv(self.config.data_path)
        data = self.convert_dtypes(data=data, column_name='Date', new_dtypes='datetime')
        data = data[['Date', 'Product_ID', 'Demand', 'Inventory']]
        # Inside a method the bare name below resolves to the module-level
        # scikit-learn function, not to this method. shuffle=False keeps the
        # file's row order: leading rows go to train, trailing rows to test.
        # NOTE(review): rows are not sorted by Date first, so a chronological
        # split relies on the input file already being in date order - confirm.
        train, test = train_test_split(data, shuffle=False)
        # index=True writes the row index as the first column of each file.
        train.to_csv(os.path.join(self.config.root_dir, "train.csv"), index=True)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"), index=True)

        logger.info("Splitted data into training and test data")
        logger.info(train.shape)
        logger.info(test.shape)

In [10]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage object and write the train/test split.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransforamtion(config=data_transformation_config)
    data_transformation.train_test_split()
except Exception as e:
    # Record the failure at ERROR level together with its traceback, then
    # re-raise the active exception unchanged so the cell still fails visibly.
    logger.exception(e)
    raise

[2023-10-21 15:48:45,086: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-10-21 15:48:45,089: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-10-21 15:48:45,091: INFO: common: yaml file: params.yaml loaded successfully]
[2023-10-21 15:48:45,092: INFO: common: created directory at: artifacts]
[2023-10-21 15:48:45,092: INFO: common: created directory at: artifacts/data_transformation]
[2023-10-21 15:48:45,097: INFO: 1336587283: Data type conversion of Date to datetime completed Successfully]
[2023-10-21 15:48:45,102: INFO: 1336587283: Splitted data into training and test data]
[2023-10-21 15:48:45,103: INFO: 1336587283: (46, 4)]
[2023-10-21 15:48:45,104: INFO: 1336587283: (16, 4)]
