In [3]:
import os
import pandas as pd

In [4]:
%pwd

'/Users/main/Desktop/LSTM_Forecast/research'

In [5]:
# Move from the notebook's folder to its parent directory.
# A bare relative chdir climbs one more level every time the cell is re-run,
# so remember the directory the kernel was in the first time this cell ran
# and always resolve the parent from there. The first run behaves exactly
# like os.chdir('../'); later runs are no-ops.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [6]:
%pwd

'/Users/main/Desktop/LSTM_Forecast'

In [7]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataCleaningConfig:
    """Immutable pair of filesystem locations describing one data-cleaning run.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory associated with the cleaning stage.
    root_dir: Path
    # Location of the input data file.
    data_path: Path

In [8]:
from TimeSeriesForecast.constants import *
from TimeSeriesForecast.utils.common import read_yaml,create_directories


In [9]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # The loaded config is accessed by attribute; its `artifacts_root`
        # entry names the directory to create up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_cleaning_config(self) -> DataCleaningConfig:
        """Build the data-cleaning settings from the `data_cleaning` config section.

        Creates the section's `root_dir` directory as a side effect.

        Returns:
            DataCleaningConfig populated with the section's `root_dir` and
            `data_path` values.
        """
        section = self.config.data_cleaning
        create_directories([section.root_dir])

        return DataCleaningConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

In [10]:
from TimeSeriesForecast import logger

In [None]:
class DataCleaning:
    """Load a dated price CSV, clean it, and derive lagged features.

    The steps are meant to be run in order; the first three return ``self``
    so they can be chained::

        DataCleaning(cfg).read_and_prepare_data().clean_data().filter_data().transform_data()

    Every step wraps failures in a plain ``Exception`` whose message names the
    step; the original error is attached as ``__cause__``.
    """

    def __init__(self, config: "DataCleaningConfig"):
        """Store the configuration; no data is read until `read_and_prepare_data`.

        Args:
            config: Object exposing `data_path` (input CSV) and `root_dir`
                (directory that receives `data.csv`).
        """
        self.config = config
        self.df = None

    def _require_df(self) -> pd.DataFrame:
        """Return the working frame, failing clearly if nothing was loaded yet."""
        if self.df is None:
            raise RuntimeError("no data loaded; call read_and_prepare_data() first")
        return self.df

    def read_and_prepare_data(self) -> "DataCleaning":
        """Read the CSV at `config.data_path` and index it by ascending `Date`.

        Returns:
            self, for chaining.

        Raises:
            Exception: If the file cannot be read or has no `Date` column.
        """
        try:
            frame = pd.read_csv(self.config.data_path)
            # Strip header padding before looking up 'Date' so that a header
            # such as " Date" does not fail the lookup below.
            frame.columns = frame.columns.str.strip()
            frame['Date'] = pd.to_datetime(frame['Date'])
            # set_index replaces the positional index, so no reset is needed.
            self.df = frame.sort_values(by='Date').set_index('Date')
            return self
        except Exception as e:
            raise Exception(f"Error in read_and_prepare_data: {str(e)}") from e

    def clean_data(self) -> "DataCleaning":
        """Strip header whitespace, convert `$`-prefixed text columns to float,
        and rename `Close/Last` to `Value` when present.

        Returns:
            self, for chaining.

        Raises:
            Exception: If no data is loaded or a text column cannot be
                converted to float.
        """
        try:
            frame = self._require_df()
            frame.columns = frame.columns.str.strip()
            for column in frame.columns:
                series = frame[column]
                # Text columns may be plain `object` or pandas' dedicated
                # string dtype; comparing against 'object' alone would skip
                # the latter and leave the '$' prefix in place.
                is_text = (
                    pd.api.types.is_object_dtype(series)
                    or pd.api.types.is_string_dtype(series)
                )
                if is_text and column != 'Date':
                    frame[column] = series.str.replace('$', '', regex=False).astype(float)
            if 'Close/Last' in frame.columns:
                frame = frame.rename(columns={'Close/Last': 'Value'})
            self.df = frame
            return self
        except Exception as e:
            raise Exception(f"Error in clean_data: {str(e)}") from e

    def filter_data(self, start='2023', end='2023') -> "DataCleaning":
        """Keep only the `Value` column for rows whose date lies in [start, end].

        Args:
            start: First label of the date slice (default '2023').
            end: Last label of the date slice, inclusive (default '2023').

        Returns:
            self, for chaining.

        Raises:
            Exception: If no data is loaded or there is no `Value` column.
        """
        try:
            frame = self._require_df()
            # .copy() detaches the slice so later column assignments are safe.
            self.df = frame[['Value']].loc[start:end].copy()
            return self
        except Exception as e:
            raise Exception(f"Error in filter_data: {str(e)}") from e

    def transform_data(self, n_lags=1) -> pd.DataFrame:
        """Add lagged `Value` columns, resample to daily frequency, and save.

        For each ``i`` in ``1..n_lags`` a column ``Value (t-i)`` holding
        ``Value`` shifted by ``i`` rows is added. Rows with missing lags are
        dropped, the frame is re-indexed to calendar-day frequency, and the
        gaps are forward-filled. The result is written to
        ``<config.root_dir>/data.csv``.

        Args:
            n_lags: Number of lag columns to create (default 1).

        Returns:
            The transformed DataFrame (also stored on ``self.df``).

        Raises:
            Exception: If no data is loaded or any step fails.
        """
        try:
            frame = self._require_df()
            for lag in range(1, n_lags + 1):
                frame[f'Value (t-{lag})'] = frame['Value'].shift(lag)
            # Lags are taken over the rows as recorded; only afterwards are
            # the missing calendar days inserted and forward-filled.
            self.df = frame.dropna().asfreq('D').ffill()

            output_path = os.path.join(self.config.root_dir, 'data.csv')
            # NOTE(review): index=False drops the Date index from the saved
            # file -- confirm downstream consumers do not need the dates.
            self.df.to_csv(output_path, index=False)

            logger.info('Train and test data prepared and saved.')
            logger.info(f'Data shape after transformation: {self.df.shape}')

            return self.df
        except Exception as e:
            raise Exception(f"Error in transform_data: {str(e)}") from e

# Run the data-cleaning stage end to end.
# The steps must run in this order: load -> clean -> filter -> transform.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_cleaning_config()
    data_transformation = DataCleaning(config=data_transformation_config)
    data_transformation.read_and_prepare_data()
    # Call the methods DataCleaning actually defines; the previous names
    # (rename_clean_data / loc_data / lag_transform_data) do not exist on the
    # class and raised AttributeError.
    data_transformation.clean_data()
    data_transformation.filter_data()
    data_transformation.transform_data()
except Exception as e:
    # Record the failure, then re-raise with the original traceback intact.
    logger.exception(e)
    raise