In [3]:
import os
import pandas as pd

In [4]:
%pwd

'/Users/main/Desktop/LSTM_Forecast/research'

In [5]:
# Move the working directory up one level. The %pwd outputs around this cell
# show it going from .../LSTM_Forecast/research to .../LSTM_Forecast.
# NOTE(review): this is relative to the current directory, so re-running the
# cell without restarting the kernel moves up one more level each time.
os.chdir('../')

In [6]:
%pwd

'/Users/main/Desktop/LSTM_Forecast'

In [7]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataCleaningConfig:
    """Immutable settings consumed by the data-cleaning stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """
    # Directory under which DataCleaning.lag_transform_data places
    # train.csv and test.csv.
    root_dir: Path
    # CSV file loaded by DataCleaning.read_and_prepare_data.
    data_path: Path

In [8]:
from TimeSeriesForecast.constants import *
from TimeSeriesForecast.utils.common import read_yaml,create_directories


In [9]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects.

    The three YAML documents (config, params, schema) are read once at
    construction time and kept on the instance as ``config``, ``params`` and
    ``schema``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the YAML files and prepare the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # presumably creates the directory if it is missing -- behaviour is
        # defined by the project's create_directories helper; TODO confirm.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self) -> DataCleaningConfig:
        """Build the config for the data-cleaning stage.

        Reads the ``data_transformation`` section of the loaded config, passes
        its ``root_dir`` to ``create_directories`` and wraps ``root_dir`` and
        ``data_path`` in a ``DataCleaningConfig``.

        Returns:
            DataCleaningConfig: Settings for ``DataCleaning``.
        """
        section = self.config.data_transformation
        stage_root = section.root_dir

        create_directories([stage_root])

        return DataCleaningConfig(
            root_dir=stage_root,
            data_path=section.data_path,
        )

In [10]:
from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
import numpy as np
from TimeSeriesForecast import logger

In [None]:
class DataCleaning:
    """Turn a dated price CSV into lag-feature train/test CSV files.

    The stages are meant to be chained in this order:
    ``read_and_prepare_data`` -> ``rename_clean_data`` -> ``loc_data`` ->
    ``lag_transform_data``. Every stage returns a new frame and leaves its
    input untouched, so a stage can be re-run safely.
    """

    def __init__(self, config: "DataCleaningConfig"):
        """Store the stage settings.

        Args:
            config: Object exposing ``data_path`` (input CSV) and ``root_dir``
                (output directory).
        """
        self.config = config

    def read_and_prepare_data(self):
        """Load the input CSV and index it chronologically by ``Date``.

        Returns:
            pandas.DataFrame: The CSV contents sorted by ascending date, with
            the parsed ``Date`` column as the index.

        Raises:
            KeyError: If the CSV has no ``Date`` column.
        """
        df = pd.read_csv(self.config.data_path)
        df['Date'] = pd.to_datetime(df['Date'])
        # set_index replaces the positional index left over from the sort, so
        # no separate index reset is required.
        return df.sort_values(by='Date').set_index('Date')

    def rename_clean_data(self, df):
        """Convert '$'-prefixed text columns to floats and rename the close column.

        Every text column other than ``Date`` has ``'$'`` removed and is cast
        to ``float``. After all columns are cleaned, ``Close/Last`` (if
        present) is renamed to ``Value``.

        Args:
            df: Frame produced by ``read_and_prepare_data``.

        Returns:
            pandas.DataFrame: A cleaned copy of ``df``.

        Raises:
            ValueError: If a text column still holds non-numeric text after
                ``'$'`` is removed.
        """
        cleaned = df.copy()
        for column in cleaned.columns:
            if column == 'Date':
                continue
            series = cleaned[column]
            # Text columns may be reported as `object` or as a dedicated
            # string dtype depending on the pandas version; accept both.
            is_text = (
                pd.api.types.is_object_dtype(series)
                or pd.api.types.is_string_dtype(series)
            )
            if is_text:
                cleaned[column] = (
                    series.str.replace('$', '', regex=False).astype(float)
                )
        # rename ignores missing labels, so no membership check is needed.
        return cleaned.rename(columns={'Close/Last': 'Value'})

    def loc_data(self, df, start='2023', end='2023'):
        """Keep only the ``Value`` column for the requested date window.

        Args:
            df: Date-indexed frame containing a ``Value`` column.
            start: First date label of the window (inclusive).
            end: Last date label of the window (inclusive).

        Returns:
            pandas.DataFrame: An independent single-column copy of the window.

        Raises:
            KeyError: If ``df`` has no ``Value`` column.
        """
        return df[['Value']].loc[start:end].copy()

    def lag_transform_data(self, df, n_steps, train_fraction=0.90):
        """Add lag features, fill calendar gaps, split and write train/test CSVs.

        For each ``i`` in ``1..n_steps`` a column ``Value (t-i)`` is added that
        holds the first column shifted by ``i`` rows. Rows made incomplete by
        the shift are dropped, the frame is re-indexed to daily frequency and
        gaps are forward-filled. The leading ``train_fraction`` of rows goes to
        ``train.csv`` and the rest to ``test.csv`` under ``config.root_dir``.
        Both files are written without the date index.

        Args:
            df: Date-indexed frame whose first column is the series to lag.
            n_steps: Number of lag columns to create.
            train_fraction: Share of rows, in chronological order, assigned to
                the training set.

        Returns:
            tuple[pandas.DataFrame, pandas.DataFrame]: ``(train, test)``.
        """
        lagged = df.copy()
        target = lagged.iloc[:, 0]
        for lag in range(1, n_steps + 1):
            lagged[f'Value (t-{lag})'] = target.shift(lag)

        lagged = lagged.dropna().asfreq('D').ffill()

        # Scale the row count, not the frame: len(df * x) is just len(df).
        split_index = int(len(lagged) * train_fraction)
        train = lagged.iloc[:split_index]
        test = lagged.iloc[split_index:]

        # index=False is an argument of to_csv, not of os.path.join.
        train.to_csv(os.path.join(self.config.root_dir, 'train.csv'), index=False)
        test.to_csv(os.path.join(self.config.root_dir, 'test.csv'), index=False)

        logger.info('Train and test splitted')
        logger.info(f'Train data shape: {train.shape}')
        logger.info(f'Test data shape: {test.shape}')
        return train, test

# Run the data-cleaning stage end to end: build the stage config, then load,
# clean, slice and lag/split the data. Errors propagate unchanged; wrapping
# the calls in `try/except Exception as e: raise e` added nothing.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataCleaning(config=data_transformation_config)
df = data_transformation.read_and_prepare_data()
df = data_transformation.rename_clean_data(df)
df = data_transformation.loc_data(df)
# Writes train.csv / test.csv under the configured root_dir. `df` is left
# bound to the sliced frame rather than being overwritten with this call's
# result, so it stays available for inspection.
data_transformation.lag_transform_data(df, n_steps=7)