In [1]:
import os
import pandas as pd

In [2]:
%pwd

'/Users/main/Desktop/LSTM_Forecast/research'

In [3]:
# Step up one directory so the working directory becomes the parent of the
# notebook's folder (the %pwd outputs before/after this cell show the move).
# NOTE(review): not idempotent - every re-run of this cell climbs one more
# level, so run it exactly once per kernel session.
os.chdir('../')

In [4]:
%pwd

'/Users/main/Desktop/LSTM_Forecast'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings consumed by the data-transformation stage."""

    # Folder that receives the stage's outputs (train.csv / test.csv are written here).
    root_dir: Path
    # Location of the input CSV that the stage reads.
    data_path: Path

In [6]:
from TimeSeriesForecast.constants import *
from TimeSeriesForecast.config.configuration import read_yaml,create_directories

In [7]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects.

    After construction the parsed files are available as ``self.config``,
    ``self.params`` and ``self.schema``, and the directory named by
    ``config.artifacts_root`` has been passed to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        # Parse the three files in a fixed order: config, params, schema.
        yaml_sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        for attribute, filepath in yaml_sources:
            setattr(self, attribute, read_yaml(filepath))

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation_config(self)-> DataTransformationConfig:
        """Build the config object for the data-transformation stage.

        Reads the ``data_transformation`` section of the main config, passes
        its ``root_dir`` to ``create_directories``, and returns the section's
        ``root_dir`` and ``data_path`` wrapped in a ``DataTransformationConfig``.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )
        

In [8]:
from sklearn.preprocessing import MinMaxScaler
from copy import deepcopy as dc
import numpy as np
import torch
from TimeSeriesForecast import logger


class DataTransformation:
    """Turn a raw price CSV into lagged train/test CSV files.

    Intended call order: ``read_and_prepare_data`` -> ``rename_clean_data``
    -> ``loc_data`` -> ``lag_transform_data``.
    """

    def __init__(self,config: "DataTransformationConfig"):
        # config supplies data_path (input CSV) and root_dir (output folder).
        self.config = config

    def read_and_prepare_data(self):
        """Load the CSV at ``config.data_path``, indexed by ascending ``Date``.

        Returns:
            DataFrame whose index is the parsed ``Date`` column in
            chronological order; the remaining columns are left untouched.

        Raises:
            KeyError: if the file has no ``Date`` column.
        """
        df = pd.read_csv(self.config.data_path)
        df['Date'] = pd.to_datetime(df['Date'])
        # set_index discards the positional index, so no reset_index is needed.
        return df.sort_values(by='Date').set_index('Date')

    def rename_clean_data(self,df):
        """Strip ``$`` from text columns, cast them to float, rename the close column.

        ``df`` is modified in place and the same object is returned.

        Args:
            df: frame as produced by ``read_and_prepare_data``.

        Returns:
            The same frame, with every text column converted to float and
            ``Close/Last`` (when present) renamed to ``Value``.

        Raises:
            ValueError: if a text column holds something that is not a number
                once the ``$`` characters are removed.
        """
        for column in df.columns:
            series = df[column]
            # Accept classic object columns as well as pandas' dedicated
            # string dtypes, so '$' columns are cleaned under either.
            is_text = (
                pd.api.types.is_object_dtype(series)
                or pd.api.types.is_string_dtype(series)
            )
            if column != 'Date' and is_text:
                df[column] = series.str.replace('$', '', regex=False).astype(float)

        if 'Close/Last' in df.columns:
            df.rename(columns={'Close/Last':'Value'},inplace=True)
        return df

    def loc_data(self,df,start='2023',end='2024'):
        """Return a copy holding only ``Value`` for the ``start``..``end`` window.

        Args:
            df: date-indexed frame containing a ``Value`` column.
            start: first index label to keep (inclusive label slice).
            end: last index label to keep (inclusive label slice).

        Returns:
            A new single-column DataFrame; ``df`` itself is not modified.

        Raises:
            KeyError: if ``df`` has no ``Value`` column.
        """
        return df[['Value']].loc[start:end].copy()

    def lag_transform_data(self,df,n_steps,train_ratio=0.90):
        """Add lag features, resample to daily rows, split, and write CSVs.

        The caller's ``df`` is left untouched; all work happens on a copy, so
        calling this repeatedly with the same frame gives the same result.

        Args:
            df: date-indexed frame whose first column is the series to lag.
            n_steps: number of lag columns ``Value (t-1)`` .. ``Value (t-n)``.
            train_ratio: fraction of rows (from the start) written to
                ``train.csv``; the remainder goes to ``test.csv``.

        Returns:
            ``(train, test)`` DataFrames, identical to what was written to
            ``config.root_dir`` except that the files omit the date index.

        Raises:
            ValueError: if ``train_ratio`` is not strictly between 0 and 1.
        """
        if not 0 < train_ratio < 1:
            raise ValueError(
                f"train_ratio must be strictly between 0 and 1, got {train_ratio!r}"
            )

        lagged = df.copy()
        target = lagged.iloc[:, 0]
        for i in range(1, n_steps + 1):
            lagged[f'Value (t-{i})'] = target.shift(i)

        # NOTE(review): lags are computed over the rows that exist (e.g.
        # trading days) and only afterwards are missing calendar days inserted
        # and forward-filled, so filled rows repeat the previous row verbatim.
        # Confirm this ordering is intended before changing it.
        lagged = lagged.dropna().asfreq('D').ffill()

        split_index = int(len(lagged) * train_ratio)
        train = lagged.iloc[:split_index]
        test = lagged.iloc[split_index:]
        # index=False: the date index is deliberately not persisted.
        train.to_csv(os.path.join(self.config.root_dir, "train.csv"),index = False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"),index = False)
        logger.info("Splitted data into train and test")
        logger.info(f"Train data shape: {train.shape}")
        logger.info(f"Test data shape: {test.shape}")
        return train, test

In [9]:
# Run the data-transformation stage end to end: load settings, read and clean
# the raw CSV, keep the modelling window, then write the lagged train/test CSVs.
# No try/except wrapper: catching an exception only to re-raise it unchanged
# adds nothing, and any failure should surface with its full traceback.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
df = data_transformation.read_and_prepare_data()
df = data_transformation.rename_clean_data(df)
df = data_transformation.loc_data(df)
data_transformation.lag_transform_data(df, n_steps=7)

[2024-07-24 12:32:45,525:INFO:common:yaml file: config/config.yaml loaded successfully]
[2024-07-24 12:32:45,527:INFO:common:yaml file: params.yaml loaded successfully]
[2024-07-24 12:32:45,529:INFO:common:yaml file: schema.yaml loaded successfully]
[2024-07-24 12:32:45,530:INFO:common:created directory at: artifacts]
[2024-07-24 12:32:45,531:INFO:common:created directory at: artifacts/data_transformation]
[2024-07-24 12:32:45,562:INFO:662788182:Splitted data into train and test]
[2024-07-24 12:32:45,562:INFO:662788182:Train data shape: (453, 8)]
[2024-07-24 12:32:45,563:INFO:662788182:Test data shape: (51, 8)]


In [None]:

# data_array= df.to_numpy()
# scaler= MinMaxScaler(feature_range=(-1,1))
# transformed= scaler.fit_transform(data_array)

# X= transformed[:,1:]
# y= transformed[:,0]

# X= dc(np.flip(X,axis=1))
# split_index= int(len(X) * 0.90)
# X_train= X[:split_index]
# X_test= X[split_index:]
# y_train= y[:split_index]
# y_test= y[split_index:]
# X_train= X_train.reshape((-1,7,1))
# X_test= X_test.reshape((-1,7,1))
# y_train= y_train.reshape((-1,1))
# y_test= y_test.reshape((-1,1))
# X_train= torch.tensor(X_train).float()
# X_test= torch.tensor(X_test).float()
# y_train= torch.tensor(y_train).float()
# y_test= torch.tensor(y_test).float()
# except Exception as e:
# raise e