In [2]:
import os

In [3]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project\\research'

In [4]:
# Move the working directory one level up; the surrounding %pwd outputs show
# this takes the kernel from .../research to the project root.
# Presumably needed so relative paths (e.g. config/config.yaml) resolve — confirm.
# NOTE(review): not idempotent — re-running this cell climbs another level,
# so run it exactly once per kernel session.
os.chdir("../")

In [5]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project'

In [16]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of locations used by the data-transformation stage.

    Despite the shared ``_dir`` suffix the fields play two different roles in
    ``DataTransformation.transform_data``:

    * ``<company>_data_dir`` is handed directly to ``pd.read_csv``, i.e. it is
      used as the path of the raw CSV *file* for that company.
    * ``transformed_<company>_data_dir`` is created with ``os.makedirs`` and
      used as the *directory* the transformed CSV is written into.

    ``frozen=True`` makes instances read-only after construction.
    NOTE(review): fields are annotated ``Path`` but ``ConfigurationManager``
    passes the config values through unconverted — presumably plain strings
    from the YAML file; confirm if ``Path`` semantics are relied upon.
    """

    # Root folder of this stage's artifacts (created by ConfigurationManager).
    root_dir: Path
    # Raw input CSV locations, one per company.
    apple_data_dir: Path
    amazon_data_dir: Path
    google_data_dir: Path
    microsoft_data_dir: Path
    # Output directories for the scaled training data, one per company.
    transformed_apple_data_dir: Path
    transformed_amazon_data_dir: Path
    transformed_google_data_dir: Path
    transformed_microsoft_data_dir: Path

In [9]:
from smPredictor.constants import *
from smPredictor.utils.common import read_yaml, create_directories

In [17]:
class ConfigurationManager:
    """Load the project's YAML configuration and build per-stage config objects.

    On construction both YAML files are read and the top-level artifacts
    directory named by ``artifacts_root`` is created.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Read the config and params YAML files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data-transformation stage.

        Creates the stage's ``root_dir`` as a side effect and copies the
        ``data_transformation`` section of the config into a
        ``DataTransformationConfig``. Values are passed through exactly as
        read from the YAML file.

        Returns:
            DataTransformationConfig: Input and output locations for all four
            companies.
        """
        config = self.config.data_transformation

        create_directories([config.root_dir])

        # Each company must point at its own raw data entry. Previously the
        # Amazon, Google and Microsoft inputs were all wired to
        # ``apple_data_dir``, so every company was transformed from Apple's file.
        # NOTE(review): assumes config.yaml defines amazon/google/microsoft
        # ``*_data_dir`` keys alongside ``apple_data_dir`` — confirm.
        return DataTransformationConfig(
            root_dir=config.root_dir,
            apple_data_dir=config.apple_data_dir,
            amazon_data_dir=config.amazon_data_dir,
            google_data_dir=config.google_data_dir,
            microsoft_data_dir=config.microsoft_data_dir,
            transformed_apple_data_dir=config.transformed_apple_data_dir,
            transformed_amazon_data_dir=config.transformed_amazon_data_dir,
            transformed_google_data_dir=config.transformed_google_data_dir,
            transformed_microsoft_data_dir=config.transformed_microsoft_data_dir,
        )

In [27]:
import pandas as pd
from smPredictor import logger
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import os



In [28]:
class DataTransformation:
    """Scale each company's closing prices and save the training portion.

    For Apple, Amazon, Google and Microsoft the raw CSV is read, the 'Close'
    column is min-max scaled to [0, 1], and the leading ``train_fraction`` of
    the scaled rows is written to that company's output directory.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration holding input and output locations."""
        self.config = config

    def _transform_company(self, company, source_csv, output_dir, output_name, train_fraction):
        """Scale one company's 'Close' series and save its training split as CSV."""
        raw = pd.read_csv(source_csv)
        if 'Close' not in raw.columns:
            # Fail with a clear message instead of the scaler's generic
            # "0 feature(s)" error on an empty column selection.
            raise ValueError(f"{company} data at {source_csv} has no 'Close' column")

        # The first two data rows are discarded before scaling.
        # Presumably these are extra header rows left in the CSV by the
        # download step (e.g. ticker / date label rows) — TODO confirm.
        close_values = raw.iloc[2:].filter(['Close']).values
        if len(close_values) == 0:
            raise ValueError(f"{company} data at {source_csv} has no rows to transform")

        training_len = int(np.ceil(len(close_values) * train_fraction))

        # A fresh scaler per company keeps each series' min/max independent.
        # NOTE(review): the scaler is fitted on the full series (including the
        # held-out tail) before slicing, matching the previous behaviour.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(close_values)
        train_df = pd.DataFrame(scaled[:training_len])

        os.makedirs(output_dir, exist_ok=True)
        train_df.to_csv(os.path.join(output_dir, output_name), index=False)
        logger.info(f"Transformed {company} Data saved successfully")
        print(f"{company} data shape: {train_df.shape}")

    def transform_data(self, train_fraction: float = 0.95):
        """Transform and save the training data for all four companies.

        Args:
            train_fraction: Share of rows, taken from the start of each
                series, that is kept as training data. Defaults to 0.95.

        Raises:
            ValueError: If ``train_fraction`` is not in (0, 1], or a source
                file lacks a 'Close' column or has no usable rows.
        """
        if not 0 < train_fraction <= 1:
            raise ValueError("train_fraction must be in the interval (0, 1]")

        cfg = self.config
        jobs = [
            ("Apple", cfg.apple_data_dir, cfg.transformed_apple_data_dir, "transformed_apple.csv"),
            ("Amazon", cfg.amazon_data_dir, cfg.transformed_amazon_data_dir, "transformed_amazon.csv"),
            ("Google", cfg.google_data_dir, cfg.transformed_google_data_dir, "transformed_google.csv"),
            ("Microsoft", cfg.microsoft_data_dir, cfg.transformed_microsoft_data_dir, "transformed_microsoft.csv"),
        ]
        for company, source_csv, output_dir, output_name in jobs:
            self._transform_company(company, source_csv, output_dir, output_name, train_fraction)


In [26]:
# Run the data-transformation stage end to end: load configuration, build the
# stage config, then transform and save every company's data.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.transform_data()
except Exception as e:
    # Record the failure (with traceback) in the project log, then re-raise
    # with a bare `raise` so the original traceback is left untouched.
    logger.exception(e)
    raise

[2024-11-29 00:43:50,054: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-11-29 00:43:50,057: INFO: common: yaml file: params.yaml loaded successfully]
[2024-11-29 00:43:50,060: INFO: common: created directory at: artifacts]
[2024-11-29 00:43:50,062: INFO: common: created directory at: artifacts/data_transformation]
[2024-11-29 00:43:50,138: INFO: 1006999031: Transformed Apple Data saved successfully]
Apple data shape: (3086, 1)
[2024-11-29 00:43:50,205: INFO: 1006999031: Transformed Amazon Data saved successfully]
Amazon data shape: (3086, 1)
[2024-11-29 00:43:50,255: INFO: 1006999031: Transformed Google Data saved successfully]
Google data shape: (3086, 1)
[2024-11-29 00:43:50,342: INFO: 1006999031: Transformed Microsoft Data saved successfully]
Microsoft data shape: (3086, 1)
