In [1]:
import os

In [2]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project\\research'

In [3]:
# Step up from the notebook folder to its parent so the relative paths used
# by later cells resolve from there.
# Guarded so that re-running this cell does not climb one more level each time;
# the folder name matches the working directory printed by the `%pwd` cell above.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of filesystem locations for the data-transformation stage.

    Instances are built by ``ConfigurationManager.get_data_transformation_config``
    from the ``data_transformation`` section of the project configuration and
    consumed by ``DataTransformation``.

    Despite the ``_dir`` suffix, the ``<name>_data_dir`` fields are passed
    directly to ``pd.read_csv`` by ``DataTransformation``, i.e. they are used as
    CSV file paths. The ``transformed_<name>_data_dir`` fields are used as
    output directories (created with ``os.makedirs``).

    NOTE(review): the fields are annotated as ``Path`` but the values are taken
    from the configuration object as-is — confirm whether they are really
    ``Path`` instances or plain strings.
    """

    # Working directory of this stage; created by ConfigurationManager.
    root_dir: Path

    # Input CSVs for the first group of tickers.
    apple_data_dir: Path
    amazon_data_dir: Path
    google_data_dir: Path
    microsoft_data_dir: Path

    # Output directories for the first group of tickers.
    transformed_apple_data_dir: Path
    transformed_amazon_data_dir: Path
    transformed_google_data_dir: Path
    transformed_microsoft_data_dir: Path

    # Input CSVs for the second group of tickers.
    silk_data_dir: Path
    pace_data_dir: Path
    fauji_data_dir: Path
    punjab_data_dir: Path

    # Output directories for the second group of tickers.
    transformed_silk_data_dir: Path
    transformed_pace_data_dir: Path
    transformed_fauji_data_dir: Path
    transformed_punjab_data_dir: Path

In [6]:
from smPredictor.constants import *
from smPredictor.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Load the project configuration and build per-stage config objects.

    The constructor reads the configuration and parameter YAML files and makes
    sure the top-level artifacts directory exists.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the configuration for the data-transformation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A ``DataTransformationConfig`` populated from the
            ``data_transformation`` section of the configuration.
        """
        section = self.config.data_transformation

        create_directories([section.root_dir])

        # Each ticker reads its own source path. Previously the Amazon, Google
        # and Microsoft inputs were all wired to ``apple_data_dir``, so Apple's
        # data was transformed four times under different names.
        # NOTE(review): assumes the YAML defines amazon_data_dir, google_data_dir
        # and microsoft_data_dir alongside apple_data_dir — confirm.
        return DataTransformationConfig(
            root_dir=section.root_dir,
            apple_data_dir=section.apple_data_dir,
            amazon_data_dir=section.amazon_data_dir,
            google_data_dir=section.google_data_dir,
            microsoft_data_dir=section.microsoft_data_dir,
            transformed_apple_data_dir=section.transformed_apple_data_dir,
            transformed_amazon_data_dir=section.transformed_amazon_data_dir,
            transformed_google_data_dir=section.transformed_google_data_dir,
            transformed_microsoft_data_dir=section.transformed_microsoft_data_dir,
            silk_data_dir=section.silk_data_dir,
            pace_data_dir=section.pace_data_dir,
            fauji_data_dir=section.fauji_data_dir,
            punjab_data_dir=section.punjab_data_dir,
            transformed_silk_data_dir=section.transformed_silk_data_dir,
            transformed_pace_data_dir=section.transformed_pace_data_dir,
            transformed_fauji_data_dir=section.transformed_fauji_data_dir,
            transformed_punjab_data_dir=section.transformed_punjab_data_dir,
        )

In [8]:
import pandas as pd
from smPredictor import logger
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import os



In [11]:

class DataTransformation:
    """Scale each ticker's closing prices and save the training portion as CSV.

    For every configured ticker the ``Close`` column is read from the source
    CSV, min-max scaled to the range [0, 1], truncated to the leading
    ``TRAIN_FRACTION`` of rows, and written to
    ``<transformed_<ticker>_data_dir>/transformed_<ticker>.csv``.
    """

    # Share of rows, counted from the start of each series, that is written out.
    TRAIN_FRACTION = 0.95

    # (display label, number of leading non-data rows to discard).
    # The lower-cased label is also the key used in the config attribute names
    # and in the output file name.
    # The first four sources lose their first two rows — presumably extra header
    # rows left in the export; confirm against the CSV files.
    _DATASETS = (
        ("Apple", 2),
        ("Amazon", 2),
        ("Google", 2),
        ("Microsoft", 2),
        ("Silk", 0),
        ("Pace", 0),
        ("Fauji", 0),
        ("Punjab", 0),
    )

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Source CSV paths and output directories for every ticker.
        """
        self.config = config

    def _transform_one(self, label, header_rows):
        """Transform a single ticker and write its training CSV.

        Args:
            label: Display name of the ticker (e.g. ``"Apple"``).
            header_rows: How many leading rows of the loaded frame to drop
                before extracting prices.

        Raises:
            ValueError: If the source CSV has no ``Close`` column.
        """
        key = label.lower()
        source_csv = getattr(self.config, f"{key}_data_dir")
        output_dir = getattr(self.config, f"transformed_{key}_data_dir")

        frame = pd.read_csv(source_csv)
        if header_rows:
            frame = frame.iloc[header_rows:]

        if "Close" not in frame.columns:
            raise ValueError(f"{label} data at {source_csv} has no 'Close' column")

        # Keep the prices as a 2-D (rows, 1) array, the layout the scaler expects.
        close_values = frame[["Close"]].to_numpy()
        train_len = int(np.ceil(len(close_values) * self.TRAIN_FRACTION))

        # NOTE(review): the scaler is fitted on the full series before the split,
        # so the held-out tail influences how the training rows are scaled.
        # Kept as-is to preserve the existing output — confirm this is intended.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(close_values)
        train_df = pd.DataFrame(scaled[:train_len])

        os.makedirs(output_dir, exist_ok=True)
        train_df.to_csv(
            os.path.join(output_dir, f"transformed_{key}.csv"),
            index=False
        )
        logger.info(f"Transformed {label} Data saved successfully")
        print(f"{label} data shape: {train_df.shape}")

    def transform_data(self):
        """Transform every configured ticker in turn.

        Processing stops at the first ticker that fails; files already written
        for earlier tickers are left in place.
        """
        for label, header_rows in self._DATASETS:
            self._transform_one(label, header_rows)


        
        

       

In [12]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage config, and transform every ticker.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.transform_data()
except Exception:
    # Record the failure in the project log, then re-raise unchanged so the
    # notebook still stops with the original traceback.
    logger.exception("Data transformation stage failed")
    raise

[2024-11-30 18:59:36,339: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-11-30 18:59:36,349: INFO: common: yaml file: params.yaml loaded successfully]
[2024-11-30 18:59:36,351: INFO: common: created directory at: artifacts]
[2024-11-30 18:59:36,351: INFO: common: created directory at: artifacts/data_transformation]
[2024-11-30 18:59:36,461: INFO: 536690641: Transformed Apple Data saved successfully]
Apple data shape: (3087, 1)
[2024-11-30 18:59:36,633: INFO: 536690641: Transformed Amazon Data saved successfully]
Amazon data shape: (3087, 1)
[2024-11-30 18:59:36,739: INFO: 536690641: Transformed Google Data saved successfully]
Google data shape: (3087, 1)
[2024-11-30 18:59:36,833: INFO: 536690641: Transformed Microsoft Data saved successfully]
Microsoft data shape: (3087, 1)
[2024-11-30 18:59:36,874: INFO: 536690641: Transformed Silk Data saved successfully]
Silk data shape: (3040, 1)
[2024-11-30 18:59:36,919: INFO: 536690641: Transformed Pace Data saved successf