In [1]:
import os

In [2]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project\\research'

In [3]:
# Step up from the `research` folder to the project root so that relative
# paths used by later cells are resolved from there.
# The guard keeps this cell idempotent: re-running it (or starting the kernel
# at the project root already) must not climb one directory higher each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project'

In [20]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Read-only bundle of settings for the data-ingestion stage.

    Fields prefixed with ``i`` belong to the international stocks and fields
    prefixed with ``p`` belong to the Pakistan stocks. Instances are frozen,
    so attributes cannot be reassigned after construction.
    """

    # Directory that holds everything this stage produces.
    root_dir: Path

    # Ticker symbols to download, one list per market.
    itickers: list
    ptickers: list

    # Date range for the international download.
    istart_date: str
    iend_date: str

    # Date range for the Pakistan download.
    pstart_date: str
    pend_date: str

    # Output folders that receive one CSV file per ticker.
    internationalStocks_local_data_dir: Path
    pakistanStocks_local_data_dir: Path

In [21]:
from smPredictor.constants import *
from smPredictor.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the settings object for the data-ingestion stage.

        Creates the stage's root directory, checks that both ticker entries
        are lists, and copies the ``data_ingestion`` section of the loaded
        configuration into a ``DataIngestionConfig``.

        Returns:
            DataIngestionConfig: The values taken from the configuration file.

        Raises:
            ValueError: If ``itickers`` or ``ptickers`` is not a list.
        """
        config = self.config.data_ingestion

        create_directories([config.root_dir])

        # Each message names the entry that is actually malformed, so the
        # user knows which key of the configuration file to fix.
        if not isinstance(config.itickers, list):
            raise ValueError("International Tickers should be a list in the configuration file.")
        if not isinstance(config.ptickers, list):
            raise ValueError("Pakistani Tickers should be a list in the configuration file.")

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            itickers=config.itickers,
            ptickers=config.ptickers,
            istart_date=config.istart_date,
            iend_date=config.iend_date,
            pstart_date=config.pstart_date,
            pend_date=config.pend_date,
            internationalStocks_local_data_dir=config.internationalStocks_local_data_dir,
            pakistanStocks_local_data_dir=config.pakistanStocks_local_data_dir
        )

        return data_ingestion_config


In [23]:
import os
import yfinance as yf
from pandas_datareader.data import DataReader
from pandas_datareader import data as pdr
from datetime import datetime, date
from smPredictor import logger
from psx import stocks

In [26]:
class DataIngestion:
    """Downloads historical stock data and writes one CSV file per ticker.

    International tickers are fetched with ``yf.download`` and Pakistani
    tickers with ``stocks`` from the ``psx`` package. A failure for a single
    ticker is reported and does not stop the remaining downloads.
    """

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion settings used by :meth:`get_data`."""
        self.config = config

    @staticmethod
    def _is_now(value):
        """Return True when ``value`` is the string "now" in any letter case."""
        return isinstance(value, str) and value.lower() == "now"

    @staticmethod
    def _to_date(value):
        """Return ``value`` as-is if it is a ``date``, else parse it as ``YYYY-MM-DD``."""
        if isinstance(value, date):
            return value
        return datetime.strptime(value, "%Y-%m-%d").date()

    @staticmethod
    def _download_all(tickers, fetch, target_dir):
        """Fetch every ticker with ``fetch`` and save it as ``<ticker>.csv`` in ``target_dir``."""
        for ticker in tickers:
            try:
                print(f"Downloading data for {ticker}...")
                frame = fetch(ticker)
                if frame.empty:
                    print(f"No data found for {ticker}. Skipping.")
                    continue
                file_path = os.path.join(target_dir, f"{ticker}.csv")
                frame.to_csv(file_path)
                print(f"Saved data for {ticker} to {file_path}")
            except Exception as e:
                # Deliberately best-effort: report the failure and move on so
                # one bad ticker does not abort the whole run.
                print(f"Failed to download data for {ticker}: {e}")

    def get_data(self):
        """Download all configured tickers into their output folders.

        Both output folders are created if missing. An end date given as the
        string "now" (any letter case) is replaced by the current date.

        Raises:
            ValueError: If either ticker setting is not a list, or if a
                Pakistani date string does not match ``YYYY-MM-DD``.
        """
        os.makedirs(self.config.internationalStocks_local_data_dir, exist_ok=True)
        os.makedirs(self.config.pakistanStocks_local_data_dir, exist_ok=True)

        itickers = self.config.itickers
        ptickers = self.config.ptickers

        # Validate tickers to ensure they are lists
        if not isinstance(itickers, list):
            raise ValueError("International Tickers should be a list.")
        if not isinstance(ptickers, list):
            raise ValueError("Pakistan Tickers should be a list.")

        # Resolve every date range before any download starts, so a malformed
        # Pakistani date is reported immediately rather than after all the
        # international downloads have already run.
        istart_date = self.config.istart_date
        if self._is_now(self.config.iend_date):
            iend_date = datetime.now().strftime("%Y-%m-%d")
        else:
            iend_date = self.config.iend_date

        pstart_date = self._to_date(self.config.pstart_date)
        if self._is_now(self.config.pend_date):
            pend_date = date.today()
        else:
            pend_date = self._to_date(self.config.pend_date)

        # Download stock data for International tickers
        self._download_all(
            itickers,
            lambda ticker: yf.download(ticker, start=istart_date, end=iend_date),
            self.config.internationalStocks_local_data_dir,
        )

        # Download stock data for Pakistani tickers
        self._download_all(
            ptickers,
            lambda ticker: stocks(ticker, start=pstart_date, end=pend_date),
            self.config.pakistanStocks_local_data_dir,
        )


In [27]:
# Run the data-ingestion stage end to end: load the configuration, build the
# stage settings, then download and save the data for every configured ticker.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.get_data()
except Exception as e:
    print(f"An error occurred: {e}")
    # A bare `raise` re-raises the active exception with its original
    # traceback, so the failing line inside the pipeline stays visible.
    raise


[2024-11-30 17:50:04,546: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-11-30 17:50:04,555: INFO: common: yaml file: params.yaml loaded successfully]
[2024-11-30 17:50:04,562: INFO: common: created directory at: artifacts]
[2024-11-30 17:50:04,566: INFO: common: created directory at: artifacts/data_ingestion]


[*********************100%***********************]  1 of 1 completed

Downloading data for AAPL...





Saved data for AAPL to artifacts/data_ingestion/international_stocks/raw_data\AAPL.csv
Downloading data for GOOG...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Saved data for GOOG to artifacts/data_ingestion/international_stocks/raw_data\GOOG.csv
Downloading data for MSFT...



[*********************100%***********************]  1 of 1 completed

Saved data for MSFT to artifacts/data_ingestion/international_stocks/raw_data\MSFT.csv
Downloading data for AMZN...





Saved data for AMZN to artifacts/data_ingestion/international_stocks/raw_data\AMZN.csv
Downloading data for SILK...


Downloading SILK's Data: 100%|██████████| 158/158 [00:24<00:00,  6.48it/s]
  data = pd.concat(data)


Saved data for SILK to artifacts/data_ingestion/pakistan_stocks/raw_data\SILK.csv
Downloading data for PACE...


Downloading PACE's Data: 100%|██████████| 158/158 [00:24<00:00,  6.42it/s]
  data = pd.concat(data)


Saved data for PACE to artifacts/data_ingestion/pakistan_stocks/raw_data\PACE.csv
Downloading data for FFL...


Downloading FFL's Data: 100%|██████████| 158/158 [00:23<00:00,  6.69it/s]
  data = pd.concat(data)


Saved data for FFL to artifacts/data_ingestion/pakistan_stocks/raw_data\FFL.csv
Downloading data for BOP...


Downloading BOP's Data: 100%|██████████| 158/158 [00:23<00:00,  6.79it/s]
  data = pd.concat(data)


Saved data for BOP to artifacts/data_ingestion/pakistan_stocks/raw_data\BOP.csv
