In [1]:
import os

In [2]:
# Show the kernel's starting directory (the recorded output is the research/ folder).
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project\\research'

In [3]:
# The notebook is run from research/, while the project's config and artifact
# paths are relative to the parent folder. Move up one level, but only when we
# are still inside research/, so re-running this cell does not keep climbing
# further up the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the chdir (the recorded output is the project root).
%pwd

'd:\\Projects\\Stock_Price_Prediction_Project'

In [24]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable bundle of settings consumed by the data ingestion step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory reserved for this stage's artifacts.
    root_dir: Path
    # Ticker symbols to download, one CSV is produced per symbol.
    tickers: list
    # Start of the requested price history.
    start_date: str
    # End of the requested price history; the literal "now" means today.
    end_date: str
    # Directory the per-ticker CSV files are written to.
    local_data_dir: Path

In [8]:
from smPredictor.constants import *
from smPredictor.utils.common import read_yaml, create_directories

In [32]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    Args:
        config_filepath: Path of the main configuration YAML; defaults to
            ``CONFIG_FILE_PATH``.
        params_filepath: Path of the parameters YAML; defaults to
            ``PARAMS_FILE_PATH``.

    Constructing the manager reads both files with ``read_yaml`` and passes
    ``artifacts_root`` from the main configuration to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        # The loaded objects are accessed with attribute syntax below
        # (e.g. ``self.config.artifacts_root``).
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the settings object for the data ingestion stage.

        Returns:
            A ``DataIngestionConfig`` populated from the ``data_ingestion``
            section of the main configuration. ``root_dir`` and
            ``local_data_dir`` are converted to ``Path`` so the values match
            the types the dataclass declares.

        Raises:
            ValueError: If ``tickers`` in the configuration is not a list.
        """
        config = self.config.data_ingestion

        # Validate first so a malformed configuration fails before anything
        # is created on disk.
        if not isinstance(config.tickers, list):
            raise ValueError("Tickers should be a list in the configuration file.")

        create_directories([config.root_dir])

        return DataIngestionConfig(
            root_dir=Path(config.root_dir),
            tickers=config.tickers,
            start_date=config.start_date,
            end_date=config.end_date,
            local_data_dir=Path(config.local_data_dir),
        )


In [28]:
import os
import yfinance as yf
from pandas_datareader.data import DataReader
from pandas_datareader import data as pdr
from datetime import datetime
from smPredictor import logger

In [29]:
class DataIngestion:
    """Downloads price history for the configured tickers and saves it as CSV.

    Args:
        config: Ingestion settings; ``tickers``, ``start_date``, ``end_date``
            and ``local_data_dir`` are read from it.
    """

    def __init__(self, config: "DataIngestionConfig"):
        self.config = config

    def _resolve_end_date(self):
        """Return the end date to request, replacing the "now" marker with today."""
        end_date = self.config.end_date
        # Only a string can be the "now" marker. A YAML loader may hand back a
        # date object for an unquoted date (PyYAML does); such a value has no
        # .lower() and is passed through unchanged.
        if isinstance(end_date, str) and end_date.lower() == "now":
            return datetime.now().strftime("%Y-%m-%d")
        return end_date

    def get_data(self):
        """Download each configured ticker and write it to ``<ticker>.csv``.

        The output directory is created if missing. Tickers are processed one
        at a time on a best-effort basis: a ticker that returns no rows is
        skipped, and a ticker whose download or save raises is reported and
        does not stop the remaining tickers.

        Raises:
            ValueError: If ``config.tickers`` is not a list.
        """
        # Ensure the local data directory and all parent directories exist
        os.makedirs(self.config.local_data_dir, exist_ok=True)

        tickers = self.config.tickers
        if not isinstance(tickers, list):
            raise ValueError("Tickers should be a list.")

        start_date = self.config.start_date
        end_date = self._resolve_end_date()

        for ticker in tickers:
            try:
                print(f"Downloading data for {ticker}...")
                data = yf.download(ticker, start=start_date, end=end_date)

                # Nothing came back for this symbol; do not write an empty file
                if data.empty:
                    print(f"No data found for {ticker}. Skipping.")
                    continue

                file_path = os.path.join(self.config.local_data_dir, f"{ticker}.csv")
                data.to_csv(file_path)
                print(f"Saved data for {ticker} to {file_path}")

            except Exception as e:
                # Deliberately broad: one failing ticker must not abort the rest
                print(f"Failed to download data for {ticker}: {e}")


In [30]:
# Run the data ingestion stage: load the configuration, build the stage
# settings, then download the price history and save it as CSV files.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()

    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.get_data()  # Fetch stock data and save as CSVs

except Exception as e:
    print(f"An error occurred: {e}")
    # Bare raise re-raises the active exception as-is, so the failure still
    # surfaces in the cell output after the message above.
    raise


[2024-11-28 20:31:22,737: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-11-28 20:31:22,740: INFO: common: yaml file: params.yaml loaded successfully]
[2024-11-28 20:31:22,743: INFO: common: created directory at: artifacts]
[2024-11-28 20:31:22,746: INFO: common: created directory at: artifacts/data_ingestion]
Downloading data for AAPL...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Saved data for AAPL to artifacts/data_ingestion/raw_data\AAPL.csv
Downloading data for GOOG...
Saved data for GOOG to artifacts/data_ingestion/raw_data\GOOG.csv
Downloading data for MSFT...



[*********************100%***********************]  1 of 1 completed

Saved data for MSFT to artifacts/data_ingestion/raw_data\MSFT.csv
Downloading data for AMZN...
Saved data for AMZN to artifacts/data_ingestion/raw_data\AMZN.csv



