In [None]:
import os
import pandas as pd

In [None]:
class CminData:
    """
    File-based accessor for per-ticker news and price CSV files.

    Prices are read from ``{base_path}/price/raw/{ticker}.csv`` and news from
    ``{base_path}/news/raw/{ticker}.csv``.
    """

    # CSV column name -> key used in the dictionaries returned to callers.
    _NEWS_FIELDS = {"title": "title", "summary": "summary"}
    _PRICE_FIELDS = {
        "Open": "open",
        "Close": "close",
        "High": "high",
        "Low": "low",
        "Volume": "volume",
    }

    def __init__(self, base_path: str):
        self.prices_path = f"{base_path}/price/raw"
        self.news_path = f"{base_path}/news/raw"

    @staticmethod
    def _records_on_date(csv_path: str, date_column: str, date: str, fields: dict) -> list:
        """Return the rows of ``csv_path`` whose ``date_column`` equals ``date``, keyed per ``fields``."""
        df = pd.read_csv(csv_path)
        df[date_column] = pd.to_datetime(df[date_column])
        matches = df[df[date_column] == date]

        # Columns are only looked up per matching row, so a day without rows
        # yields an empty list.
        return [
            {key: record[column] for column, key in fields.items()}
            for record in matches.to_dict("records")
        ]

    def get(self, ticker: str, date: str) -> dict:
        """
        Get the news and prices for a specific ticker and date.

        Parameters
        ----------
        ticker : str
            The ticker symbol of the stock.
        date : str
            The date of the news and prices.

        Returns
        -------
        dict
            A dictionary with a "news" entry (see ``get_news``) and a
            "prices" entry (see ``get_prices``).
        """
        return {
            "news": self.get_news(ticker, date),
            "prices": self.get_prices(ticker, date),
        }

    def get_news(self, ticker: str, date: str) -> list:
        """
        Get news for a specific ticker and date.

        Parameters
        ----------
        ticker : str
            The ticker symbol of the stock; used verbatim as the CSV file name.
        date : str
            The date of the news. Rows match when their parsed "date" value
            equals this value.

        Returns
        -------
        list
            A list of dictionaries with "title" and "summary" keys, one per
            matching row; empty when nothing matches.
        """
        return self._records_on_date(
            f"{self.news_path}/{ticker}.csv", "date", date, self._NEWS_FIELDS
        )

    def get_prices(self, ticker: str, date: str) -> list:
        """
        Get prices for a specific ticker and date.

        Parameters
        ----------
        ticker : str
            The ticker symbol of the stock; used verbatim as the CSV file name.
        date : str
            The date of the prices. Rows match when their parsed "Date" value
            equals this value.

        Returns
        -------
        list
            A list of dictionaries with "open", "close", "high", "low" and
            "volume" keys, one per matching row; empty when nothing matches.
        """
        return self._records_on_date(
            f"{self.prices_path}/{ticker}.csv", "Date", date, self._PRICE_FIELDS
        )

    def get_tickers(self) -> list:
        """
        Get a list of all tickers that have a price CSV file.

        Returns
        -------
        list
            The upper-cased, alphabetically sorted file stems of every
            ``.csv`` file found under the prices directory (searched
            recursively).
        """
        tickers = []
        for _root, _dirs, files in os.walk(self.prices_path):
            # splitext strips only the final ".csv", so tickers that contain
            # a dot themselves (e.g. "BRK.B") are kept intact.
            tickers.extend(
                os.path.splitext(file)[0].upper()
                for file in files
                if file.endswith(".csv")
            )

        tickers.sort()
        return tickers

In [None]:
class SnP500Data:
    """Empty placeholder class (presumably for S&P 500 data); nothing is implemented yet."""

In [None]:
class Dataset:
    """
    Bundle the individual data sources behind one object.

    Parameters
    ----------
    base_path : str
        Root directory of the data files; forwarded to ``CminData``.
    """

    def __init__(self, base_path: str = "data"):
        self.base_path = base_path
        # CminData requires the data root; constructing it without one raises
        # TypeError.
        # NOTE(review): assumes the CMIN files sit directly under base_path - confirm.
        self.cmin = CminData(base_path)
        self.snp = SnP500Data()

In [None]:
# Build the dataset wrapper with its default base path ("data").
data = Dataset()

# Fetch the news and prices recorded for AAPL on 2020-01-01.
data.cmin.get(ticker="AAPL", date="2020-01-01")