In [7]:
from typing import Optional

import pandas as pd

# Columns every input file must provide; checked up front so that a schema
# mismatch is reported before any further processing.
REQUIRED_COLUMNS = {
    "date",
    "ticker",
    "open",
    "high",
    "low",
    "close",
    "adjclose",
    "volume",
}


# Load the data from a local CSV file
def load_data(
    file_path: str,
    *,
    expected_tickers: Optional[int] = None,
) -> pd.DataFrame:
    """
    Load stock data from a CSV file, validate it, and return it sorted.

    Steps performed:
    - Read the file with ``pd.read_csv``.
    - Check that every column named in ``REQUIRED_COLUMNS`` is present.
    - Convert the ``date`` column with ``pd.to_datetime(errors="raise")``.
    - Sort rows by ``ticker`` then ``date`` and reset the index.
    - Reject duplicate (date, ticker) records.
    - Optionally check the number of distinct tickers.

    A completion message is printed to stdout on success.

    Args:
        file_path: Path of the CSV file to read.
        expected_tickers: When given, the number of distinct non-null
            ``ticker`` values the file must contain (e.g. ``10``). The
            default ``None`` skips this check, so callers that do not pass
            it see no ticker-count validation.

    Returns:
        The loaded DataFrame, sorted by ticker and date, with a fresh
        0..n-1 index.

    Raises:
        ValueError: If required columns are missing, if duplicate
            (date, ticker) rows exist, or if ``expected_tickers`` is given
            and the distinct ticker count differs from it.

    Errors raised by ``pd.read_csv`` (e.g. a missing file) and by
    ``pd.to_datetime`` (unparseable dates) propagate unchanged.
    """

    df = pd.read_csv(file_path)

    # Schema validation: fail early if any required column is absent.
    missing_cols = REQUIRED_COLUMNS - set(df.columns)
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Parse dates so later date-based operations work on real datetimes.
    df["date"] = pd.to_datetime(df["date"], errors="raise")

    # Sort by ticker, then chronologically within each ticker, so the most
    # recent row of every ticker is the last one in its group.
    df = df.sort_values(by=["ticker", "date"]).reset_index(drop=True)

    # A (date, ticker) pair must appear at most once; more than one row
    # means duplicate data is flowing into the dataset.
    duplicate_count = df.duplicated(subset=["date", "ticker"]).sum()
    if duplicate_count > 0:
        raise ValueError(f"Found {duplicate_count} duplicate (date, ticker) records")

    # Optional ticker-count validation; nunique() ignores missing tickers.
    if expected_tickers is not None:
        ticker_count = df["ticker"].nunique()
        if ticker_count != expected_tickers:
            raise ValueError(
                f"Expected {expected_tickers} unique tickers, found {ticker_count}"
            )

    print('Loading and Transforming Data is done !')
    return df
