In [151]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from pathlib import Path

In [152]:
def scrape_stock_data(tickers, start_date, end_date):
    start = pd.to_datetime(start_date)
    end   = pd.to_datetime(end_date)

    # yfinance end date is exclusive, so add +1 day to include `end`
    end_plus_one = end + timedelta(days=1)

    frames = []

    for tkr in tickers:
        print(f"Downloading {tkr}: {start.date()} → {end.date()}")

        df = yf.download(
            tkr,
            start=start,
            end=end_plus_one,
            auto_adjust=False,
            progress=False,
            threads=False,
        )

        if df.empty:
            print(f"  ↳ no data returned, skipping")
            continue

        df = (
            df.reset_index()               # Date becomes a column
              .rename(columns={"Date": "date"})
              .assign(
                  ticker=tkr,
                  date_only=lambda x: x["date"].dt.date
              )[["date", "date_only", "ticker", "Open", "High", "Low", "Close", "Volume"]]
        )

        frames.append(df)

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()




In [153]:
def append_to_master_csv(new_df, csv_path, dedupe_on=("date_only", "ticker")):
    """
    Append `new_df` into `csv_path` (create if absent) and drop duplicates
    based on `dedupe_on` columns, keeping the NEWEST scrape for any clash.
    """
    csv_path = Path(csv_path)

    if csv_path.exists():
        master   = pd.read_csv(csv_path)
        combined = pd.concat([master, new_df], ignore_index=True)
        combined = combined.drop_duplicates(subset=dedupe_on, keep="last")
    else:
        combined = new_df.copy()

    combined = (combined
                .sort_values(list(dedupe_on))
                .reset_index(drop=True))

    combined.to_csv(csv_path, index=False)
    print(f"✔ Saved {len(combined)} rows to {csv_path}")


In [155]:

if __name__ == "__main__":
    tickers    = ["GME"]
    start_date = datetime(2025, 2, 1)
    end_date   = datetime(2025, 4, 17)

    stock_df = scrape_stock_data(tickers, start_date, end_date)

    if stock_df.empty:
        print("No stock data returned; nothing appended.")
    else:
        print("\nSample of scraped data:")
        print(stock_df.head())
        append_to_master_csv(stock_df, "../data/01/perstock/stockvalues_00_GME.csv")


Downloading GME: 2025-02-01 → 2025-04-17

Sample of scraped data:
Price        date   date_only ticker       Open       High        Low  \
Ticker                                      GME        GME        GME   
0      2025-02-03  2025-02-03    GME  25.570000  26.540001  25.500000   
1      2025-02-04  2025-02-04    GME  25.850000  26.250000  25.799999   
2      2025-02-05  2025-02-05    GME  25.700001  25.809999  24.900000   
3      2025-02-06  2025-02-06    GME  24.930000  25.389999  24.530001   
4      2025-02-07  2025-02-07    GME  24.900000  25.020000  24.600000   

Price       Close   Volume  
Ticker        GME      GME  
0       25.889999  5740500  
1       25.900000  3353700  
2       24.930000  4580200  
3       24.799999  4349500  
4       24.730000  3403300  
✔ Saved 53 rows to ../data/01/perstock/stockvalues_00_GME.csv
