In [1]:
import yfinance as yf
import pandas as pd
import datetime
from google.cloud import storage, bigquery
import os
import json
from dotenv import load_dotenv

In [9]:
# === CONFIGURATION ===
# Yahoo Finance symbol handed to download_data() and stamped into the "Ticker" column.
# NOTE(review): "^GSPC" is presumably the S&P 500 index symbol — confirm.
ticker: str = "^GSPC"

In [16]:
def download_data(ticker, start_date, end_date):
    """Fetch 15-minute bars for ``ticker`` through yfinance as a flat DataFrame.

    Args:
        ticker: Symbol forwarded to ``yf.download`` and written to the
            ``Ticker`` column of the result.
        start_date: Start of the requested window (passed as ``start``).
        end_date: End of the requested window (passed as ``end``).

    Returns:
        An empty ``pd.DataFrame`` when ``start_date >= end_date`` (no request
        is made); the empty frame yfinance returned when it has no rows;
        otherwise a frame whose former index is a ``Datetime`` column holding
        UTC timestamps rendered as ``'%Y-%m-%d %H:%M:%S%z'`` strings, plus a
        ``Ticker`` column.

    Note:
        Assumes the downloaded index is named ``Datetime`` and is
        timezone-aware; ``tz_convert`` raises on naive timestamps.
    """
    # An empty or inverted window means there is nothing to request.
    if start_date >= end_date:
        print(f"No new data to download for {ticker} (start_date: {start_date}, end_date: {end_date})")
        return pd.DataFrame()

    print(f"Downloading {ticker} data from {start_date} to {end_date} (15-minute interval)")
    bars = yf.download(ticker, start=start_date, end=end_date, interval='15m')

    # Hand back the empty result untouched so callers can test `.empty`.
    if bars.empty:
        print(f"No data available for {ticker} in the requested date range")
        return bars

    # Move the timestamp index into an ordinary column.
    bars = bars.reset_index()

    # For MultiIndex columns keep only the first level of each label;
    # the remaining level(s) are dropped, not joined into the name.
    if isinstance(bars.columns, pd.MultiIndex):
        bars.columns = bars.columns.get_level_values(0).tolist()

    bars['Ticker'] = ticker

    # Normalise to UTC, then store the timestamps as strings.
    utc_stamps = bars['Datetime'].dt.tz_convert('UTC')
    bars['Datetime'] = utc_stamps.dt.strftime('%Y-%m-%d %H:%M:%S%z')
    return bars

In [17]:
# Download window: ends at the current (naive) clock reading and reaches back five days.
lookback_period = datetime.timedelta(days=5)
current_date = datetime.datetime.now()
start_date = current_date - lookback_period


In [18]:
# Fetch the bars for the configured ticker between start_date and current_date.
df = download_data(ticker=ticker, start_date=start_date, end_date=current_date)

[*********************100%***********************]  1 of 1 completed

Downloading ^GSPC data from 2025-04-13 02:05:39.812396 to 2025-04-18 02:05:39.812396 (15-minute interval)





In [19]:
# Preview the first five rows of the downloaded frame.
df.head(n=5)

Unnamed: 0,Datetime,Close,High,Low,Open,Volume,Ticker
0,2025-04-14 13:30:00+0000,5425.160156,5459.459961,5411.609863,5441.959961,116713794,^GSPC
1,2025-04-14 13:45:00+0000,5424.149902,5436.680176,5411.790039,5425.25,132679832,^GSPC
2,2025-04-14 14:00:00+0000,5433.129883,5453.129883,5420.060059,5424.0,124629795,^GSPC
3,2025-04-14 14:15:00+0000,5438.450195,5441.680176,5418.029785,5433.410156,104136933,^GSPC
4,2025-04-14 14:30:00+0000,5442.209961,5447.810059,5434.810059,5438.919922,98297399,^GSPC


In [7]:
# Number of rows in the downloaded frame.
df.shape[0]

449