In [8]:
import os
import time

import pandas as pd
import requests


# SECURITY: an API key is hard-coded in this file. Supply the key through the
# POLYGON_API_KEY environment variable instead; the literal below is kept only as
# a backward-compatible fallback and should be rotated and removed from source.
API_KEY = os.environ.get("POLYGON_API_KEY") or "o9MHKBamfGm9xCIxiLlbUPDwCWmjdWk4"

def download_historical_data(ticker, start_date, end_date, timespan="day", retries=3):
    """
    Downloads historical aggregate bars from Polygon.io for one ticker.

    Requests 1-``timespan`` bars between ``start_date`` and ``end_date`` and keeps
    following the ``next_url`` field of each response until no further page is
    advertised. Network/HTTP failures are retried; a retry resumes from the page
    that failed, so rows already collected are neither lost nor duplicated.

    HTTP 429 responses are waited out, but only a bounded number of times in a
    row; after that the 429 is treated like any other failed attempt so the
    function cannot loop forever.

    :param ticker: Stock ticker symbol (e.g., "NVDA").
    :param start_date: Start date in the format "YYYY-MM-DD".
    :param end_date: End date in the format "YYYY-MM-DD".
    :param timespan: Timespan of data ("minute", "hour", "day").
    :param retries: Maximum number of attempts in case of API failure. Values
        below 1 are treated as 1 so that at least one request is always made.
    :return: Pandas DataFrame with the columns ``timestamp``, ``open``, ``high``,
        ``low``, ``close``, ``volume`` and ``ticker``, or None if no data is found
        or an unexpected (non-request) error occurs. If every attempt fails after
        some pages were already fetched, the partial data is returned and a
        warning is printed.
    """
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/{timespan}/{start_date}/{end_date}"
    params = {"apiKey": API_KEY, "limit": 50000}
    all_data = []

    request_timeout = 30       # seconds; without a timeout a stalled connection blocks forever
    rate_limit_wait = 60       # seconds to pause after an HTTP 429
    max_rate_limit_waits = 5   # consecutive 429 pauses tolerated before the attempt fails
    retry_wait = 5             # seconds between failed attempts

    max_attempts = max(1, retries)
    rate_limit_hits = 0
    completed = False

    for attempt in range(max_attempts):
        try:
            while url:
                response = requests.get(url, params=params, timeout=request_timeout)

                if response.status_code == 429 and rate_limit_hits < max_rate_limit_waits:
                    rate_limit_hits += 1
                    print("Rate limit reached. Waiting before retrying...")
                    time.sleep(rate_limit_wait)
                    continue

                # Once the 429 budget is spent, a 429 falls through to here and is
                # raised as an HTTPError, which counts as a failed attempt below.
                response.raise_for_status()
                rate_limit_hits = 0

                data = response.json()
                results = data.get("results")
                if results:
                    all_data.extend(results)

                # Check for "next_url" to continue pagination
                url = data.get("next_url", None)

            # Every page was fetched successfully
            completed = True
            break
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} failed for {ticker}: {e}")
            rate_limit_hits = 0  # the next attempt gets a fresh rate-limit budget
            if attempt + 1 < max_attempts:
                time.sleep(retry_wait)  # no point sleeping after the final attempt
        except Exception as e:
            print(f"Unexpected error: {e}")
            return None

    if not all_data:
        print(f"No data found for {ticker}.")
        return None

    if not completed:
        # Make truncation visible instead of silently passing off partial data as complete.
        print(f"Warning: download for {ticker} did not complete; returning partial data.")

    df = pd.DataFrame(all_data)
    # The "t" field is converted as epoch milliseconds, giving timezone-naive timestamps.
    df["timestamp"] = pd.to_datetime(df["t"], unit="ms")
    df = df.rename(columns={"o": "open", "h": "high", "l": "low", "c": "close", "v": "volume"})
    df = df[["timestamp", "open", "high", "low", "close", "volume"]]
    df["ticker"] = ticker

    return df


def save_combined_data(tickers, start_date, end_date, timespan="day", output_file="microprocessor_stocks.csv", request_delay=15):
    """
    Downloads and combines historical data for multiple tickers and saves it to a single CSV file.

    Each ticker is fetched with ``download_historical_data``. Tickers for which
    that call returns None are skipped and listed in a summary message. When no
    ticker yields data, no file is written.

    :param tickers: Iterable of stock ticker symbols.
    :param start_date: Start date in the format "YYYY-MM-DD".
    :param end_date: End date in the format "YYYY-MM-DD".
    :param timespan: Timespan of data ("minute", "hour", "day").
    :param output_file: Name of the output CSV file.
    :param request_delay: Seconds to pause between consecutive tickers to respect
        API rate limits. No pause is made before the first ticker or after the
        last one. Values of 0 or less disable the pause.
    """
    frames = []
    skipped = []

    for position, ticker in enumerate(tickers):
        # Pause *between* tickers only; a delay after the final one would just waste time.
        if position > 0 and request_delay > 0:
            time.sleep(request_delay)

        print(f"Fetching data for {ticker}...")
        df = download_historical_data(ticker, start_date, end_date, timespan)
        if df is not None:
            frames.append(df)
        else:
            skipped.append(ticker)

    if skipped:
        # Surface missing tickers so an incomplete CSV is not mistaken for a full one.
        print(f"No data retrieved for: {', '.join(map(str, skipped))}")

    if frames:
        combined_df = pd.concat(frames, ignore_index=True)
        combined_df.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
    else:
        print("No data was fetched. No file created.")


if __name__ == "__main__":
    # Ticker symbol -> company name for the stocks used in the analysis.
    # Insertion order determines the download order.
    semiconductor_companies = {
        "NVDA": "NVIDIA",
        "AMD": "Advanced Micro Devices",
        "INTC": "Intel Corporation",
        "TSM": "Taiwan Semiconductor",
        "QCOM": "Qualcomm",
        "TXN": "Texas Instruments",
        "AVGO": "Broadcom",
        "MU": "Micron Technology",
        "ASML": "ASML Holding",
        "MRVL": "Marvell Technology",
    }
    tickers = list(semiconductor_companies)

    # Backtesting window (inclusive bounds passed to the API as YYYY-MM-DD).
    start_date, end_date = "2023-02-01", "2025-01-18"

    # Bar size: one-minute bars.
    timespan = "minute"

    # Destination CSV for the combined data set.
    output_file = "microprocessor_stocks_2023-25.csv"

    save_combined_data(
        tickers=tickers,
        start_date=start_date,
        end_date=end_date,
        timespan=timespan,
        output_file=output_file,
    )


Fetching data for NVDA...
Rate limit reached. Waiting before retrying...
Fetching data for AMD...
Rate limit reached. Waiting before retrying...
Fetching data for INTC...
Rate limit reached. Waiting before retrying...
Fetching data for TSM...
Rate limit reached. Waiting before retrying...
Fetching data for QCOM...
Rate limit reached. Waiting before retrying...
Fetching data for TXN...
Fetching data for AVGO...
Rate limit reached. Waiting before retrying...
Fetching data for MU...
Rate limit reached. Waiting before retrying...
Fetching data for ASML...
Rate limit reached. Waiting before retrying...
Fetching data for MRVL...
Rate limit reached. Waiting before retrying...
Data saved to microprocessor_stocks_2023-25.csv
