## This file is used to retrieve stock data for all Nikkei 225 (^N225) constituent companies from 01/01/2007 to 31/12/2025.
## The data is then saved to a CSV file named "N225_companies_stock.csv".

In [1]:
import yfinance as yf
import pandas as pd
import time
import requests
import random
from requests.exceptions import HTTPError

In [2]:
# Shared HTTP session handed to yfinance; it advertises a desktop Chrome
# User-Agent in place of the default python-requests one.
# NOTE(review): newer yfinance releases reportedly reject plain
# requests.Session objects in favour of curl_cffi sessions -- confirm against
# the installed yfinance version.
session = requests.Session()
session.headers['User-Agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
)

In [3]:
# Define tickers and data parameters
# Ticker list: Yahoo Finance symbols of the Nikkei 225 constituent companies.
# Each entry is fetched individually by fetch_stock_data() and written to the
# 'Ticker' column of the combined CSV.
TICKERS = ["4151.T",
    "4502.T", "4503.T", "4506.T", "4507.T", "4519.T", "4523.T", "4568.T", "4578.T", "6479.T", "6501.T", "6503.T", "6504.T", "6506.T", "6526.T", "6594.T", "6645.T", "6674.T", "6701.T", 
    "6702.T", "6723.T", "6724.T", "6752.T", "6753.T", "6758.T", "6762.T", "6770.T", "6841.T", "6857.T", "6861.T", "6902.T", "6920.T", "6952.T", "6954.T", "6971.T", "6976.T", "6981.T",
    "7735.T", "7751.T", "7752.T", "8035.T", "7201.T", "7202.T", "7203.T", "7205.T", "7211.T", "7261.T", "7267.T", "7269.T", "7270.T", "7272.T", "4543.T", "4902.T", "6146.T", "7731.T", 
    "7733.T", "7741.T", "7762.T", "9432.T", "9433.T", "9434.T", "9613.T", "9984.T", "5831.T", "7186.T", "8304.T", "8306.T", "8308.T", "8309.T", "8316.T", "8331.T", "8354.T", "8411.T", 
    "8253.T", "8591.T", "8697.T", "8601.T", "8604.T", "8630.T", "8725.T", "8750.T", "8766.T", "8795.T", "1332.T", "2002.T", "2269.T", "2282.T", "2501.T", "2502.T", "2503.T", "2801.T",
    "2802.T", "2871.T", "2914.T", "3086.T", "3092.T", "3099.T", "3382.T", "7453.T","8233.T","8252.T","8267.T","9843.T","9983.T","2413.T","2432.T","3659.T","4307.T","4324.T","4385.T",
    "4661.T","4689.T","4704.T","4751.T","4755.T","6098.T","6178.T","7974.T","9602.T","9735.T","9766.T","1605.T","3401.T","3402.T",
    "3861.T","3405.T","3407.T","4004.T","4005.T","4021.T","4042.T","4043.T","4061.T","4063.T",
    "4183.T","4188.T","4208.T","4452.T","4901.T","4911.T","6988.T","5019.T","5020.T","5101.T",
    "5108.T","5201.T","5214.T","5233.T","5301.T","5332.T","5333.T","5401.T","5406.T","5411.T",
    "3436.T","5706.T","5711.T","5713.T","5714.T","5801.T","5802.T","5803.T","2768.T","8001.T",
    "8002.T","8015.T","8031.T","8053.T","8058.T","1721.T","1801.T","1802.T","1803.T","1808.T",
    "1812.T","1925.T","1928.T","1963.T","5631.T","6103.T","6113.T","6273.T","6301.T","6302.T",
    "6305.T","6326.T","6361.T","6367.T","6471.T","6472.T","6473.T","7004.T","7011.T","7013.T",
    "7012.T","7832.T","7911.T","7912.T","7951.T","3289.T","8801.T","8802.T","8804.T","8830.T",
    "9001.T","9005.T","9007.T","9008.T","9009.T","9020.T","9021.T","9022.T","9064.T","9147.T",
    "9101.T","9104.T","9107.T","9201.T","9202.T","9301.T","9501.T","9502.T","9503.T","9531.T","9532.T"
]

# Date parameters (ISO YYYY-MM-DD strings), passed as `start` / `end` to
# Ticker.history() in fetch_stock_data().
# NOTE(review): yfinance documents `end` as exclusive, so rows dated
# 2025-12-31 itself would not be returned -- confirm this matches the intent.
START_DATE = "2007-01-01"
END_DATE = "2025-12-31"

In [4]:
# Function to fetch stock data
def fetch_stock_data(ticker, max_retries=3, initial_delay=5):
    """Download daily Open/Close/Volume history for one ticker, with retries.

    The request covers START_DATE..END_DATE at a one-day interval and goes
    through the module-level ``session``.

    Args:
        ticker: Symbol to request, e.g. "7203.T".
        max_retries: Number of extra attempts made after the first one fails.
        initial_delay: Base wait in seconds before the first retry. It is
            doubled after every failed attempt, and 0-3 s of random jitter is
            added to each wait.

    Returns:
        A DataFrame with columns Date, Open, Close, Volume and Ticker, or
        None when the download returned no rows or every attempt raised.
    """
    current_delay = initial_delay
    for attempt in range(max_retries + 1):
        try:
            stock = yf.Ticker(ticker, session=session)
            data = stock.history(
                start=START_DATE,
                end=END_DATE,
                interval="1d",
                actions=False
            )

            if data.empty:
                # An empty frame is reported once and not retried.
                print(f"No data for {ticker}")
                return None

            processed_data = data[['Open', 'Close', 'Volume']].reset_index()
            # Keep only the calendar date of each row's timestamp.
            processed_data['Date'] = pd.to_datetime(processed_data['Date']).dt.date
            processed_data['Ticker'] = ticker  # Add Ticker column
            return processed_data

        except Exception as e:
            # Deliberately broad: any failure for one ticker must not abort
            # the whole batch run.
            if attempt >= max_retries:
                # Report the final error too; otherwise the cause of a
                # permanent failure is never shown.
                print(f"Failed {ticker} after {max_retries} retries: {e}")
                return None

            sleep_time = current_delay + random.uniform(0, 3)
            print(f"Retry {attempt+1} for {ticker} in {sleep_time:.1f}s: {str(e)}")
            time.sleep(sleep_time)
            current_delay *= 2

    # Only reached when max_retries is negative and no attempt was made.
    return None

In [5]:
# Main function to fetch data for all companies
def main():
    """Fetch every ticker in TICKERS and write the combined rows to one CSV.

    Each ticker is downloaded with fetch_stock_data() after a short random
    pause. Tickers that return None are skipped. When at least one ticker
    succeeds, the frames are concatenated and saved to
    "N225_companies_stock.csv" in the current working directory. A success
    rate summary is always printed, including when TICKERS is empty.
    """
    all_data = []
    total_tickers = len(TICKERS)

    for idx, ticker in enumerate(TICKERS, 1):
        print(f"\nProcessing {ticker} ({idx}/{total_tickers})")

        # Random pause between requests so they are not sent back-to-back
        time.sleep(random.uniform(0.5, 1.5))

        data = fetch_stock_data(ticker)
        if data is not None:
            all_data.append(data)

    # Every collected frame is one successful ticker.
    success_count = len(all_data)

    if all_data:
        # Merge all DataFrames
        combined_df = pd.concat(all_data, ignore_index=True)

        # Save merged data
        combined_df.to_csv("N225_companies_stock.csv", index=False)
        print(f"\nSuccessfully saved {len(combined_df)} rows from {success_count} companies")
        print("Columns in final dataset:", combined_df.columns.tolist())
    else:
        print("\nNo data was collected")

    # Guard against an empty ticker list, which previously raised
    # ZeroDivisionError here.
    success_rate = success_count / total_tickers if total_tickers else 0.0
    print(f"\nSuccess rate: {success_count}/{total_tickers} ({success_rate:.1%})")

In [6]:
# Script entry point: run the full download and report the wall-clock duration
if __name__ == "__main__":
    started_at = time.time()
    main()
    elapsed_minutes = (time.time() - started_at) / 60
    print(f"\nTotal execution time: {elapsed_minutes:.2f} minutes")


Processing 4151.T (1/225)

Processing 4502.T (2/225)

Processing 4503.T (3/225)

Processing 4506.T (4/225)

Processing 4507.T (5/225)

Processing 4519.T (6/225)

Processing 4523.T (7/225)

Processing 4568.T (8/225)

Processing 4578.T (9/225)

Processing 6479.T (10/225)

Processing 6501.T (11/225)

Processing 6503.T (12/225)

Processing 6504.T (13/225)

Processing 6506.T (14/225)

Processing 6526.T (15/225)

Processing 6594.T (16/225)

Processing 6645.T (17/225)

Processing 6674.T (18/225)

Processing 6701.T (19/225)

Processing 6702.T (20/225)

Processing 6723.T (21/225)

Processing 6724.T (22/225)

Processing 6752.T (23/225)

Processing 6753.T (24/225)

Processing 6758.T (25/225)

Processing 6762.T (26/225)

Processing 6770.T (27/225)

Processing 6841.T (28/225)

Processing 6857.T (29/225)

Processing 6861.T (30/225)

Processing 6902.T (31/225)

Processing 6920.T (32/225)

Processing 6952.T (33/225)

Processing 6954.T (34/225)

Processing 6971.T (35/225)

Processing 6976.T (36/225)

