In [3]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import os

In [6]:

# Sector name -> list of ten constituent ticker symbols.
# Each list is written as one whitespace-separated string for compactness.
sectors = {
    "Technology": "AAPL MSFT ORCL CRM INTU ADBE AVGO TXN AMD MU".split(),
    "Consumer_Discretionary": "AMZN TSLA HD MCD NKE SBUX TGT LOW BKNG RCL".split(),
    "Communication_Services": "GOOGL META NFLX T VZ DIS TMUS CHTR WBD PARA".split(),
    "Healthcare": "JNJ PFE MRK ABT LLY UNH TMO BMY AMGN CVS".split(),
    "Financials": "JPM BAC WFC GS MS AXP C BLK TFC SCHW".split(),
    "Industrials": "HON UPS CAT BA GE MMM LMT DE RTX CSX".split(),
    "Energy": "XOM CVX COP SLB PSX EOG MPC VLO BKR OXY".split(),
}

# Benchmark index label -> ticker symbol used for the download.
indices = dict([
    ("S&P_500", "^GSPC"),
    ("NASDAQ_Composite", "^IXIC"),
    ("Dow_Jones", "^DJI"),
])

# Flat download universe: every sector constituent (in sector order),
# followed by the index symbols.
all_tickers = [
    symbol
    for group in (*sectors.values(), indices.values())
    for symbol in group
]

# Define timeframe. Both bounds are calendar dates (YYYY-MM-DD) and are
# treated as inclusive by this cell.
start_date = "2024-07-01"
end_date = "2025-06-30"  # Adjust the end date as needed, e.g.,
# end_date =  datetime.today().strftime('%Y-%m-%d')  # Uncomment to use today's date

# yfinance treats `end` as exclusive, so passing end_date directly would drop
# the end_date session itself. Request one extra calendar day instead.
download_end = (
    datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
).strftime("%Y-%m-%d")

# Download historical data and keep only the 'Adj Close' field.
# auto_adjust=False is required for yfinance to return a separate 'Adj Close'.
data = yf.download(all_tickers, start=start_date, end=download_end, auto_adjust=False)['Adj Close']

# Reshape to long format: one row per (Date, Ticker) with the adjusted close.
df = data.reset_index().melt(id_vars='Date', var_name='Ticker', value_name='Adj_Close')

# Save raw data. Create the target directory first so a fresh checkout or
# machine does not fail on the first write, and omit the meaningless
# RangeIndex column, matching the other CSV exports in this notebook.
raw_csv_path = os.path.expanduser("~/portfolio_projects/stocks/Data/multi_sector_with_indices.csv")
os.makedirs(os.path.dirname(raw_csv_path), exist_ok=True)
df.to_csv(raw_csv_path, index=False)

[                       0%                       ]

[*********************100%***********************]  73 of 73 completed


In [8]:
# Build one metadata record per ticker.
# Stocks carry their sector name; indices reuse the "Sector" column to hold
# the index label and are tagged with Type "Index".
metadata = [
    {"Ticker": symbol, "Sector": sector_name, "Type": "Stock"}
    for sector_name, members in sectors.items()
    for symbol in members
]
metadata += [
    {"Ticker": symbol, "Sector": index_name, "Type": "Index"}
    for index_name, symbol in indices.items()
]

# Materialise the lookup table and persist it without the row index.
metadata_df = pd.DataFrame(metadata)
metadata_df.to_csv("~/portfolio_projects/stocks/Data/ticker_metadata.csv", index=False)

In [9]:
# Attach Sector/Type labels to every price row. The left join keeps all rows
# of df, and any ticker missing from metadata_df gets NaN labels.
merged_df = df.merge(metadata_df, how="left", on="Ticker")

In [10]:
# Persist the merged table (prices plus sector/type labels) without the row index.
merged_csv_path = "~/portfolio_projects/stocks/Data/merged_sector_index_data.csv"
merged_df.to_csv(merged_csv_path, index=False)