## Resources: 
- https://huggingface.co/spaces/mteb/leaderboard (MTEB leaderboard, used to look for open-source embedding models)
- 

In [14]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import os

# Make sure the output directory for the downloaded data exists.
# exist_ok=True turns this into a no-op when the directory is already there,
# so no separate (race-prone) existence check is needed.
os.makedirs('financial_data', exist_ok=True)

# Company name -> ticker symbol for the major AI/tech/social media companies.
# Each theme lives in its own dict; they are merged below in the order listed,
# so iterating `tech_tickers` walks the sections top to bottom.

# Big Tech / AI companies
_big_tech_ai = {
    'NVIDIA': 'NVDA',
    'Apple': 'AAPL',
    'Microsoft': 'MSFT',
    'Alphabet': 'GOOGL',
    'Amazon': 'AMZN',
    'Meta': 'META',
    'Tesla': 'TSLA',
    'IBM': 'IBM',
    'Intel': 'INTC',
    'AMD': 'AMD',
    'Oracle': 'ORCL',
    'Salesforce': 'CRM',
    'Adobe': 'ADBE',
    'Palantir': 'PLTR',
    'C3.ai': 'AI',
    'Baidu': 'BIDU',
    'Taiwan Semi': 'TSM',
}

# Social Media and Content Platforms
_social_platforms = {
    'Snap': 'SNAP',
    'Pinterest': 'PINS',
    'Reddit': 'RDDT',
    'Match Group': 'MTCH',
    'Bumble': 'BMBL',
    'Roblox': 'RBLX',
    'Unity': 'U',
    'Tencent': 'TCEHY',
}

# Digital Advertising / Social Media Adjacent
_ad_adjacent = {
    'Trade Desk': 'TTD',
    'Spotify': 'SPOT',
    'Roku': 'ROKU',
}

# ByteDance/TikTok investors
_bytedance_investors = {
    'SoftBank': 'SFTBY',
    'KKR': 'KKR',
}

tech_tickers = {
    **_big_tech_ai,
    **_social_platforms,
    **_ad_adjacent,
    **_bytedance_investors,
}

# Date window for the download: a fixed start date, and an end date equal to
# today's local date rendered as YYYY-MM-DD.
start_date = '2020-01-01'
end_date = datetime.now().date().isoformat()

def download_data():
    """Download and save stock data for all companies.

    Fetches data for every ticker symbol in ``tech_tickers`` between
    ``start_date`` and ``end_date`` with a single ``yf.download`` call and
    writes the result to ``financial_data/tech_stocks.csv``.

    Returns:
        The object returned by ``yf.download`` (the same data that was
        written to the CSV file).
    """
    # Single source of truth for the output path, so the log message below
    # always names the file that was actually written.
    output_path = 'financial_data/tech_stocks.csv'

    print(f"Downloading financial data from {start_date} to {end_date}...")

    # Download data for all companies at once.
    # NOTE(review): yfinance appears to treat `end` as exclusive, so rows for
    # `end_date` itself are presumably not included -- confirm against its docs.
    all_data = yf.download(list(tech_tickers.values()), start=start_date, end=end_date)

    # Save the complete data to a CSV file
    all_data.to_csv(output_path)
    print(f"Saved complete data to {output_path}")

    return all_data

# Script entry point: download the data, then print a short summary and a few
# hints on how to index the resulting DataFrame.
if __name__ == "__main__":
    print("Tech/AI/Social Media Companies Data Download")
    print("=" * 50)

    # Download the data
    all_data = download_data()

    print("\nData download complete!")
    # Report the path download_data() actually writes to.
    print(f"Data for {len(tech_tickers)} companies downloaded and saved to 'financial_data/tech_stocks.csv'")
    print(f"Date range: {start_date} to {end_date}")
    print(f"Total trading days: {len(all_data)}")

    # Print column information to help with data access
    print("\nData structure information:")
    print(f"Data columns (multiindex): {list(all_data.columns.values)}")
    print("\nTo access specific data in pandas, use:")
    print("  df['Close']['NVDA']  # For NVIDIA closing prices")
    # The example date must be a trading day: 2023-01-15 was a Sunday, so an
    # exact-date .loc lookup for it would fail on a trading-day index.
    print("  df.loc['2023-01-17']  # For all data on a specific date")

Tech/AI/Social Media Companies Data Download
Downloading financial data from 2020-01-01 to 2025-04-14...


[*********************100%***********************]  30 of 30 completed


Saved complete data to financial_data/all_tech_stocks_data.csv

Data download complete!
Data for 30 companies downloaded and saved to 'financial_data/all_tech_stocks_data.csv'
Date range: 2020-01-01 to 2025-04-14
Total trading days: 1327

Data structure information:
Data columns (multiindex): [('Close', 'AAPL'), ('Close', 'ADBE'), ('Close', 'AI'), ('Close', 'AMD'), ('Close', 'AMZN'), ('Close', 'BIDU'), ('Close', 'BMBL'), ('Close', 'CRM'), ('Close', 'GOOGL'), ('Close', 'IBM'), ('Close', 'INTC'), ('Close', 'KKR'), ('Close', 'META'), ('Close', 'MSFT'), ('Close', 'MTCH'), ('Close', 'NVDA'), ('Close', 'ORCL'), ('Close', 'PINS'), ('Close', 'PLTR'), ('Close', 'RBLX'), ('Close', 'RDDT'), ('Close', 'ROKU'), ('Close', 'SFTBY'), ('Close', 'SNAP'), ('Close', 'SPOT'), ('Close', 'TCEHY'), ('Close', 'TSLA'), ('Close', 'TSM'), ('Close', 'TTD'), ('Close', 'U'), ('High', 'AAPL'), ('High', 'ADBE'), ('High', 'AI'), ('High', 'AMD'), ('High', 'AMZN'), ('High', 'BIDU'), ('High', 'BMBL'), ('High', 'CRM'), ('H