In [1]:
# Install required packages
#!pip install yfinance pandas numpy torch transformers pickle-mixin pyarrow
!pip install faiss-cpu
!pip install yfinance 

import yfinance as yf
import pandas as pd
import numpy as np
import torch
import json
import pickle
import re
from datetime import datetime
from itertools import groupby
from operator import itemgetter
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

import warnings
warnings.filterwarnings('ignore')

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m103.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0
[0mCollecting yfinance
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.18.2.tar.gz (949 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m949.2/949.2 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to 

In [3]:
import yfinance as yf
import pandas as pd
import time

# Ticker symbols to download (50 in total), in processing order.
companies = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "META",
    "TSLA", "NVDA", "JPM", "V", "WMT",
    "INTC", "AMD", "ADBE", "CRM", "ORCL",
    "IBM", "CSCO", "QCOM", "TSM", "NKE",
    "HD", "MCD", "SBUX", "DIS", "BKNG",
    "MA", "BAC", "WFC", "GS", "AXP",
    "JNJ", "PFE", "MRK", "ABT", "UNH",
    "LLY", "BA", "CAT", "HON", "GE",
    "UPS", "PG", "KO", "PEP", "XOM",
    "CVX", "COP", "T", "VZ", "NFLX",
]

def process_company_data(ticker, start_date, end_date):
    """Download price history for one ticker, tidy it, and save it as CSV.

    The data is fetched with ``yf.download(..., auto_adjust=False)``, the
    column labels are flattened to plain field names, and the result is
    written to ``"{ticker}.csv"`` in the current working directory.

    Args:
        ticker: Symbol passed to ``yf.download`` and stored in the ``ticker``
            column of the result.
        start_date: Passed through as ``start`` to ``yf.download``.
        end_date: Passed through as ``end`` to ``yf.download``.

    Returns:
        A DataFrame with the columns ``date, ticker, open, high, low, close,
        volume, adj_close`` (in that order), or ``None`` when the download is
        empty, an expected column is missing, or any other error occurs. The
        failure reason is printed rather than raised, so one bad ticker does
        not stop a batch run.
    """
    # Field name as delivered by the download -> output column name.
    column_map = {
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
        'Adj Close': 'adj_close',
    }
    output_order = ['date', 'ticker', 'open', 'high', 'low', 'close', 'volume', 'adj_close']

    try:
        print(f"Processing {ticker}...")

        # Download data
        raw = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
        if raw is None or raw.empty:
            # Fail explicitly instead of writing a header-only CSV.
            raise ValueError("no data returned")

        raw = raw.reset_index()

        # Column labels may be tuples such as ('Open', ticker) / ('Date', '')
        # or plain strings; keep only the field name in both cases. Indexing
        # a plain string with [0] would truncate it to its first character.
        raw.columns = [col[0] if isinstance(col, tuple) else col for col in raw.columns]

        missing = [name for name in column_map if name not in raw.columns]
        if missing:
            raise ValueError(f"missing expected columns: {missing}")

        # Build the result through a chain of copies rather than mutating a
        # column-selected frame in place (avoids chained-assignment issues).
        df = (
            raw[list(column_map)]
            .rename(columns=column_map)
            .assign(ticker=ticker)
            .loc[:, output_order]
        )

        # Save individual company file
        df.to_csv(f"{ticker}.csv", index=False)
        print(f"Saved {ticker}.csv")

        return df

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip
        # this ticker.
        print(f"Error processing {ticker}: {str(e)}")
        return None

def process_all_companies(companies, start_date, end_date, delay=0.2):
    """Process every ticker in turn and combine the results into one CSV.

    Each ticker is handled by ``process_company_data``; tickers for which it
    returns ``None`` are skipped. The successful frames are concatenated and
    written to ``all_companies.csv`` in the current working directory.

    Args:
        companies: Iterable of ticker symbols.
        start_date: Forwarded to ``process_company_data``.
        end_date: Forwarded to ``process_company_data``.
        delay: Seconds to pause between consecutive requests. The pause is
            applied after failed attempts as well as successful ones, and is
            skipped before the first request. Values <= 0 disable it.

    Returns:
        The combined DataFrame, or ``None`` when no ticker produced data.
    """
    frames = []

    for position, ticker in enumerate(companies):
        # Small delay to avoid rate limiting; throttle every request after
        # the first, regardless of whether the previous one succeeded.
        if position and delay > 0:
            time.sleep(delay)

        frame = process_company_data(ticker, start_date, end_date)
        if frame is not None:
            frames.append(frame)

    if not frames:
        print("No data was processed successfully")
        return None

    # Combine all data
    combined_df = pd.concat(frames, ignore_index=True)
    combined_df.to_csv("all_companies.csv", index=False)
    print("Saved combined data to all_companies.csv")
    return combined_df

# Entry point: fetch the configured date window for every ticker in
# `companies`, then preview the combined frame.
if __name__ == "__main__":
    start_date, end_date = "2025-01-01", "2025-04-30"

    # Download, save, and merge all tickers.
    final_df = process_all_companies(companies, start_date, end_date)

    if final_df is not None:
        # Single print call; emits the same text as three consecutive prints.
        print(
            "\nSample of combined data:",
            final_df.head(),
            "\nData processing complete!",
            sep="\n",
        )

[*********************100%***********************]  1 of 1 completed

Processing AAPL...
Saved AAPL.csv



[*********************100%***********************]  1 of 1 completed

Processing MSFT...
Saved MSFT.csv



[*********************100%***********************]  1 of 1 completed

Processing GOOGL...
Saved GOOGL.csv



[*********************100%***********************]  1 of 1 completed

Processing AMZN...
Saved AMZN.csv



[*********************100%***********************]  1 of 1 completed

Processing META...
Saved META.csv



[*********************100%***********************]  1 of 1 completed

Processing TSLA...
Saved TSLA.csv



[*********************100%***********************]  1 of 1 completed

Processing NVDA...
Saved NVDA.csv



[*********************100%***********************]  1 of 1 completed

Processing JPM...
Saved JPM.csv



[*********************100%***********************]  1 of 1 completed

Processing V...
Saved V.csv



[*********************100%***********************]  1 of 1 completed

Processing WMT...
Saved WMT.csv



[*********************100%***********************]  1 of 1 completed

Processing INTC...
Saved INTC.csv



[*********************100%***********************]  1 of 1 completed

Processing AMD...
Saved AMD.csv



[*********************100%***********************]  1 of 1 completed

Processing ADBE...
Saved ADBE.csv



[*********************100%***********************]  1 of 1 completed

Processing CRM...
Saved CRM.csv



[*********************100%***********************]  1 of 1 completed

Processing ORCL...
Saved ORCL.csv



[*********************100%***********************]  1 of 1 completed

Processing IBM...
Saved IBM.csv



[*********************100%***********************]  1 of 1 completed

Processing CSCO...
Saved CSCO.csv



[*********************100%***********************]  1 of 1 completed

Processing QCOM...
Saved QCOM.csv



[*********************100%***********************]  1 of 1 completed

Processing TSM...
Saved TSM.csv



[*********************100%***********************]  1 of 1 completed

Processing NKE...
Saved NKE.csv



[*********************100%***********************]  1 of 1 completed

Processing HD...
Saved HD.csv



[*********************100%***********************]  1 of 1 completed

Processing MCD...
Saved MCD.csv



[*********************100%***********************]  1 of 1 completed

Processing SBUX...
Saved SBUX.csv



[*********************100%***********************]  1 of 1 completed

Processing DIS...
Saved DIS.csv



[*********************100%***********************]  1 of 1 completed

Processing BKNG...
Saved BKNG.csv



[*********************100%***********************]  1 of 1 completed

Processing MA...
Saved MA.csv



[*********************100%***********************]  1 of 1 completed

Processing BAC...
Saved BAC.csv



[*********************100%***********************]  1 of 1 completed

Processing WFC...
Saved WFC.csv



[*********************100%***********************]  1 of 1 completed

Processing GS...
Saved GS.csv



[*********************100%***********************]  1 of 1 completed

Processing AXP...
Saved AXP.csv



[*********************100%***********************]  1 of 1 completed

Processing JNJ...
Saved JNJ.csv



[*********************100%***********************]  1 of 1 completed

Processing PFE...





Saved PFE.csv
Processing MRK...


[*********************100%***********************]  1 of 1 completed


Saved MRK.csv
Processing ABT...


[*********************100%***********************]  1 of 1 completed


Saved ABT.csv
Processing UNH...


[*********************100%***********************]  1 of 1 completed


Saved UNH.csv
Processing LLY...


[*********************100%***********************]  1 of 1 completed


Saved LLY.csv
Processing BA...


[*********************100%***********************]  1 of 1 completed


Saved BA.csv
Processing CAT...


[*********************100%***********************]  1 of 1 completed


Saved CAT.csv
Processing HON...


[*********************100%***********************]  1 of 1 completed


Saved HON.csv
Processing GE...


[*********************100%***********************]  1 of 1 completed


Saved GE.csv
Processing UPS...


[*********************100%***********************]  1 of 1 completed


Saved UPS.csv
Processing PG...


[*********************100%***********************]  1 of 1 completed


Saved PG.csv
Processing KO...


[*********************100%***********************]  1 of 1 completed


Saved KO.csv
Processing PEP...


[*********************100%***********************]  1 of 1 completed


Saved PEP.csv
Processing XOM...


[*********************100%***********************]  1 of 1 completed


Saved XOM.csv
Processing CVX...


[*********************100%***********************]  1 of 1 completed


Saved CVX.csv
Processing COP...


[*********************100%***********************]  1 of 1 completed


Saved COP.csv
Processing T...


[*********************100%***********************]  1 of 1 completed


Saved T.csv
Processing VZ...


[*********************100%***********************]  1 of 1 completed


Saved VZ.csv
Processing NFLX...


[*********************100%***********************]  1 of 1 completed


Saved NFLX.csv
Saved combined data to all_companies.csv

Sample of combined data:
        date ticker        open        high         low       close    volume  \
0 2025-01-02   AAPL  248.929993  249.100006  241.820007  243.850006  55740700   
1 2025-01-03   AAPL  243.360001  244.179993  241.889999  243.360001  40244100   
2 2025-01-06   AAPL  244.309998  247.330002  243.199997  245.000000  45045600   
3 2025-01-07   AAPL  242.979996  245.550003  241.350006  242.210007  40856000   
4 2025-01-08   AAPL  241.919998  243.710007  240.050003  242.699997  37628900   

    adj_close  
0  242.987427  
1  242.499161  
2  244.133347  
3  241.353226  
4  241.841461  

Data processing complete!
