This notebook analyses the fundamentals of every company held in a given ETF or index (here, the holdings of the SPY ETF enriched with Yahoo Finance data).


In [32]:
import pandas as pd
import requests
from io import BytesIO
import yfinance as yf
import os
import os.path
from urllib.parse import urlparse
from datetime import datetime, timezone, timedelta

In [33]:
def extract_SPX_holdings(url="https://www.ssga.com/us/en/intermediary/library-content/products/fund-data/etfs/us/holdings-daily-us-en-spy.xlsx", start_row=5, end_row=509, csv_filename="spy_holdings.csv", timeout=30):
    """Return the ETF holdings enriched with per-ticker yfinance info.

    If ``csv_filename`` already exists it is read and returned as-is, without
    any network access. Otherwise the holdings workbook is downloaded from
    ``url``, a copy of it is written to the current working directory under
    the URL's base filename, the ``yfinance`` ``info`` dict of every distinct
    ticker is fetched, and the merged table is written to ``csv_filename``.

    Args:
        url (str, optional): URL of the holdings Excel file.
        start_row (int, optional): 1-based sheet row holding the column
            headers; the data rows are the ones that follow it.
        end_row (int, optional): 1-based last sheet row to read.
        csv_filename (str, optional): CSV file used both as cache and output.
        timeout (float, optional): Seconds to wait for the download before
            giving up.

    Returns:
        pd.DataFrame: Holdings left-merged with the ticker info on "Ticker",
        or None if the download or processing failed.
    """

    if os.path.exists(csv_filename):
        print(f"Loading data from existing CSV: {csv_filename}")
        try:
            return pd.read_csv(csv_filename)
        except (pd.errors.ParserError, pd.errors.EmptyDataError) as e:
            # A corrupt or empty cache is not fatal: fall through and rebuild it.
            print(f"Error parsing CSV file: {e}. Proceeding with data extraction.")

    try:
        # The whole body is needed right away, so there is nothing to stream;
        # the timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Keep a local copy of the workbook, named after the URL's last path
        # segment (with a fallback when the URL path has no filename).
        excel_filename = os.path.basename(urlparse(url).path) or "holdings.xlsx"
        with open(excel_filename, 'wb') as f:
            f.write(response.content)
        print(f"Excel file saved as: {excel_filename}")

        df = pd.read_excel(
            BytesIO(response.content),
            header=None,
            skiprows=start_row - 1,
            nrows=end_row - start_row + 1,
        )

        # The first row that was read is the header row; promote it to column names.
        df.columns = df.iloc[0].values
        df = df.iloc[1:].reset_index(drop=True)

        df['Weight'] = pd.to_numeric(df['Weight'], errors='coerce')

        # Query each distinct, non-missing ticker exactly once. Duplicate or
        # NaN keys on the right-hand side of the merge would otherwise
        # multiply the holdings rows.
        # NOTE(review): share-class tickers written with a dot (e.g. "BRK.B")
        # may need a different spelling on Yahoo Finance - confirm.
        info_list = []
        for ticker in df["Ticker"].dropna().drop_duplicates():
            try:
                ticker_info = yf.Ticker(ticker).info
                info_list.append({'Ticker': ticker, **ticker_info})
            except Exception as e:
                # Best effort: keep the ticker so the row still merges, just without info.
                print(f"Error retrieving info for {ticker}: {e}")
                info_list.append({'Ticker': ticker})

        if info_list:
            merged_df = pd.merge(df, pd.DataFrame(info_list), on="Ticker", how="left")
        else:
            # No usable ticker at all: there is nothing to merge in.
            merged_df = df

        merged_df.to_csv(csv_filename, index=False)
        print(f"Data saved to {csv_filename}")
        return merged_df

    except requests.exceptions.RequestException as e:
        print(f"Error downloading file: {e}")
        return None
    except pd.errors.ParserError as e:
        print(f"Error parsing Excel file: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

def transforms(df):
    """Add an 'IPOdate' column derived from 'firstTradeDateEpochUtc'.

    Each non-missing value is treated as a number of seconds relative to
    1970-01-01 UTC and rendered as a 'YYYY-MM-DD' string. Missing values and
    values that cannot be converted become None. The column is added to the
    DataFrame that was passed in.

    Args:
        df (pd.DataFrame): Frame expected to carry a 'firstTradeDateEpochUtc' column.

    Returns:
        pd.DataFrame: The same DataFrame with 'IPOdate' added, or the
        untouched DataFrame (after a warning) when the source column is absent.
    """
    source_column = 'firstTradeDateEpochUtc'
    if source_column in df.columns:
        unix_origin = datetime(1970, 1, 1, tzinfo=timezone.utc)

        def _seconds_to_date(seconds):
            # Missing epochs have no date.
            if pd.isna(seconds):
                return None
            try:
                # Offsetting from the origin also handles negative (pre-1970) epochs.
                moment = unix_origin + timedelta(seconds=seconds)
                return moment.strftime('%Y-%m-%d')
            except (OverflowError, OSError, TypeError):
                # Out-of-range or non-numeric values are treated as missing.
                return None

        df['IPOdate'] = df[source_column].apply(_seconds_to_date)
        return df

    print("Warning: 'firstTradeDateEpochUtc' column not found. Returning original DataFrame.")
    return df

In [None]:
# Example usage: the first call returns the holdings table, reading
# spy_holdings.csv when it already exists and building it otherwise.
merged_data = extract_SPX_holdings()

if merged_data is not None:
    print(merged_data.head())
else:
    print("Failed to retrieve and process data.")

# Call the function again, demonstrating that it loads from CSV
merged_data_from_csv = extract_SPX_holdings()
if merged_data_from_csv is not None:
    print("Data loaded from CSV:")
    print(merged_data_from_csv.head())
else:
    print("Failed to load from CSV")



Loading data from existing CSV: spy_holdings.csv
                         Name Ticker Identifier    SEDOL    Weight Sector  \
0                   APPLE INC   AAPL  037833100  2046251  7.631114      -   
1                 NVIDIA CORP   NVDA  67066G104  2379504  6.725229      -   
2              MICROSOFT CORP   MSFT  594918104  2588173  6.386520      -   
3              AMAZON.COM INC   AMZN  023135106  2000019  4.191108      -   
4  META PLATFORMS INC CLASS A   META  30303M102  B7TL820  2.590474      -   

   Shares Held Local Currency                   address1         city  ...  \
0  188106137.0            USD         One Apple Park Way    Cupertino  ...   
1  305258931.0            USD  2788 San Tomas Expressway  Santa Clara  ...   
2   92521924.0            USD          One Microsoft Way      Redmond  ...   
3  116458138.0            USD     410 Terry Avenue North      Seattle  ...   
4   27128488.0            USD                 1 Meta Way   Menlo Park  ...   

  earningsGrowth re

In [34]:
# Load the holdings and show the first rows with the IPOdate column added.
# extract_SPX_holdings returns None on failure, so guard before transforming.
holdings = extract_SPX_holdings()
if holdings is not None:
    print(transforms(holdings).head())
else:
    print("Failed to retrieve and process data.")

Loading data from existing CSV: spy_holdings.csv
                         Name Ticker Identifier    SEDOL    Weight Sector  \
0                   APPLE INC   AAPL  037833100  2046251  7.631114      -   
1                 NVIDIA CORP   NVDA  67066G104  2379504  6.725229      -   
2              MICROSOFT CORP   MSFT  594918104  2588173  6.386520      -   
3              AMAZON.COM INC   AMZN  023135106  2000019  4.191108      -   
4  META PLATFORMS INC CLASS A   META  30303M102  B7TL820  2.590474      -   

   Shares Held Local Currency                   address1         city  ...  \
0  188106137.0            USD         One Apple Park Way    Cupertino  ...   
1  305258931.0            USD  2788 San Tomas Expressway  Santa Clara  ...   
2   92521924.0            USD          One Microsoft Way      Redmond  ...   
3  116458138.0            USD     410 Terry Avenue North      Seattle  ...   
4   27128488.0            USD                 1 Meta Way   Menlo Park  ...   

  revenueGrowth gro

In [40]:
# Build a yfinance Ticker for AAPL and print its institutional_holders table.
yf_ticker = yf.Ticker('AAPL')
print( yf_ticker.institutional_holders)

  Date Reported                             Holder  pctHeld      Shares  \
0    2023-06-30                 Vanguard Group Inc   0.0834  1303688506   
1    2023-06-30                     Blackrock Inc.   0.0665  1039640859   
2    2023-06-30            Berkshire Hathaway, Inc   0.0586   915560382   
3    2023-06-30           State Street Corporation   0.0370   578897858   
4    2023-06-30                           FMR, LLC   0.0196   307066638   
5    2023-06-30      Geode Capital Management, LLC   0.0186   291538165   
6    2023-06-30      Price (T.Rowe) Associates Inc   0.0145   226650943   
7    2023-06-30                     Morgan Stanley   0.0131   204714950   
8    2022-12-31  Norges Bank Investment Management   0.0107   167374278   
9    2023-06-30         Northern Trust Corporation   0.0105   164536073   

          Value  
0  252876459508  
1  201659137420  
2  177591247296  
3  112288817516  
4   59561715772  
5   56549657865  
6   43963483413  
7   39708558851  
8   21746939