In [None]:
import pandas as pd

# Load the dataset
# Note : in future iterations of this project, the company_list.csv file will be replaced with a live API call
file_path = 'company_list.csv'
data = pd.read_csv(file_path)

# Inspect column names to adjust the script dynamically
print("Columns in the dataset:", data.columns)

# Define a Company class to represent each company's data
class Company:
    def __init__(self, name, market_cap, other_data):
        self.name = name
        self.market_cap = market_cap
        self.other_data = other_data
        self.lei = None  # Placeholder for the LEI, initially set to None

    def __repr__(self):
        return f"Company(name={self.name}, market_cap={self.market_cap}, lei={self.lei})"

# Adjust column names based on actual dataset headers
name_column = 'Name'  # Updated based on dataset
market_cap_column = 'marketcap'  # Updated based on dataset

# Convert the DataFrame into a list of Company objects
companies = []

for _, row in data.iterrows():
    company = Company(
        name=row[name_column],
        market_cap=row[market_cap_column],
        other_data=row.drop([name_column, market_cap_column]).to_dict()
    )
    companies.append(company)


# The `companies` list now holds all the company objects, and each object includes a placeholder for LEI


Columns in the dataset: Index(['Rank', 'Name', 'Symbol', 'marketcap', 'price (USD)', 'country'], dtype='object')


In [None]:
import requests
from fuzzywuzzy import fuzz


# # Function to fetch LEI based on company name
def fetch_lei(company_name):
    base_url = "https://api.gleif.org/api/v1/lei-records"
    params = {
        "filter[entity.legalName]": company_name
    }
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        data = response.json()
        if data['data']:
            # Perform fuzzy matching to find the best candidate
            best_match = None
            best_score = 0
            for record in data['data']:
                legal_name = record['attributes']['entity']['legalName']['name']
                score = fuzz.ratio(company_name.lower(), legal_name.lower())
                if score > best_score:
                    best_score = score
                    best_match = record

            if best_match and best_score > 75:  # Threshold for a good match
                # Check to avoid holding companies
                if "Holding" not in best_match['attributes']['entity']['legalName']:
                    return best_match['id']  # LEI is in the 'id' field
            print(f"No precise LEI match for {company_name}, best score: {best_score}")
        else:
            print(f"No LEI found for {company_name}")
    else:
        print(f"Error fetching LEI for {company_name}: {response.status_code}")
    return None

# Update each company with its LEI
def update_leis(companies):
    for company in companies:
        company.lei = fetch_lei(company.name)
        print(f"Updated {company.name} with LEI: {company.lei}")

# Run the LEI update
update_leis(companies)


# The `companies` list now holds all the company objects, and each object includes a placeholder for LEI

No precise LEI match for NVIDIA, best score: 50
Updated NVIDIA with LEI: None
No precise LEI match for TSMC, best score: 40
Updated TSMC with LEI: None
Updated Broadcom with LEI: 549300GSA37RXJ3GLX48
No precise LEI match for ASML, best score: 50
Updated ASML with LEI: None
No precise LEI match for Samsung, best score: 56
Updated Samsung with LEI: None
Updated AMD with LEI: 969500OFRFA9H8WHNR56
No precise LEI match for QUALCOMM, best score: 55
Updated QUALCOMM with LEI: None
No precise LEI match for Texas Instruments, best score: 72
Updated Texas Instruments with LEI: None
Updated Arm Holdings with LEI: 2138001E66EELTE7Y904
Updated Applied Materials with LEI: 41BNNE1AFPNAZELZ6K07
Updated Micron Technology with LEI: B3DXGBC8GAIYWI2Z0172
Updated Analog Devices with LEI: 529900QFKTMBQFW48N21
No precise LEI match for Lam Research, best score: 67
Updated Lam Research with LEI: None
Updated Marvell Technology Group with LEI: 8DF36O58U3QIHUCGZB18
No precise LEI match for Intel, best score: 59


In [13]:
# Filter and print companies that have a successful LEI match
successful_companies = [company for company in companies if company.lei is not None]

for sc in successful_companies:
    print(sc)

print("Total successful matches:", len(successful_companies))

Company(name=Broadcom, market_cap=831199772672, lei=549300GSA37RXJ3GLX48)
Company(name=AMD, market_cap=210251251712, lei=969500OFRFA9H8WHNR56)
Company(name=Arm Holdings, market_cap=156120793088, lei=2138001E66EELTE7Y904)
Company(name=Applied Materials, market_cap=139682889728, lei=41BNNE1AFPNAZELZ6K07)
Company(name=Micron Technology, market_cap=110535057408, lei=B3DXGBC8GAIYWI2Z0172)
Company(name=Analog Devices, market_cap=107547557888, lei=529900QFKTMBQFW48N21)
Company(name=Marvell Technology Group, market_cap=94300389376, lei=8DF36O58U3QIHUCGZB18)
Company(name=NXP Semiconductors, market_cap=55159476224, lei=724500M9BY5293JDF951)
Company(name=Monolithic Power Systems, market_cap=29250195456, lei=529900TB7O85LSZ1XU31)
Company(name=ASM International, market_cap=27091730797, lei=2549002MJCGTGYIA5817)
Company(name=GlobalFoundries, market_cap=24338925568, lei=549300BA76VK784VMX48)
Company(name=STMicroelectronics, market_cap=23676508160, lei=213800Z8NOHIKRI42W10)
Company(name=United Microel