# Core code

This notebook imports the data, cleans it, and writes the Neo4j Cypher file that can be used to generate the graph database.

## Loading main company list and forming a list of company objects 

In [None]:
import pandas as pd

# Read the company list from disk into a DataFrame.
# Note: in future iterations of this project, the company_list.csv file is to be replaced with a live API call.
file_path = "Companies/company_list.csv"
data = pd.read_csv(file_path)

# Show the headers that were actually loaded, so the column names used
# further down can be checked against them.
print("Columns in the dataset:", data.columns)

# Define a Company class to represent each company's data
class Company:
    """A single company record taken from the loaded dataset.

    Attributes are plain values copied from the constructor arguments:
    ``name``, ``market_cap`` and ``other_data``. ``lei`` always starts as
    ``None`` and is meant to be filled in after construction.
    """

    def __init__(self, name, market_cap, other_data):
        """Store the given fields and leave the LEI unset."""
        self.name, self.market_cap = name, market_cap
        self.other_data = other_data
        # The LEI is not known when the object is created.
        self.lei = None

    def __repr__(self):
        """Return a short description showing name, market cap and LEI."""
        return f"Company(name={self.name}, market_cap={self.market_cap}, lei={self.lei})"

# Headers, as they appear in the dataset, of the two columns that map onto
# dedicated Company fields.
name_column = "Name"
market_cap_column = "marketcap"

# Build one Company per DataFrame row. Every column other than the two named
# above is carried along as a dict in `other_data`.
companies = [
    Company(
        name=row[name_column],
        market_cap=row[market_cap_column],
        other_data=row.drop([name_column, market_cap_column]).to_dict(),
    )
    for _, row in data.iterrows()
]


# The `companies` list now holds all the company objects, and each object includes a placeholder for LEI


## Querying the public GLEIF LEI API and fuzzy-matching the results to find LEIs for our companies

In [None]:
import requests
from fuzzywuzzy import fuzz


# Function to fetch LEI based on company name
def fetch_lei(company_name, min_score=75, timeout=10):
    """Look up the LEI whose legal name best matches ``company_name``.

    Queries the GLEIF ``lei-records`` endpoint filtered by legal name, scores
    every returned record against ``company_name`` with ``fuzz.ratio``
    (case-insensitively) and keeps the highest-scoring record.

    Args:
        company_name: Name to search for. Non-string or blank values (for
            example a NaN coming from an empty CSV cell) are skipped without
            issuing a request.
        min_score: The best candidate is accepted only when its score is
            strictly greater than this value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``id`` field of the best-matching record (the LEI), or ``None``
        when the request fails, nothing is returned, the best score is not
        above ``min_score``, or the best match is a holding company.
    """
    if not isinstance(company_name, str) or not company_name.strip():
        print(f"No LEI found for {company_name}")
        return None

    base_url = "https://api.gleif.org/api/v1/lei-records"
    params = {
        "filter[entity.legalName]": company_name
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report and move on so a single network failure does not abort
        # the lookup for every remaining company.
        print(f"Error fetching LEI for {company_name}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching LEI for {company_name}: {response.status_code}")
        return None

    records = response.json().get('data')
    if not records:
        print(f"No LEI found for {company_name}")
        return None

    # Perform fuzzy matching to find the best candidate
    wanted = company_name.lower()
    best_match = None
    best_name = ""
    best_score = 0
    for record in records:
        legal_name = record['attributes']['entity']['legalName']['name']
        score = fuzz.ratio(wanted, legal_name.lower())
        if score > best_score:
            best_score = score
            best_match = record
            best_name = legal_name

    # The holding-company check must look at the legal name *text*. The
    # `legalName` value is an object whose text sits under 'name'; testing
    # membership on the object itself only inspects its keys and never
    # excludes anything. The comparison is case-insensitive, like the
    # scoring above.
    if (
        best_match is not None
        and best_score > min_score
        and "holding" not in best_name.lower()
    ):
        return best_match['id']  # LEI is in the 'id' field

    print(f"No precise LEI match for {company_name}, best score: {best_score}")
    return None

# Update each company with its LEI
def update_leis(companies):
    """Set ``lei`` on every company to the result of ``fetch_lei`` for its name.

    Each object is modified in place and a progress line is printed for it.
    Nothing is returned.
    """
    for entry in companies:
        looked_up = fetch_lei(entry.name)
        entry.lei = looked_up
        print(f"Updated {entry.name} with LEI: {entry.lei}")

# Run the LEI update over every loaded company.
# update_leis calls fetch_lei once per company, and fetch_lei queries the
# GLEIF API over HTTP, so this cell needs network access.
update_leis(companies)


# The `companies` list now holds all the company objects; each object's `lei` is the matched LEI, or None where no match was found

## Printing the companies for which we have successfully found and matched an LEI

In [None]:
# Keep only the companies whose LEI lookup returned a value.
successful_companies = []
for company in companies:
    if company.lei is not None:
        successful_companies.append(company)

# Show each matched company on its own line, followed by the overall count.
for matched in successful_companies:
    print(matched)

print("Total successful matches:", len(successful_companies))

## Based on the list of companies, generating Cypher output to main.cypher. The Cypher code will create a node for each company and add the LEI if it is known. 

In [15]:
import os

# File path for the Cypher script
cypher_file = 'main.cypher'


def _cypher_string(value):
    """Return ``value`` as a single-quoted Cypher string literal.

    Backslashes and single quotes are backslash-escaped so that a value such
    as ``McDonald's`` cannot terminate the literal early and break the script.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return "'" + escaped + "'"


def _company_create_statement(company):
    """Build the ``CREATE`` statement (with trailing newline) for one company.

    The ``lei`` property is included only when the company has a truthy LEI.
    """
    properties = ["name: " + _cypher_string(company.name)]
    if company.lei:
        properties.append("lei: " + _cypher_string(company.lei))
    return "CREATE (:Company {" + ", ".join(properties) + "});\n"


# Mode 'w' truncates any existing file, so no separate delete step is needed.
# UTF-8 is set explicitly so names with non-ASCII characters are written the
# same way on every platform.
with open(cypher_file, 'w', encoding='utf-8') as file:
    # Write the header
    file.write("// Cypher script to create Company nodes\n")

    # One CREATE statement per company
    file.writelines(_company_create_statement(company) for company in companies)

    # Add a query to display the whole graph
    file.write("\n// Query to display the whole graph\n")
    file.write("MATCH (n) RETURN n;\n")

print(f"Cypher script generated and saved to {cypher_file}")


Cypher script generated and saved to main.cypher
