# Add Fossil Classification for a given Holding File
## Prepare environment

In [1]:
import os

# Move from the notebook folder to the project root so the relative
# "data/..." and "data_sources/..." paths used below resolve.
# Guarded so that re-running this cell does not climb one more level each time.
# NOTE(review): assumes "data_sources" exists only in the project root - confirm.
if not os.path.isdir("data_sources"):
    os.chdir('..')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from fossil_classification import *
from enrich_holdings import *

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.options.display.float_format = "{:,.2f}".format

# Classify quarterly holdings file

In [4]:
# TODO: add better matching for non-Israeli companies, using Figi as name normalization, fall back to fuzzy matching on normalized name and then fuzzy matching on original name

In [6]:
# Report granularity and reporting period that identify the input folder.
company_or_fund_level = "company"
year = "2023"
q = "1"
# Layout: data/downloaded reports/<level> reports/<year>Q<quarter>/
folder_path = f"data/downloaded reports/{company_or_fund_level} reports/{year}Q{q}/"
holdings_path = f"{folder_path}holdings_for_cls.csv"

In [7]:
classify_holdings(holdings_path)


1. Preparing holding file

** Holdings file for classification **
data/downloaded reports/company reports/2023Q1/holdings_for_cls.csv
columns: Index(['שם המנפיק/שם נייר ערך', 'מספר ני"ע', 'מספר מנפיק', 'שווי',
       'שעור מנכסי אפיק ההשקעה', 'שעור מסך נכסי השקעה', 'holding_type',
       'זירת מסחר', 'תאריך רכישה', 'ערך נקוב', 'שער', 'שעור מערך נקוב מונפק',
       'ענף מסחר', 'SystemName', 'ParentCorpName', 'ReportPeriodDesc'],
      dtype='object')


  isin_cnt = sum(df[col].astype(str).str.strip().str.contains(isin_pattern, na=False))



Holding file ISIN col is: מספר ני"ע
number of ISINs: 7452 out of 27665 rows


  pattern_cnt = sum(df[col].astype(str).str.strip().str.contains(pattern, na=False))



Holding file Israel Corp col is: מספר מנפיק
number of Israel Corp Numbers: 19146 out of 27665 rows

2. Preparing mapping files


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

# Manual Review
In a google spreadsheet or excel.
Download the fully classified file as a CSV, then point holdings_cls_path at it to update prev_class (see below).

## Tips
1. Look at the output of the script, review conflicting classification (by ISIN, LEI, Israeli security number)
2. Look at holdings that get is_fossil_conflict=True
3. Sort by security name, Israeli security number or ISIN for faster manual classification
4. Carefully review holdings that have only is_fossil by FFF name match, as there are false matches.
<br>Review both the holdings for which all the other is_fossil_x flags are null, and those that have is_fossil by FFF = 1 while the other is_fossil_x flags = 0
5. Review holdings from suspicious industries: energy, oil and gas, utilities, materials.

# Add classification results to prev_class

In [16]:
holdings_cls_path = folder_path + "holdings_for_cls with fossil classification - reviewed.csv"
prev_class_path = "data_sources/prev_class.csv"
update_prev_class(holdings_cls_path, prev_class_path)
# prev_class_fixed = add_all_id_types_to_holdings(prev_class, tlv_s2i, isin2lei)

Adding classifications to prev_class, saving the previous version as data_sources/prev_class backup/prev_class 2023-07-17 16-00-21.csv


# Classify fund holdings
## Israeli funds
Data is scraped from https://mayaapi.tase.co.il/api/fund/details?fundId=
<br>Page address: https://maya.tase.co.il/fund/5132287?view=assets

In [None]:
import json

In [None]:
# Convert a saved fund-details JSON response into a holdings CSV that
# classify_holdings can read.
response_directory = "data/holdings_for_classification/5132287/"
response_path = response_directory + "response.json"
# orient="index": the top-level JSON keys become the row labels of `fund`
fund = pd.read_json(response_path, orient="index")
# NOTE(review): the key is spelled "AssetCompostion" - presumably mirrors the
# API payload; confirm before "correcting" the spelling.
assets = pd.DataFrame(fund.loc["AssetCompostion"][0]['Assets'])
# holdings["AssetCompostion"].head()
# Rename the response fields to the Hebrew column names used by the holdings files
# (the quarterly holdings output above lists the same name / security-number columns).
cols_rename = {
    'AssetName': 'שם המנפיק/שם נייר ערך',
    'IdentityCd': 'מספר ני"ע',
    'Id': 'fund_id'
}
assets = assets.rename(cols_rename, axis=1)
# Placeholder values for two columns absent from the response
# NOTE(review): presumably required by classify_holdings - confirm.
assets["מספר מנפיק"] = '00'
assets["מספר תאגיד"] = '00'
assets.to_csv(response_directory+"assets.csv", index=False)

In [None]:
classify_holdings(response_directory+"assets.csv")

## holdings CSV file

In [None]:
# Project-relative location (the working directory is the project root, as for
# the other "data/..." paths in this notebook) instead of a user-specific
# absolute path, so the cell runs on any checkout.
holdings_csv_dir = "data/holdings_for_classification/IE000PSF3A70/"
holdings_filename = 'fund_weights.csv'
holdings_csv_path = holdings_csv_dir + holdings_filename
holdings = pd.read_csv(holdings_csv_path)
# Rename the provider's columns to the Hebrew column names used by the holdings files
cols_rename = {
    'Name': 'שם המנפיק/שם נייר ערך',
    'ISIN': 'מספר ני"ע',
    'Type of Security': 'holding_type'
}
holdings = holdings.rename(cols_rename, axis=1)
# fix missing columns
holdings["מספר מנפיק"] = '00'
holdings["מספר תאגיד"] = '00'
holdings["is_fossil_prev_il_sec_num"] = np.nan
holdings.to_csv(holdings_csv_dir+"fund_weights_fixed.csv", index=False)

In [None]:
classify_holdings(holdings_csv_dir+"fund_weights_fixed.csv")

----

# Add additional screens - under construction

In [None]:
holdings_path = "data/downloaded reports/company reports/2022Q4/holdings_for_cls.csv"
holdings_ticker_col=None
holdings_company_col="שם המנפיק/שם נייר ערך"
sheet_num=0

In [None]:
# 1. prepare holdings file for classification
print("\n1. Preparing holding file")
holdings, holdings_il_sec_num_col, holdings_il_corp_col = prepare_holdings(holdings_path, sheet_num=sheet_num)
# If ticker exists, remove ticker information from instrument name
if holdings_ticker_col:
    holdings = clean_instrument_from_ticker(holdings, holdings_company_col, holdings_ticker_col)
    holdings_company_col = "company_name_cut_ticker"
# 2. prepare mapping files: TLV security number to issuer & isin to LEI for international holdings
print("\n2. Preparing mapping files")
tlv_s2i = prepare_tlv_sec_num_to_issuer(fetch_latest_tlv_sec_num_to_issuer())
isin2lei = fetch_latest_isin2lei()
# 3. enrich holdings file
print("\n3. Enriching holding file")
holdings_enriched = add_all_id_types_to_holdings(holdings, tlv_s2i, isin2lei)
if holdings_ticker_col:
    holdings_enriched = add_tlv_issuer_by_ticker(
        holdings_enriched,
        tlv_s2i,
        df_isin_col=holdings_il_sec_num_col,
        df_issuer_col="מספר מנפיק",
        df_ticker_col=holdings_ticker_col,
        mapping_heb_ticker_col="סימול(עברית)",
        mapping_eng_ticker_col="סימול(אנגלית)"
    )
# 4. prepare previously classified as is_fossil
# print("\n4. Preparing previously classified file")
# prev_class = prepare_prev_class(fetch_latest_prev_classified())
# prev_class = add_all_id_types_to_holdings(prev_class, tlv_s2i, isin2lei)
# 5. match holdings with previously classified - by ISIN, issuer or LEI
# print("\n5. Matching holdings with previously classified")
# holdings_with_prev = match_holdings_with_prev(
#     holdings_enriched,
#     prev_class,
#     holdings_il_sec_num_col
# )
# tlv = prepare_tlv(fetch_latest_tlv_list())
# holdings_with_tlv = match_holdings_with_tlv(holdings_with_prev, tlv)

In [None]:
print("\n6. Preparing Fossil Free Funds company list")
fff_all = fetch_latest_fff_list()
fff_all.head()

In [None]:
common = get_common_words_in_company_name(
        holdings_enriched,
        fff_all,
        holdings_company_col=holdings_company_col,
        fff_company_col="Company"
)

In [None]:
# flags_family = 'Deforestation Free Funds'
# flags = [c for c in fff_all.columns if c.startswith(flags_family)]
flags = [
    'Deforestation Free Funds: Producer screen', # Leave in?
    'Deforestation Free Funds: Financier screen', # Leave in?
    'Deforestation Free Funds: Consumer brand screen', # Leave in?
    'Gun Free Funds: Gun manufacturers screen',
    'Gun Free Funds: Gun retailers screen', # Leave in?
    'Weapons Free Funds: Major military contractor screen',
    'Weapons Free Funds: Cluster munitions / landmines screen',
    'Weapons Free Funds: Nuclear weapons screen',
    'Tobacco Free Funds: Tobacco producers screen'
        ]

In [None]:
# generalized version of the function - working on any set of flags
def prepare_fff(df, flags, flagged_only=False):
    """Normalise the Fossil Free Funds company list into one row per ticker.

    The input frame is not modified. (Previously the flags were converted in
    place, so calling the function a second time on the same frame turned every
    flag into 0.)

    :param df: FFF list with "Company", "Country" and "Tickers" columns plus the
        screen columns named in ``flags``, where 'Y' marks a flagged company
    :param flags: names of the screen columns to convert to 1/0 and aggregate
    :param flagged_only: when True, keep only companies with at least one flag set
    :return: DataFrame with the id columns, the 1/0 flag columns and
        'fff_flag_any', one row per (company, ticker); rows without a ticker
        are dropped
    """
    flags = list(flags)
    # work on a copy so the caller's frame keeps its raw 'Y'/None values
    df = df.copy()
    if flags:
        # map flags to 1/0 instead of Y/None
        df[flags] = (df[flags] == 'Y').astype(int)
    # a company is flagged when any of the chosen screens is set
    criteria = df[flags].sum(axis=1) > 0
    df['fff_flag_any'] = criteria.astype(int)
    print("\ncompanies with any of the chosen flags in Fossil Free Funds list")
    print(df['fff_flag_any'].value_counts(dropna=False))
    print("\nFlags breakdown")
    if len(flags) >= 2:
        # cross-tabulate the first flag against the next one or two flags
        # (with three or more flags this is the same table as before)
        column_flags = flags[1:3]
        print(
            pd.crosstab(
                df[flags[0]],
                [df[c] for c in column_flags],
                rownames=[flags[0]],
                colnames=column_flags,
                dropna=False
            )
        )
    elif flags:
        print(df[flags[0]].value_counts(dropna=False))
    df['Company'] = df['Company'].str.upper().str.strip()
    df['Tickers'] = df['Tickers'].str.upper().str.strip()
    # optionally narrow down to flagged companies only
    fff = df[criteria] if flagged_only else df
    # explode lists, to get one row per ticker
    fff = fff.assign(Tickers=fff['Tickers'].str.split(',')).explode('Tickers')
    # remove irrelevant columns
    id_cols = ["Company", "Country", "Tickers"]
    fff = fff[id_cols + flags + ['fff_flag_any']]
    # explicit copy: the next assignment must not target a view of the filtered frame
    fff = fff[fff['Tickers'].notnull()].copy()
    # tickers split on ',' may still carry surrounding spaces
    fff['Tickers'] = fff['Tickers'].str.strip().str.upper()
    return fff

fff = prepare_fff(fff_all, flags)

In [None]:
def match_holdings_with_fff_by_company_name(
        holdings,
        fff,
        common_words_in_company,
        holdings_company_col,
        flags,
        agg_flag_name,
        fff_company_col="Company",
        min_match_threshold=60,
        flagged_match_threshold=90
):
    """Fuzzy-match holdings to the FFF list by cleaned company name and attach the FFF flags.

    Neither input frame is modified; both are copied before the helper
    "company_clean" column is added.

    :param holdings: holdings DataFrame containing ``holdings_company_col``
    :param fff: prepared FFF DataFrame with ``fff_company_col``, the ``flags``
        columns and 'fff_flag_any'
    :param common_words_in_company: words passed to remove_common_words before matching
    :param holdings_company_col: holdings column holding the company / instrument name
    :param flags: FFF flag columns to carry over to the matched holdings
    :param agg_flag_name: label used in the output column names
        ("is_<label>_company_name", "fff_by_name_<label>")
    :param fff_company_col: FFF column holding the company name
    :param min_match_threshold: matches scoring at or below this value are discarded
    :param flagged_match_threshold: only matches scoring above this value set
        "is_<label>_company_name"
    :return: holdings with the match name, score, flags and aggregated flag columns added
    """
    # work on copies so the caller's frames do not silently gain a "company_clean" column
    holdings = holdings.copy()
    fff = fff.copy()
    # prepare company names for fuzzy matching
    # remove common words (LTD, Corp etc.)
    holdings["company_clean"] = holdings[holdings_company_col].map(clean_company)
    holdings["company_clean"] = remove_common_words(holdings["company_clean"], common_words_in_company)
    # Normalise the join column itself (as is done for fff below). The lookup keys
    # were upper-cased/stripped before, but the column used for the merge was not,
    # so any name that normalisation changed could never join back.
    holdings["company_clean"] = holdings["company_clean"].str.upper().str.strip()
    # TODO: maybe use ASA, PLC, INC etc. as separator? remove everything after separator if got >= n (3?) words
    holdings_company_names = holdings["company_clean"].dropna().unique()
    fff["company_clean"] = remove_common_words(fff[fff_company_col], common_words_in_company)
    fff["company_clean"] = fff["company_clean"].str.upper().str.strip()
    fff_company_names = fff["company_clean"].dropna().unique()
    # fuzzy matching company names
    print("\n** fuzzy matching company names ** (this could take a few minutes)")
    agg_matches = {}
    for c in holdings_company_names:
        agg_matches[c] = best_match(c, fff_company_names)
    # one row per holdings name; columns 0/1 hold the matched FFF name and its score
    agg_fuzzy_results = pd.DataFrame(agg_matches).transpose()
    agg_fuzzy_results = agg_fuzzy_results.rename(
        {0: 'fff_by_name', 1: 'company_name_match_score'}, axis=1
    )
    agg_fuzzy_results = agg_fuzzy_results[agg_fuzzy_results['company_name_match_score'] > min_match_threshold]
    # join back to fff to get the flags of the matched company
    fff_company_with_fff_flag_any = fff.groupby('company_clean').first()
    agg_fuzzy_results = pd.merge(
        left=agg_fuzzy_results,
        right=fff_company_with_fff_flag_any[flags + ['fff_flag_any']],
        left_on='fff_by_name',
        right_index=True,
        how='left'
    )
    # add fuzzy match results to holdings
    holdings_with_fuzzy = pd.merge(
        left=holdings,
        right=agg_fuzzy_results,
        left_on="company_clean",
        right_index=True,
        how='left'
    )
    # keep the aggregated flag only for high-confidence matches; NaN otherwise
    match_score = pd.to_numeric(holdings_with_fuzzy['company_name_match_score'], errors='coerce')
    holdings_with_fuzzy["is_" + agg_flag_name + "_company_name"] = (
        holdings_with_fuzzy['fff_flag_any'].where(match_score > flagged_match_threshold)
    )
    # rename columns
    holdings_with_fuzzy = holdings_with_fuzzy.rename({'fff_flag_any': 'fff_by_name_' + agg_flag_name}, axis=1)
    # drop redundant columns
    if 'company_name_cut_ticker' in holdings_with_fuzzy.columns:
        holdings_with_fuzzy = holdings_with_fuzzy.drop(['company_name_cut_ticker'], axis=1)
    print("Matching by Company Name coverage:")
    print("classified: {} out of total holdings: {}".format(
        holdings_with_fuzzy["is_" + agg_flag_name + "_company_name"].notnull().sum(),
        holdings_with_fuzzy.shape[0]
    ))
    return holdings_with_fuzzy

In [None]:
holdings_with_fff_by_company_name = match_holdings_with_fff_by_company_name(
    holdings_enriched,
    fff,
    common_words_in_company=common,
    holdings_company_col=holdings_company_col,
    flags=flags,
    agg_flag_name='dirty',
    fff_company_col="Company"
)

In [None]:
# check coverage for non-Israeli holdings
# The row-wise is_il_holding test is evaluated once and reused for both summaries.
non_il_dirty_flags = holdings_with_fff_by_company_name.loc[
    ~holdings_with_fff_by_company_name.apply(is_il_holding, axis='columns'),
    'is_dirty_company_name'
]
# share of non-Israeli holdings that received a name-based classification
print(non_il_dirty_flags.notnull().mean())

non_il_dirty_flags.value_counts(dropna=False)

In [None]:
holdings_with_fff_by_company_name['is_dirty_company_name'].value_counts(dropna=False)

In [None]:
holdings_with_fff_by_company_name.loc[
    holdings_with_fff_by_company_name["is_dirty_company_name"] == 1,
    'שווי'
].astype(float).sum()

In [None]:
holdings_with_fff_by_company_name['שווי'].astype(float).sum()

In [None]:
# Shorter display names for the screen columns in the exported sheet
cols_rename = {
    'Deforestation Free Funds: Producer screen': 'Deforestation: Producer',
    'Deforestation Free Funds: Financier screen': 'Deforestation: Financier',
    'Deforestation Free Funds: Consumer brand screen': 'Deforestation: Consumer brand',
    'Gun Free Funds: Gun manufacturers screen': 'Gun manufacturers',
    'Gun Free Funds: Gun retailers screen': 'Gun retailers',
    'Weapons Free Funds: Major military contractor screen': 'Major military contractor',
    'Weapons Free Funds: Cluster munitions / landmines screen': 'Cluster munitions / landmines',
    'Weapons Free Funds: Nuclear weapons screen': 'Nuclear weapons',
    'Tobacco Free Funds: Tobacco producers screen': 'Tobacco producers'
}
holdings_with_fff_by_company_name = holdings_with_fff_by_company_name.rename(cols_rename, axis=1)
# Write to the current user's Downloads folder rather than one person's absolute path
export_path = os.path.join(os.path.expanduser("~"), "Downloads", "do_no_evil_test.xlsx")
holdings_with_fff_by_company_name.sort_values("is_dirty_company_name", ascending=False).to_excel(export_path)

In [None]:
# CONTINUE WORKING FROM HERE

print("\n8. Calculating is_fossil")
holdings_final = consolidate_is_fossil(holdings_with_fff_by_company_name)
# output(holdings_final, "debug_" + output_path)
# 9. propagate is_fossil across ISIN and LEI (fill in missing is_fossil according to existing ones within group)
print("\n9. Propagating is_fossil across il_sec_num, ISIN and LEI")
holdings_propagate_is_fossil = propagate_is_fossil(holdings_final, holdings_il_sec_num_col)
holdings_propagate_is_fossil = propagate_is_fossil(holdings_propagate_is_fossil, "ISIN")
holdings_propagate_is_fossil = propagate_is_fossil(holdings_propagate_is_fossil, "LEI")
holdings_propagate_is_fossil = add_is_fossil_conflict(holdings_propagate_is_fossil)
# output path = input path with ' with fossil classification' inserted before the extension.
# splitext only splits off the final extension, so dots elsewhere in the path
# (or a missing extension) no longer corrupt the result.
path_root, path_ext = os.path.splitext(holdings_path)
output_path = path_root + ' with fossil classification' + path_ext
output(holdings_propagate_is_fossil, output_path)

# Enrich company level data before matching
Try to add identifiers that can be used for matching to holdings rather than name matching

In [None]:
import time
# mapping - get the most generalized ID (shareClassFIGI? doesn't always exist... name?!)
# TODO: try using openFIGI as name normalizer - get to figi_name from both ends
def figi_mapping_api_call(id_type, id_value):
    """Map a single identifier to its OpenFIGI records.

    The API key is read from the OPENFIGI_API_KEY environment variable and is
    no longer stored in the notebook. The key that used to be committed here
    should be treated as compromised and rotated. Without the variable the
    request is sent unauthenticated.
    NOTE(review): unauthenticated requests are presumably subject to stricter
    rate limits than the pause below assumes - confirm against the OpenFIGI docs.

    :param id_type: OpenFIGI idType, e.g. 'ID_ISIN' or 'TICKER'
    :param id_value: the identifier value to look up
    :return: DataFrame with one row per returned record, or None when the
        request fails or returns no data
    """
    api_url = 'https://api.openfigi.com/v3/mapping'
    headers = {'Content-Type': 'application/json'}
    api_key = os.environ.get('OPENFIGI_API_KEY')
    if api_key:
        headers['X-OPENFIGI-APIKEY'] = api_key
    search = {
        "idType": id_type,
        "idValue": id_value
    }
    try:
        # the mapping endpoint takes a list of jobs; a single job is sent here
        response = requests.post(api_url, headers=headers, json=[search])
        # keep below 25 queries per 6 seconds
        time.sleep(240/1000)
        if response.status_code == 200:
            payload = response.json()
            # one result object per job; 'data' is present only when the job matched
            if payload and 'data' in payload[0]:
                return pd.json_normalize(payload[0], 'data')
        print('Error mapping {}'.format(id_value), response.status_code)
        return None
    except Exception as exc:
        # best effort: report the failure and carry on with the next identifier
        # (Exception, not a bare except, so Ctrl-C still interrupts a long run)
        print("*Error during mapping*", exc)
        return None

def figi_search_api_call(search_term, raw=False):
    """Run a free-text OpenFIGI search.

    The API key is read from the OPENFIGI_API_KEY environment variable and is
    no longer stored in the notebook. The key that used to be committed here
    should be treated as compromised and rotated. Without the variable the
    request is sent unauthenticated.

    :param search_term: text sent as the search query
    :param raw: debugging aid - return the decoded JSON response instead of a DataFrame
    :return: DataFrame with one row per result (or the raw JSON when ``raw`` is
        True); None when the search fails or yields no results
    """
    api_url = 'https://api.openfigi.com/v3/search'
    headers = {'Content-Type': 'application/json'}
    api_key = os.environ.get('OPENFIGI_API_KEY')
    if api_key:
        headers['X-OPENFIGI-APIKEY'] = api_key
    search = {
        "query": search_term
    }
    try:
        response = requests.post(api_url, headers=headers, json=search)
        # throttle: at most one search every 3 seconds
        # NOTE(review): presumably sized for the search endpoint's per-minute limit,
        # which is stricter than the mapping endpoint's - confirm in the OpenFIGI docs.
        time.sleep(3)
        # return response if it's valid and not empty
        if response.status_code == 200:
            payload = response.json()
            if 'data' in payload:
                # raw=True mode for debugging
                if raw:
                    return payload
                if payload['data']:
                    return pd.json_normalize(payload, 'data')
                print('No results for {}'.format(search_term), response.status_code)
                return None
        print('Error searching {}'.format(search_term), response.status_code)
        return None
    except Exception as exc:
        # best effort: report the failure and carry on with the next term
        # (Exception, not a bare except, so Ctrl-C still interrupts a long run)
        print("*Error during search*", exc)
        return None

In [None]:
from fuzzywuzzy import fuzz

def find_most_similar_string(target, string_list):
    """Return the candidate most similar to ``target`` according to fuzz.ratio.

    :param target: the string to compare against
    :param string_list: iterable of candidate strings (list, Series, generator...)
    :return: the highest-scoring candidate (the first one on ties), or None when
        there are no candidates
    """
    # materialise once: works for generators and for Series with a non-default
    # index, where positional lookup by list index would pick the wrong label
    candidates = list(string_list)
    if not candidates:
        return None
    # max() keeps the first of equally scored candidates
    return max(candidates, key=lambda candidate: fuzz.ratio(target, candidate))

def add_figi_name_to_row_by_ISIN(row):
    """Look up the OpenFIGI name for the ISIN stored in ``row['ISIN']``.

    :param row: mapping or Series with an 'ISIN' entry
    :return: the name of the first returned record, or None when the ISIN is
        missing or the lookup returns nothing
    """
    isin = row['ISIN']
    # NaN is truthy, so a plain truthiness test would send missing ISINs to the API
    if pd.isna(isin) or not isin:
        return None
    figi = figi_mapping_api_call('ID_ISIN', isin)
    # return None if no results from figi
    if figi is None or figi.empty:
        return None
    # taking the first returned name
    return figi.iloc[0]['name']

def add_figi_names_to_row_by_ticker(row):
    """Look up all OpenFIGI names for the ticker stored in ``row['ticker']``.

    :param row: mapping or Series with a 'ticker' entry
    :return: array of the distinct names returned for the ticker, or None when
        the ticker is missing or the lookup returns nothing
    """
    ticker = row['ticker']
    # NaN is truthy, so a plain truthiness test would send missing tickers to the API
    if pd.isna(ticker) or not ticker:
        return None
    figi = figi_mapping_api_call('TICKER', ticker)
    # return None if no results from figi
    if figi is None:
        return None
    # return all names
    return figi['name'].unique()

def add_figi_names_to_row_by_name(row):
    """Search OpenFIGI for the company name stored in ``row['name']``.

    :param row: mapping or Series with a 'name' entry
    :return: array of the distinct names returned by the search, or None when
        the name is missing or the search returns nothing
    """
    company_name = row['name']
    # NaN is truthy, so a plain truthiness test would send missing names to the API
    if pd.isna(company_name) or not company_name:
        return None
    figi = figi_search_api_call(company_name)
    # return None if no results from figi
    if figi is None:
        return None
    # All distinct names are returned. An alternative considered earlier was to
    # keep only the closest one:
    #     find_most_similar_string(row['name'], figi['name'])
    return figi['name'].unique()

In [None]:
target_string = "apple"
string_list = ["Apple d", "banana", "Apple d", "oranges"]

find_most_similar_string(target_string, string_list)

In [None]:
all_ISINs = pd.DataFrame(holdings_enriched['ISIN'].unique(), columns = ['ISIN'])
all_ISINs.shape[0]

In [None]:
# timed apply
from tqdm.notebook import tqdm
tqdm.pandas()

all_ISINs['FIGI_company_name'] = all_ISINs.progress_apply(add_figi_name_to_row_by_ISIN, axis='columns')

In [None]:
result_df = pd.merge(
    left=holdings_enriched,
    right=all_ISINs,
    on='ISIN',
    how='left'
)

result_df.to_csv("holdings_enriched_with_figi_names.csv")

In [None]:
print("holdings with FIGI company name: {:,.2f}%".format(100 * result_df['FIGI_company_name'].notnull().mean()))
print("holdings missing FIGI company name: {}".format(result_df['FIGI_company_name'].isnull().sum()))

In [None]:
# Join back to holdings

# write function that adds figi names to df
def add_figi_name_to_holdings_by_id_type(df, id_type, id_col_name):
    """Add openFIGI company name to a dataframe using a selected id_type that is stored in id_col_name

    Each distinct identifier is looked up once and the result is joined back to
    every row carrying it. Uses ``progress_apply``, so ``tqdm.pandas()`` must have
    been called beforehand.

    :param df: DataFrame with id_col_name
    :param id_type: the id_type to be used; only ISIN is supported so far
        (both 'ISIN' and the openFIGI spelling 'ID_ISIN' are accepted), see
        https://www.openfigi.com/api#v3-idType-values
    :param id_col_name: name of the df column holding the identifiers
    :return: df with FIGI_company_name; the string 'missing <col> column' when
        id_col_name is absent (kept for backward compatibility); None when
        id_type is not supported
    """
    if id_col_name not in df.columns:
        return 'missing '+ id_col_name +' column'
    if id_type not in ('ISIN', 'ID_ISIN'):
        # previously execution fell through to the merge and failed with a KeyError
        print('id_type {} not supported yet'.format(id_type))
        return None
    uniques = pd.DataFrame(df[id_col_name].unique(), columns = [id_col_name])
    # hand the row helper the value under the key it expects, so the identifier
    # column does not have to be literally named 'ISIN'
    uniques['FIGI_company_name'] = uniques.progress_apply(
        lambda row: add_figi_name_to_row_by_ISIN({'ISIN': row[id_col_name]}),
        axis='columns'
    )
    # join on the identifier column itself: `uniques` has no column named after
    # id_type unless the two names happen to coincide
    result_df = pd.merge(
        left=df,
        right=uniques,
        on=id_col_name,
        how='left'
    )
    print("holdings with FIGI company name: {:,.2f}%".format(100 * result_df['FIGI_company_name'].notnull().mean()))
    print("holdings missing FIGI company name: {}".format(result_df['FIGI_company_name'].isnull().sum()))
    return result_df

def get_figi_names_by_id_type(df, id_type, id_col_name):
    """Collect the OpenFIGI names returned for every distinct value of a column.

    :param df: DataFrame with id_col_name
    :param id_type: 'name' (free-text search) or 'TICKER' (mapping lookup)
    :param id_col_name: name of the df column holding the values to look up
    :return: dict mapping each value that produced results to the array of
        distinct names returned for it; the string 'missing <col> column' when
        id_col_name is absent (kept for backward compatibility); None when
        id_type is not supported
    """
    if id_col_name not in df.columns:
        return 'missing '+ id_col_name +' column'
    if id_type == 'name':
        lookup = figi_search_api_call
    elif id_type == 'TICKER':
        lookup = lambda value: figi_mapping_api_call('TICKER', value)
    else:
        # .format belongs on the string; calling it on print()'s return value
        # raised AttributeError instead of reporting the unsupported type
        print('id_type {} not supported yet'.format(id_type))
        return None
    matches = {}
    # missing values are skipped rather than sent to the API
    for u in df[id_col_name].dropna().unique():
        print("matching {} by {}".format(u, id_type))
        found = lookup(u)
        if found is not None:
            matches[u] = found['name'].unique()
    return matches


In [None]:
holdings_enriched_with_figi = add_figi_name_to_holdings_by_id_type(holdings_enriched, id_type="ISIN", id_col_name="ISIN")

## enrich FFF lists

In [None]:
fff_all = fetch_latest_fff_list()

In [None]:
# handle flags - turn into 0/1
id_cols = ["Company", "Country", "Tickers"]
flag_cols = [c for c in fff_all.columns if c not in id_cols]
# Treat both the raw 'Y' marker and an already-converted 1 as set, so that
# re-running this cell keeps the flags instead of resetting them all to 0.
fff_all[flag_cols] = fff_all[flag_cols].isin(['Y', 1]).astype(int)

fff_all.head()

In [None]:
fff_row_per_ticker = fff_all.assign(
    Tickers=fff_all['Tickers'].str.split(',')
).explode('Tickers')[id_cols].dropna(axis = 0, how = 'all')

# look for duplicates
fff_row_per_ticker = fff_row_per_ticker[fff_row_per_ticker["Tickers"].notnull()]
fff_row_per_ticker[fff_row_per_ticker["Tickers"].duplicated(keep=False)].sort_values("Tickers")

There are multiple duplicates -> tickers are not a unique identifier (not even with country)

# NEXT: 
1. do the same for FFF lists (using Ticker? maybe all tickers and get the name with max matches?)
1. check coverage of figi_name in holdings - ISINs with no match still need to be handled somehow
1. replace name match! use name match only for residues.
1. this can be used to cover other holding_types with ISIN or other IDs

In [None]:
ticker_response = figi_mapping_api_call('TICKER', 'MMM')
ticker_response

trying search instead

(maybe try to combine name and ticker?)
for each FFF name, get matching names by name search and matching names by tickers.
Take the "best" name (most common in all searches?)

In [None]:
# figi_search_api_call("Western Midstream Partners LP").iloc[0]["name"]
# figi_search_api_call("AT&S Austria Technologie & Systemtechnik AG").iloc[0]["name"]
# figi_search_api_call("3A-BestGroup JSC")
# figi_search_api_call("3M Co") # gets the wrong one, decapitalized... leaving for now
r = figi_search_api_call("A. P. Moller Maersk A/S")
df = pd.DataFrame([1])
df["figi_name"] = [r['name'].unique()]

In [None]:
fff_all.rename({"Company": "name"}, axis=1, inplace=True)

In [None]:
fff_with_figi_by_name = get_figi_names_by_id_type(fff_all, id_type="name", id_col_name="name")

In [None]:
fff_with_figi_by_name

In [None]:
import pickle

with open('fff_with_figi_by_name.pkl', 'wb') as handle:
    pickle.dump(fff_with_figi_by_name, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('fff_with_figi_by_name.pkl', 'rb') as handle:
#     from_pickle = pickle.load(handle)

In [None]:
figi_mapping_api_call('TICKER', '0E6Y')

In [None]:
fff_with_figi_by_ticker = get_figi_names_by_id_type(fff_row_per_ticker, id_type="TICKER", id_col_name="Tickers")

In [None]:
# Save the ticker-based FIGI name lookup to disk.
with open('fff_with_figi_by_ticker.pkl', 'wb') as handle:
    pickle.dump(fff_with_figi_by_ticker, handle, protocol=pickle.HIGHEST_PROTOCOL)

# To load the saved ticker lookup back:
# with open('fff_with_figi_by_ticker.pkl', 'rb') as handle:
#     from_pickle = pickle.load(handle)

try to match each holding to fff by name and ticker.
continue working on FFF:
1. If no ticker - only name (by best name match with fff_name?)
1. If has ticker - keep names that are in both name and ticker match

FFF name | Figi names (by ticker and name match)

* Join holdings to FFF by name
* Name match for holdings with no match

In [None]:
# ChatGPT
import requests

def match_company_names_with_lei_isin(ids, id_type):
    """Map a batch of identifiers through the OpenFIGI mapping endpoint.

    :param ids: iterable of identifier values
    :param id_type: OpenFIGI idType shared by all values, e.g. 'ID_ISIN'
    :return: list of {'company_name', 'lei', 'isin'} dicts for returned records
        that carry both a 'lei' and an 'isin' field; [] when ``ids`` is empty;
        None when the request does not return HTTP 200
    """
    api_url = 'https://api.openfigi.com/v3/mapping'

    headers = {
        'Content-Type': 'application/json'
    }

    # one mapping job per identifier
    jobs = [{'idType': id_type, 'idValue': i} for i in ids]
    if not jobs:
        return []

    # The endpoint takes the list of jobs itself as the request body, exactly as
    # figi_mapping_api_call sends it, not an object wrapping it under 'mappings'.
    # NOTE(review): the API limits the number of jobs per request - confirm the
    # limit and chunk large inputs if needed.
    response = requests.post(api_url, headers=headers, json=jobs)

    if response.status_code != 200:
        print('Error:', response.status_code)
        return None

    matched_companies = []
    for result in response.json():
        if 'data' not in result:
            continue
        for data in result['data']:
            # NOTE(review): mapping records may not include 'lei' / 'isin' at all,
            # in which case this filter drops everything - confirm against a
            # real response.
            if 'lei' in data and 'isin' in data:
                matched_companies.append({
                    'company_name': data['name'],
                    'lei': data['lei'],
                    'isin': data['isin']
                })
    return matched_companies

# Example usage
# The mapping endpoint resolves identifiers rather than free-text names, and the
# function requires an id_type (the earlier call omitted it and raised TypeError),
# so the example passes ISINs.
# NOTE(review): the ISINs below are intended to be Apple, Microsoft and Alphabet - confirm.
example_isins = ['US0378331005', 'US5949181045', 'US02079K3059']
matched_results = match_company_names_with_lei_isin(example_isins, 'ID_ISIN')

if matched_results:
    for result in matched_results:
        print('Company:', result['company_name'])
        print('LEI:', result['lei'])
        print('ISIN:', result['isin'])
        print('---')

###### 

In [None]:
fff_with_figi_by_ticker