## Support Party Mapping
Refer to Section 5.2.

In [1]:
import pandas as pd

# ---- input file locations ----
file_first_party_mapping = '../../Endpoint Mapping Data/Domain Data/first_party_mapping.csv'
file_serp_nmf_results = '../../Endpoint Mapping Data/Domain Data/Support Party Mapping/SERP_NMF_RESULTS.csv'
file_netify_support = '../../Endpoint Mapping Data/Domain Data/Support Party Mapping/Netify/netify_support_party.csv'
# ---- output file location ----
file_all_party_mapping = '../../Endpoint Mapping Data/Domain Data/all_party_mapping.csv'

# First-party mappings: this is the base table that the other two sources
# are left-joined onto, so every row of it is kept in the merged result.
super_vendor_domain = pd.read_csv(file_first_party_mapping)

# Topic-model results (used to derive the service type of each domain),
# with the raw text/token columns removed since they are not needed here.
domain_labeling = pd.read_csv(file_serp_nmf_results).drop(
    columns=["text", "text_google", "text_bing", "token"]
)

# Service types collected from netify.ai:
#   - org_name is not needed,
#   - every row listed in this file is flagged with netify == 2,
#   - remote_hostname is renamed to "domain" so it can serve as the join key.
support_parties_from_netify = (
    pd.read_csv(file_netify_support)
    .drop(columns="org_name")
    .assign(netify=2)
    .rename(columns={"remote_hostname": "domain"})
)

# Left-join both label sources onto the first-party table by domain;
# domains absent from a source end up with NaN in that source's columns.
df_merged = super_vendor_domain.merge(domain_labeling, on="domain", how="left")
df_merged = df_merged.merge(support_parties_from_netify, on="domain", how="left")


# Please refer to the cluster text file to find the cluster labels
# TF-IDF
# 8: cdn/cloud
# 12: iot/cloud service
# 14: iot platforms

# Counting
# 4: smart home/IoT
# 5: cdn/cloud
# 15: cloud service/iot


def support_party(label, label2, netify):
    """Classify a non-first-party domain as support party (2) or third party (3).

    A domain is a support party when either:
      * ``netify`` equals 2, or
      * ``label`` is one of 8, 12, 14 AND ``label2`` is one of 4, 5, 15
        (presumably the TF-IDF and Counting cluster ids listed in the
        comments above this function -- confirm against the cluster file).

    Anything else, including missing (NaN) values, is a third party.

    Returns:
        2 for a support party, 3 for a third party.
    """
    # A netify flag alone is sufficient.
    if netify == 2:
        return 2

    # Otherwise both cluster labels must fall in their accepted sets;
    # label2 is only inspected once label has already matched.
    if label not in (8, 12, 14):
        return 3
    return 2 if label2 in (4, 5, 15) else 3

# map all party types (1: first party, 2: support party, 3: third party)
def _party_label(row):
    """Return the party type for one merged row."""
    # Rows already marked as first party keep their existing value untouched.
    if row["first_party"] == 1:
        return row["first_party"]
    # Everything else is decided from the two cluster labels and the netify flag.
    return support_party(row["target"], row["target2"], row["netify"])


df_merged["party_labels"] = df_merged.apply(_party_label, axis=1)


# TODO: save the results to the output file
# df_merged.to_csv(file_all_party_mapping, index=False)