<H2>Benefit Contract Data Generation</H2>

In [1]:
import pandas as pd
import sqlite3
import re
import hashlib

<b>Read membership data from Reltio export</b>

In [2]:
# Load the non-resigned membership export; dtype=str reads every column as text.
df_non_resigned = pd.read_csv(
    "datasets/prod_non_resigned_membership_20193001.csv",
    dtype=str,
)

<b>Read Community name mappings</b>

In [3]:
# Load the community-name mapping from the first sheet, reading every column as text.
community_names_df = pd.read_excel("datasets/Community Name Mapping.xlsx", sheet_name=0, dtype=str)

# Normalise the Dana community name in a single vectorised pass: upper-case it
# and collapse every run of whitespace to one space (split/join also trims the
# ends). Unlike a per-value lambda calling x.strip(), the .str accessor
# propagates missing cells as NaN instead of raising AttributeError on them.
community_names_df['Community_Name_Dana'] = (
    community_names_df['Community_Name_Dana']
    .str.upper()
    .str.split()
    .str.join(' ')
)

In [4]:
# Display the column names of the community-name mapping frame.
community_names_df.columns

Index(['Community_ID', 'Community_Name_Reltio', 'Community_Name_Dana'], dtype='object')

<b>Read Community Benefit File</b>

In [5]:
# Load the pre-processed community benefit review, reading every column as text.
community_benefit_df = pd.read_csv("datasets/Owned Club Community Benefits Review 20181220 V8 Preprocessed.csv", dtype=str)

# Normalise the tier name in a single vectorised pass: upper-case it and
# collapse every run of whitespace to one space (split/join also trims the
# ends). Unlike a per-value lambda calling x.strip(), the .str accessor
# propagates missing cells as NaN instead of raising AttributeError on them.
community_benefit_df['Benefit Tier'] = (
    community_benefit_df['Benefit Tier']
    .str.upper()
    .str.split()
    .str.join(' ')
)

<b>Read benefit tier queries</b>

In [6]:
# Load the benefit-tier -> SQL query lookup, reading every column as text.
query_df = pd.read_csv("datasets/benefit_tier_query_20190129 V18.csv", dtype=str)

# Tier names from the community benefit file are upper-cased, trimmed and have
# their inner whitespace collapsed before they are matched against this column,
# so apply the same normalisation here. Only upper-casing and trimming would
# leave a tier with a doubled inner space unmatched.
query_df['benefit_tier'] = (
    query_df['benefit_tier']
    .str.upper()
    .str.split()
    .str.join(' ')
)

In [7]:
# Display the column names of the benefit-tier query lookup frame.
query_df.columns

Index(['benefit_tier', 'query'], dtype='object')

<b>Loading datasets in SQLite</b>

In [8]:
%%time
# Connect to the SQLite database file; later cells write the per-community
# membership table into it and run the benefit-tier queries against it.
conn = sqlite3.connect("benefitContract.db")

Wall time: 25.7 ms


<b>Benefit contract generation</b>

In [9]:
# Accumulators filled by the loop below: one dict per generated contract, and
# the distinct (membership type id, category code, benefit tier) triples seen.
benefit_contracts = []
membership_type_and_benefit_tier = set()

all_communities = community_benefit_df['Community'].dropna().unique()
for community_name_dana in all_communities:
    print(community_name_dana)

    # Take the first mapping row for this Dana name and read both values from it.
    mapping_row = community_names_df[community_names_df['Community_Name_Dana'] == community_name_dana].iloc[0]
    community_name_reltio = mapping_row['Community_Name_Reltio']
    community_id = mapping_row['Community_ID']

    # This community is deliberately excluded from contract generation.
    if community_name_reltio == 'My Compass Southern Community':
        continue

    # Memberships attached to the community through either community column.
    in_community_1 = df_non_resigned['Attributes.EC_Community.EC_Name'] == community_name_reltio
    in_community_2 = df_non_resigned['Attributes.EC_Community2.EC_Name'] == community_name_reltio
    df = df_non_resigned[in_community_1 | in_community_2]

    # The tier queries select from a table called "df", so it is replaced for each community.
    df.to_sql("df", conn, if_exists="replace")

    tiers_for_community = community_benefit_df.loc[community_benefit_df['Community'] == community_name_dana, 'Benefit Tier']
    benefit_tiers = tiers_for_community.dropna().unique()
    for raw_benefit_tier in benefit_tiers:
        benefit_tier = raw_benefit_tier.strip().replace('  ', ' ').upper()
        print("Generating benefit contract for : " + community_name_reltio + "," + benefit_tier)

        # Fetch the tier's query and widen its projection from the ID column to every column.
        query_str = query_df.loc[query_df['benefit_tier'] == benefit_tier, 'query'].iloc[0]
        query_all_columns = query_str.replace('select df.ID', 'select df.*')
        rs = pd.read_sql_query(query_all_columns, conn)

        # An empty result set simply produces no iterations.
        for membership_index, membership_row in rs.iterrows():
            contract = {
                'membership_uri': membership_row['ID'],
                'membership_number': membership_row['Attributes.EC_Membership_Number'],
                'entity_code': membership_row['Attributes.CCNative.ENTITY_CODE'],
                'membership_type_id': membership_row['Attributes.EC_MP_Detail.MBRSHIP_TYPE_ID'],
                'membership_category_code': membership_row['Attributes.EC_MP_Detail.MBRSHIP_CATEGORY_CODE'],
                'benefit_tier': benefit_tier,
                'community': community_name_reltio,
                'community_id': community_id,
            }
            benefit_contracts.append(contract)
            membership_type_and_benefit_tier.add(
                (contract['membership_type_id'], contract['membership_category_code'], benefit_tier)
            )


ATLANTA
Generating benefit contract for : My Atlanta Community,ATL ASSOCIATE GOLF
Generating benefit contract for : My Atlanta Community,ATL ONE GOLF
Generating benefit contract for : My Atlanta Community,ATL ONE NON GOLF
Generating benefit contract for : My Atlanta Community,ATL SOCIETY FOR ALLIANCE CLUBS
Generating benefit contract for : My Atlanta Community,ATL SOCIETY FOR CCOS GOLF
Generating benefit contract for : My Atlanta Community,ATL SOCIETY FOR CCOS NON GOLF
Generating benefit contract for : My Atlanta Community,ATL SOCIETY FOR LEGACY CLUBS
Generating benefit contract for : My Atlanta Community,ATLANTA SIGNATURE GOLF
Generating benefit contract for : My Atlanta Community,CHAMPIONS
Generating benefit contract for : My Atlanta Community,CHAMPIONS ONE
Generating benefit contract for : My Atlanta Community,INFINITY
Generating benefit contract for : My Atlanta Community,INFINITY - ONE
Generating benefit contract for : My Atlanta Community,ONE BC RESIDENT
Generating benefit contra

Generating benefit contract for : My Orlando Community,ALL COMMUNITY PROGRAMMING
PHOENIX
Generating benefit contract for : My Phoenix Community,ALL COMMUNITY PROGRAMMING
PITTSBURGH
Generating benefit contract for : My Pittsburgh Community,ALL COMMUNITY PROGRAMMING
TAMPA BAY
Generating benefit contract for : My Tampa Bay Community,CROSS BAY
Generating benefit contract for : My Tampa Bay Community,ONE
Generating benefit contract for : My Tampa Bay Community,SOCIETY
LONESTAR
Generating benefit contract for : My Lone Star Community - Austin/San Antonio,ONE BAYLOR - AUSTIN
Generating benefit contract for : My Lone Star Community - Austin/San Antonio,ONE BAYLOR - SAN ANTONIO
Generating benefit contract for : My Lone Star Community - Austin/San Antonio,ONE
Generating benefit contract for : My Lone Star Community - Austin/San Antonio,ONE TX TECH - AUSTIN
Generating benefit contract for : My Lone Star Community - Austin/San Antonio,ONE TX TECH - SAN ANTONIO
Generating benefit contract for : My 

<b>Save Benefit Contract Data</b>

In [10]:
# Build the frame with an explicit column list: this fixes the column order and
# still yields the expected (empty) columns when no contract was generated,
# whereas selecting the columns afterwards raises KeyError on an empty frame.
benefit_contract_df = pd.DataFrame(
    benefit_contracts,
    columns=[
        'membership_uri',
        'membership_number',
        'entity_code',
        'membership_type_id',
        'membership_category_code',
        'benefit_tier',
        'community',
        'community_id',
    ],
)

# Forward slash on purpose: the backslash form depended on the invalid "\c"
# string escape and is only a directory separator on Windows, while the cell
# that reads this file back uses "output/...". "/" works on every platform.
benefit_contract_df.to_csv("output/community_benefit_contracts_initial.csv", index=False)

In [11]:
# One row per distinct (membership type id, category code, benefit tier) triple.
membership_type_and_benefit_tier_df = pd.DataFrame(
    list(membership_type_and_benefit_tier),
    columns=['membership_type_id', 'membership_category_code', 'benefit_tier'],
)

# Forward slash on purpose: the backslash form depended on the invalid "\m"
# string escape and is only a directory separator on Windows.
membership_type_and_benefit_tier_df.to_csv("output/membership_type_and_benefit_tier_df.csv", index=False)

<b>Duplicate Membership Type IDs - these indicate a benefit tier interpretation problem</b>

In [12]:
## Printing the duplicate MembershipIds
# A (membership_type_id, membership_category_code) pair appearing on more than
# one row was assigned to more than one benefit tier.
membership_type_key = ['membership_type_id', 'membership_category_code']

# keep=False flags every occurrence of a repeated pair, not just the later ones.
# The grouping key must be a list: current pandas treats a tuple as one single
# key, which raises KeyError here. The filtered frame is built once and reused.
ambiguous_membership_types = membership_type_and_benefit_tier_df[
    membership_type_and_benefit_tier_df.duplicated(membership_type_key, keep=False)
].groupby(membership_type_key)

# Number of tiers per ambiguous pair, then the alphabetically first and last tier.
print(ambiguous_membership_types.count())

print(ambiguous_membership_types.min())
print(ambiguous_membership_types.max())



                                             benefit_tier
membership_type_id membership_category_code              
102000476          01                                   2
112000476          04                                   2
114000476          01                                   2
119000476          01                                   2
120000476          01                                   2
128000476          01                                   2
129000476          01                                   2
130000476          01                                   2
139000476          01                                   2
143000476          01                                   2
145000476          01                                   2
146000476          01                                   2
196002810          01                                   2
199002832          01                                   2
50000476           01                                   2
51000476      

<b>Generating Benefit Contracts with all the required values</b>

In [13]:
# Whitespace matcher used by generateIdBasedOnHash when normalising its input.
pattern = re.compile(r'\s+')
def generateIdBasedOnHashBenefitContract(df):
    """Return one hashed benefit-contract id per row of ``df``, in row order.

    The text hashed for each row is
    ``benefit_tier|community_id|membership_number|entity_code``; the hashing
    itself is delegated to ``generateIdBasedOnHash``.
    """
    key_columns = ('benefit_tier', 'community_id', 'membership_number', 'entity_code')
    return [
        generateIdBasedOnHash("|".join(record[column] for column in key_columns))
        for _row_label, record in df.iterrows()
    ]

In [14]:
def generateIdBasedOnHashBenefitPackage(df):
    """Return one hashed benefit-package id per row of ``df``, in row order.

    The text hashed for each row is ``community_id|benefit_tier``; the hashing
    itself is delegated to ``generateIdBasedOnHash``.
    """
    return [
        generateIdBasedOnHash(record['community_id'] + "|" + record['benefit_tier'])
        for _row_label, record in df.iterrows()
    ]

In [15]:
# Matches any run of whitespace characters.
pattern = re.compile(r'\s+')
def generateIdBasedOnHash(text):
    """Return the MD5 hex digest of ``text`` after normalising it.

    Normalisation trims the ends, lower-cases the text and replaces every run
    of whitespace with a single underscore; the result is UTF-8 encoded before
    hashing.
    """
    trimmed_lowercase = text.strip().lower()
    underscored = pattern.sub('_', trimmed_lowercase)
    digest = hashlib.md5(underscored.encode('utf-8'))
    return digest.hexdigest()

In [16]:
# Reload the initial contracts from disk, reading every column as text.
benefit_contract_initial_df = pd.read_csv("output/community_benefit_contracts_initial.csv", dtype=str)

# Hashed identifiers for the benefit package and for the individual contract.
benefit_contract_initial_df['benefit_package_id'] = generateIdBasedOnHashBenefitPackage(benefit_contract_initial_df)
benefit_contract_initial_df['benefit_contract_id'] = generateIdBasedOnHashBenefitContract(benefit_contract_initial_df)

# Build the display strings with vectorised string concatenation rather than a
# Python lambda evaluated once per row:
#   name        -> <tier>_<community>(<membership number>)
#   description -> <tier>_<community>(<membership number>-<entity code>)
tier_and_community = benefit_contract_initial_df['benefit_tier'] + "_" + benefit_contract_initial_df['community']
benefit_contract_initial_df['name'] = (
    tier_and_community + "(" + benefit_contract_initial_df['membership_number'] + ")"
)
benefit_contract_initial_df['description'] = (
    tier_and_community
    + "(" + benefit_contract_initial_df['membership_number']
    + "-" + benefit_contract_initial_df['entity_code'] + ")"
)

# Constant attributes shared by every contract produced by this notebook.
benefit_contract_initial_df['type'] = "Community"
benefit_contract_initial_df['active'] = "Y"

# Forward slash on purpose: the backslash form depended on the invalid "\c"
# string escape and is only a directory separator on Windows, unlike the
# "output/..." path used for the read at the top of this cell.
benefit_contract_initial_df.to_csv("output/community_benefit_contracts_allValues.csv", index=False)

<b>Close database connection</b>

In [17]:
# Release the SQLite connection opened earlier; no cell after this one queries it.
conn.close()

<b>Removing Duplicate Ids</b>

In [18]:
benefit_contract_id_unique = benefit_contract_initial_df['benefit_contract_id'].is_unique
print("Benefit Contract Id Uniqueness : " + str(benefit_contract_id_unique))

if not benefit_contract_id_unique:
    # Flag every row whose benefit_contract_id occurs more than once
    # (keep=False marks all occurrences, not only the repeats).
    condition = benefit_contract_initial_df['benefit_contract_id'].duplicated(keep=False)

    # Distinct duplicated ids in sorted order, printed for inspection.
    dulicateBenefitContractIds = (
        benefit_contract_initial_df.loc[condition, 'benefit_contract_id']
        .sort_values()
        .unique()
    )
    print(dulicateBenefitContractIds)

    # Split the contracts into those with a unique id and those sharing an id.
    df_unique_values = benefit_contract_initial_df[~condition]
    df_duplicate_values = benefit_contract_initial_df[condition]

    # Row counts: all contracts, unique-id contracts, duplicate-id contracts.
    print(len(benefit_contract_initial_df.index))
    print(len(df_unique_values.index))
    print(len(df_duplicate_values.index))

    # Forward slashes on purpose: the backslash form depended on the invalid
    # "\c" string escape and is only a directory separator on Windows.
    df_unique_values.to_csv("output/community_benefit_contracts_allValues_unique.csv", index=False)
    df_duplicate_values.to_csv("output/community_benefit_contracts_allValues_duplicate.csv", index=False)
          
          
    

Benefit Contract Id Uniqueness : False
['11cfe081da9fa3dc8fce53abae0650bc' '444e853f566b6ac94c541b55a172e547'
 '5efff61abc8d6523971ba33f3bb338e7' 'c972536421a6a2027d424ec02d20aa4e'
 'e31585ce45694c89cb226d5231493c89']
94852
94842
10
