In [None]:
pip install termcolor

In [23]:
import pandas as pd
import sqlite3
from itertools import combinations
from termcolor import colored

<b>Benefit Tier Interpretation - required for analysis</b>

In [24]:
# Benefit tier interpretation workbook (first sheet). Every cell is read as text and
# blanks stay as empty strings (na_filter=False) rather than becoming NaN.
# Forward slashes keep the relative path valid on Windows, macOS and Linux alike.
fileName = "Datasets/Benefit_Tier_Understanding_test.xlsx"
#fileName = "Datasets/Benefit_Tier_Understanding_20200929.xlsx"
metadata_df = pd.read_excel(fileName, sheet_name=0, dtype=str, na_filter=False)

<b>Benefit Tier Query</b>

In [25]:
# Benefit-tier specific queries, one 'query' string per 'benefit_tier'.
# Tier names are upper-cased and trimmed so they can be matched by exact comparison.
query_df = pd.read_csv("output/benefit_tier_query_20190919 V29.csv", dtype=str)
query_df['benefit_tier'] = query_df['benefit_tier'].str.upper().str.strip()

<b>Non Resigned Memberships</b>

In [26]:
# Non-resigned membership extract from Reltio; all columns are loaded as text.
df_membership = pd.read_csv(
    "Datasets/non_resigned_membership_test.csv",
    dtype=str,
)

In [27]:
# Sanity check: show which columns were loaded into query_df.
query_df.columns

Index(['benefit_tier', 'query'], dtype='object')

<b>Community Benefits File</b>

In [28]:
# Community benefits review file (preprocessed CSV export); all columns are loaded as text.
community_df = pd.read_csv(
    "Datasets/Owned Club Community Benefits Review 09282020 Preprocessed.csv",
    dtype=str,
)

# Alternative source: the original workbook (third sheet).
#community_df = pd.read_excel("Datasets/Owned Club Community Benefits Review 09102020.xlsx", dtype=str, sheet_name=2)

# Upper-case and trim the tier names in one pass.
community_df['Benefit Tier'] = community_df['Benefit Tier'].str.upper().str.strip()

In [29]:
#community_df.head()

<b>Dana's Community name to Reltio community name mapping</b>

In [30]:
# Lookup table from the business community name to the Reltio community name
# (first sheet of the mapping workbook, all columns as text).
community_name_df = pd.read_excel(
    "Datasets/community_name_mapping.xlsx",
    sheet_name=0,
    dtype=str,
)

In [32]:
%%time
# Open the local SQLite database file (sqlite3 creates it if it does not exist yet).
# The overlap cell writes each community's membership rows into it and queries them back.
conn = sqlite3.connect("communityOverlap.db")
# Cursor on the same connection; not referenced by the cells visible in this notebook section.
cur = conn.cursor()

Wall time: 996 µs


In [33]:
def getStandardCommunityName(Community_Name_Dana):
    """Translate a business community name into its Reltio community name.

    Looks the name up in the module-level ``community_name_df`` mapping table
    (column ``Community_Name_Business``) and returns the value of
    ``Community_Name_Reltio`` from the first matching row.

    Returns an empty string when the name has no entry in the mapping.
    """
    is_match = community_name_df['Community_Name_Business'] == Community_Name_Dana
    matching_rows = community_name_df[is_match]
    if len(matching_rows.index) == 0:
        return ''
    return matching_rows.iloc[0]['Community_Name_Reltio']

In [34]:
%%time
overlap_found = []
overlap_not_computed = []
# All communities
all_communities = community_df['Community'].unique()
for community in all_communities:
    community_name = getStandardCommunityName(community)
    df = df_membership[(df_membership['Attributes.EC_Community.EC_Name']==community_name)|(df_membership['Attributes.EC_Community2.EC_Name']==community_name)]
    df.to_sql("df", conn, if_exists="replace")
    benefit_tiers = list(community_df[community_df['Community']==community]['Benefit Tier'].replace('\s+', ' ', regex=True).dropna().unique())
    
    #print (community)
    
   # if community != 'Houston':
        #continue
    #else:
       # print (benefit_tiers)
       # break
        
    
    
    if 'NAN' in benefit_tiers:
        benefit_tiers.remove('NAN')
    if 'NO COMMUNITY AT THIS TIME' in benefit_tiers:
        benefit_tiers.remove('NO COMMUNITY AT THIS TIME')
        
    benefit_tier_combinations = [comb for comb in combinations(benefit_tiers, 2)]
    for combination in benefit_tier_combinations:
        #print(community_name, combination)
        benefit_tier0 = combination[0]
        benefit_tier0 = benefit_tier0.replace('  ', ' ')
        benefit_tier1 = combination[1]
        benefit_tier1 = benefit_tier1.replace('  ', ' ')
        query0 = ''
        query1 = ''
        if len(query_df[query_df['benefit_tier'] == benefit_tier0]['query']) > 0:
            query0 = query_df[query_df['benefit_tier'] == benefit_tier0]['query'].iloc[0]
        else:
            continue
        if len(query_df[query_df['benefit_tier'] == benefit_tier1]['query']) > 0:
            query1 = query_df[query_df['benefit_tier'] == benefit_tier1]['query'].iloc[0]
        else:
            continue
            
        rs0 = pd.read_sql_query(query0, conn)
        rs1 = pd.read_sql_query(query1, conn)
        if len(rs0.index) == 0 or len(rs1.index) == 0:
            #print("Overlap not computed for," + community + "," + benefit_tier0 + ", " + benefit_tier1)
            overlap_not_computed.append({
                'community': community,
                'benefit_tier0': benefit_tier0,
                'benefit_tier1': benefit_tier1
            })
        else:
            intersection = list(set(rs0['ID']) & set(rs1['ID']))
            if len(intersection) > 0:
                print(colored(community + "," + benefit_tier0 + "," + benefit_tier1,'red'))
                overlap_found.append({
                    'community': community,
                    'benefit_tier0': benefit_tier0,
                    'benefit_tier1': benefit_tier1                    
                })

if len(overlap_found) == 0:
    print("No overlaps detected")
else:
    print("Overlap count: " + str(len(overlap_found)))

[31mDFW,YEX,HILLS AND VARSITY GOLF ACCESS AND PLAY AWAY ADV[0m
Overlap count: 1
Wall time: 53.3 s


In [35]:
# Write the detected overlaps to CSV; skip the file entirely when there are none.
if not overlap_found:
    print("No overlaps found, output file not generated")
else:
    overlap_columns = overlap_found[0].keys()
    overlap_found_df = pd.DataFrame(overlap_found, columns=overlap_columns)
    overlap_found_df.to_csv("output/community_benefits_overlaps.csv", index=False)

In [27]:
# Tier pairs whose overlap could not be computed because a query returned no rows.
# The columns are named explicitly so an empty list yields an empty, correctly
# shaped frame instead of raising IndexError on `overlap_not_computed[0]`.
overlap_not_computed_df = pd.DataFrame(
    overlap_not_computed,
    columns=['community', 'benefit_tier0', 'benefit_tier1'],
)
overlap_not_computed_df.head()

Unnamed: 0,community,benefit_tier0,benefit_tier1
0,ATLANTA,ATL ASSOCIATE GOLF,ATL ONE GOLF
1,ATLANTA,ATL ASSOCIATE GOLF,ATL ONE NON GOLF
2,ATLANTA,ATL ASSOCIATE GOLF,ATL SOCIETY FOR ALLIANCE CLUBS
3,ATLANTA,ATL ASSOCIATE GOLF,ATL SOCIETY FOR CCOS GOLF
4,ATLANTA,ATL ASSOCIATE GOLF,ATL SOCIETY FOR CCOS NON GOLF


In [28]:
def getBenefitTierAttributes(benefit_tier):
    """Return the ``(ONE, Category_Golf, Category_Social)`` values for a benefit tier.

    The tier is matched against the upper-cased ``Benefit Tier`` column of the
    module-level ``metadata_df``; when several rows match, the first one wins.

    Returns ``('', '', '')`` when no row matches.
    """
    is_match = metadata_df['Benefit Tier'].str.upper() == benefit_tier
    matching_rows = metadata_df[is_match]
    if len(matching_rows.index) == 0:
        return ('', '', '')
    first_row = matching_rows.iloc[0]
    return (first_row['ONE'], first_row['Category_Golf'], first_row['Category_Social'])

In [29]:
# Classify every tier pair whose overlap could not be computed as "safe" or suspect,
# using the ONE / golf / social attributes from the benefit tier metadata.
overlap_suspect_details = []
# Club names; a pair is safe when both tier names contain one of these.
named_clubs = ('TX TECH','UT CLUB','BAYLOR', 'STONEBRIDGE', 'BAY OAKS','KINGWOOD', 'WOODLANDS', 'CCOS', 'INFINITY', 'CHAMPIONS')
# A pair is safe as soon as either side is one of these tiers.
always_safe_tiers = ('TOPGOLF', 'NCS', 'NCS CUSTOM')

for index, row in overlap_not_computed_df.iterrows():
    community = row['community']
    benefit_tier0 = row['benefit_tier0']
    benefit_tier1 = row['benefit_tier1']
    one_0, category_golf_0, category_social_0 = getBenefitTierAttributes(benefit_tier0)
    one_1, category_golf_1, category_social_1 = getBenefitTierAttributes(benefit_tier1)

    # Exactly one of the two tiers is flagged ONE.
    one_differs = (one_0 == 'Yes' and one_1 == 'No') or (one_0 == 'No' and one_1 == 'Yes')
    # One tier is golf and the other social. Checked in both directions, like the
    # other rules: the pair order only reflects the order the tiers were listed in.
    golf_vs_social = (
        (category_golf_0 == 'Yes' and category_social_1 == 'Yes')
        or (category_social_0 == 'Yes' and category_golf_1 == 'Yes')
    )
    # Exactly one of the two tiers is a golf tier.
    golf_differs = (
        (category_golf_0 == 'Yes' and category_golf_1 == 'No')
        or (category_golf_0 == 'No' and category_golf_1 == 'Yes')
    )
    has_always_safe_tier = benefit_tier0 in always_safe_tiers or benefit_tier1 in always_safe_tiers
    both_named_clubs = (
        any(s in benefit_tier0 for s in named_clubs)
        and any(s in benefit_tier1 for s in named_clubs)
    )

    safe = (
        one_differs
        or golf_vs_social
        or golf_differs
        or has_always_safe_tier
        or both_named_clubs
    )

    overlap_suspect_details.append({
        'community': community,
        'benefit_tier0': benefit_tier0,
        'one_0': one_0,
        'category_golf_0': category_golf_0,
        'category_social_0': category_social_0,
        'benefit_tier1': benefit_tier1,
        'one_1': one_1,
        'category_golf_1': category_golf_1,
        'category_social_1': category_social_1,
        'safe': safe
    })

In [30]:
# Tabulate the classified pairs, drop exact duplicates and export them for review.
# The columns are listed explicitly so an empty result still produces a valid
# (header-only) CSV instead of raising IndexError on `overlap_suspect_details[0]`.
overlap_suspect_details_df = pd.DataFrame(
    overlap_suspect_details,
    columns=[
        'community',
        'benefit_tier0', 'one_0', 'category_golf_0', 'category_social_0',
        'benefit_tier1', 'one_1', 'category_golf_1', 'category_social_1',
        'safe',
    ],
)
overlap_suspect_details_df = overlap_suspect_details_df.drop_duplicates()
overlap_suspect_details_df.to_csv('output/overlap_suspect_details.csv', index=False)
overlap_suspect_details_df.head()

Unnamed: 0,community,benefit_tier0,one_0,category_golf_0,category_social_0,benefit_tier1,one_1,category_golf_1,category_social_1,safe
0,ATLANTA,ATL ASSOCIATE GOLF,No,Yes,,ATL ONE GOLF,,Yes,,False
1,ATLANTA,ATL ASSOCIATE GOLF,No,Yes,,ATL ONE NON GOLF,,No,,True
2,ATLANTA,ATL ASSOCIATE GOLF,No,Yes,,ATL SOCIETY FOR ALLIANCE CLUBS,,,,False
3,ATLANTA,ATL ASSOCIATE GOLF,No,Yes,,ATL SOCIETY FOR CCOS GOLF,No,Yes,,False
4,ATLANTA,ATL ASSOCIATE GOLF,No,Yes,,ATL SOCIETY FOR CCOS NON GOLF,No,No,,True
