In [1]:
import os
import re

import numpy as np
import pandas as pd

In [2]:
# Workbook whose first sheet lists, per benefit tier, the attribute values used to build its query.
# Forward slashes keep the path usable on every OS (Windows accepts them too) and match the
# other data paths in this notebook.
fileName = "Datasets/Benefit_Tier_Understanding_20190919 V30 With Atlanta, no LS.xlsx"
# Read every cell as text and keep blank cells as "" instead of NaN.
metadata_df = pd.read_excel(fileName, sheet_name=0, dtype=str, na_filter=False)

In [3]:
# Show the column names of the metadata sheet as a plain Python list.
metadata_df.columns.tolist()

['Benefit Tier',
 'Community_Flag',
 'Member_Of',
 'Not_Member_Of',
 'Exclude_Benefit_Tiers',
 'Community',
 'Ignore_Community',
 'RS_Ben_Prog_Name',
 'RS_Ben_Prog_Name_Not',
 'ONE',
 'Category_Golf',
 'Category_Social',
 'Mbrship_type_desc_contains',
 'Mbrship_type_desc_equals',
 'Mbrship_type_desc_does_not_have']

In [4]:
# NOTE(review): this name is not referenced by any other visible cell, and the text has no
# table name between "from" and "where" — confirm whether it is leftover scratch.
query_prefix = "select * from where "

In [5]:
# Scratch cell: bind a set holding two sample values to a module-level name and display it.
entity_codes = {'a', 'b'}
entity_codes

{'a', 'b'}

In [6]:
def getClubList(member_of):
    """Expand a comma-separated list of club groups / entity codes into SQL literals.

    Each token is either the name of a known club group, which expands to that
    group's entity codes, or it is taken as a single entity code as written.
    Surrounding whitespace is ignored and blank tokens (e.g. from a trailing
    comma) are dropped.

    Args:
        member_of: comma-separated string such as "Atlanta_Business_Clubs,01592".

    Returns:
        The distinct entity codes as single-quoted SQL string literals joined
        by ", " (for example "'00730', '01592', '02506'"). The codes are sorted
        so the generated text is identical on every run.
    """
    club_groups = {
        'Atlanta_Business_Clubs': ['00730', '02506'],
        'Atlanta_Legacy_Clubs': ['01682', '00730', '00641', '02506', '02505', '01593', '01592', '01668', '00622'],
        'Atlanta_Infinity_Clubs': ['02851', '02806', '02808', '02810', '02832', '02812', '02822', '02836', '02816',
                                   '00622', '02834', '02818', '02824', '02840', '02838', '02814', '02830', '02820'],
        'Houston_Canongate_Clubs': ['02863', '02861', '02865', '02859', '02855'],
        'Atlanta_Canongate_Clubs': ['02847', '02849', '02845', '02843', '02851', '02806', '02838', '02808', '02810',
                                    '02832', '02812', '02822', '02836', '02816', '02834', '02840', '02818', '02824',
                                    '02814', '02820', '02868', '02866'],
        'DFW_Elite_Clubs': ['00268', '00002', '00245', '01162'],
        'Houston_Legacy_Clubs': ['00116', '00153', '00164', '00112', '00298', '00192', '02604'],
        'Champions_Houston_Atlanta': ['02847', '02845', '02849', '02843', '02855'],
        'YEX_Houston_Dallas': ['00116', '00153', '00164', '02859', '02861', '00112', '02859',
                               '00298', '00192', '02604', '00100', '00102', '00170', '00238',
                               '00574', '01582', '02701', '01584', '00128', '00430', '00648'],
    }

    entity_codes = set()
    for token in member_of.split(","):
        token = token.strip()  # tolerate "A, B" style lists
        if not token:
            continue  # stray or trailing comma
        # A token that is not a group name is treated as a literal entity code.
        entity_codes.update(club_groups.get(token, [token]))

    # Quote explicitly (doubling embedded apostrophes) instead of relying on the
    # repr of a Python list, and sort because set iteration order is not stable.
    return ", ".join("'" + code.replace("'", "''") + "'" for code in sorted(entity_codes))

def getMemberOfPredicate(member_of):
    """Return the SQL condition that keeps rows whose entity code is listed.

    `member_of` is handed to getClubList; the text it returns becomes the
    contents of the IN (...) list.
    """
    entity_code_column = "df.'Attributes.CCNative.ENTITY_CODE'"
    return entity_code_column + " in (" + getClubList(member_of) + ")"

def getNotMemberOfPredicate(not_member_of):
    """Return the SQL condition that drops rows whose entity code is listed.

    `not_member_of` is handed to getClubList; the text it returns becomes the
    contents of the NOT IN (...) list.
    """
    entity_code_column = "df.'Attributes.CCNative.ENTITY_CODE'"
    return entity_code_column + " not in (" + getClubList(not_member_of) + ")"

def getResortSuitBenefitProgramPredicate(benefitPrograms):
    """Return the SQL condition matching rows whose benefit program name is listed.

    Args:
        benefitPrograms: comma-separated benefit program names.

    Returns:
        An "... in ('name1', 'name2')" condition on the
        RESORTSUITE_BENEFIT_PROGRAM_NAME column. Names are trimmed, blank
        entries are dropped, and embedded apostrophes are doubled so the
        result stays a valid SQL string literal. Square brackets inside a
        name are preserved.
    """
    quoted_names = [
        "'" + name.strip().replace("'", "''") + "'"
        for name in benefitPrograms.split(",")
        if name.strip()
    ]
    return ("df.'Attributes.EC_MP_Detail.RESORTSUITE_BENEFIT_PROGRAM_NAME' in ("
            + ", ".join(quoted_names) + ")")

def getResortSuitBenefitProgramIgnorePredicate(resort_suite_bef_prog_ignore_names):
    """Return the SQL condition rejecting rows whose benefit program name is listed.

    Args:
        resort_suite_bef_prog_ignore_names: comma-separated benefit program names.

    Returns:
        An "... not in ('name1', 'name2')" condition on the
        RESORTSUITE_BENEFIT_PROGRAM_NAME column. Names are trimmed, blank
        entries are dropped, and embedded apostrophes are doubled so the
        result stays a valid SQL string literal. Square brackets inside a
        name are preserved.
    """
    quoted_names = [
        "'" + name.strip().replace("'", "''") + "'"
        for name in resort_suite_bef_prog_ignore_names.split(",")
        if name.strip()
    ]
    # NOTE(review): in SQL, NOT IN does not evaluate to true for a NULL column
    # value — confirm rows with no benefit program name should be left out.
    return ("df.'Attributes.EC_MP_Detail.RESORTSUITE_BENEFIT_PROGRAM_NAME' not in ("
            + ", ".join(quoted_names) + ")")

def getOnePredicate(one):
    """Return the SQL condition on the MP_ONE_FLAG column.

    "Yes" selects rows whose flag equals 'Y'. Any other value selects rows
    whose flag is 'N' once a NULL flag has been replaced by 'N'.
    """
    flag_column = "df.'Attributes.EC_MP_Detail.MP_ONE_FLAG'"
    if one != "Yes":
        return "ifnull(" + flag_column + ",'N') = 'N'"
    return flag_column + " = 'Y'"

def getCategoryGolfPredicate(category_golf):
    """Return the SQL condition comparing MBRSHIP_CATEGORY_CODE with '01'.

    'Yes' requires the code to equal '01'; any other value requires it to differ.
    """
    comparison = "=" if category_golf == 'Yes' else "!="
    return "df.'Attributes.EC_MP_Detail.MBRSHIP_CATEGORY_CODE' " + comparison + " '01'"

def getCategorySocialPredicate(category_social):
    """Return the SQL condition comparing MBRSHIP_CATEGORY_CODE with '04'.

    'Yes' requires the code to equal '04'; any other value requires it to differ.
    """
    comparison = "=" if category_social == 'Yes' else "!="
    return "df.'Attributes.EC_MP_Detail.MBRSHIP_CATEGORY_CODE' " + comparison + " '04'"
    
def getMbrshipTypeDescContainsPredicate(mbrship_type_desc_contains):
    """Return a parenthesised OR of LIKE conditions, one per comma-separated fragment.

    Each fragment is trimmed and wrapped in '%' wildcards. When a fragment
    contains '-', every '-' is prefixed with '#' and the condition carries
    ESCAPE '#'.
    """
    description_column = "df.'Attributes.EC_MP_Detail.MBRSHIP_TYPE_DESCRIPTION'"
    clauses = []
    for fragment in mbrship_type_desc_contains.split(","):
        needs_escape = '-' in fragment
        if needs_escape:
            # Hyphens are written as '#-' and '#' is declared as the escape character.
            fragment = fragment.replace('-', '#-')
        clause = description_column + " like '%" + fragment.strip() + "%'"
        if needs_escape:
            clause = clause + " ESCAPE '#'"
        clauses.append(clause)
    return '(' + ' or '.join(clauses) + ")"

def getMbrshipTypeDescDoesNotHavePredicate(mbrship_type_desc_does_not_have):
    """Return a parenthesised AND of NOT LIKE conditions, one per comma-separated fragment.

    Each fragment is trimmed and wrapped in '%' wildcards.
    """
    description_column = "df.'Attributes.EC_MP_Detail.MBRSHIP_TYPE_DESCRIPTION'"
    clauses = [
        description_column + " not like '%" + fragment.strip() + "%'"
        for fragment in mbrship_type_desc_does_not_have.split(",")
    ]
    return '(' + ' and '.join(clauses) + ")"

def getMbrshipTypeDescEqualsPredicate(mbrship_type_desc_equals):
    """Return the SQL condition requiring MBRSHIP_TYPE_DESCRIPTION to equal the given text.

    The text is placed between single quotes exactly as received.
    """
    description_column = "df.'Attributes.EC_MP_Detail.MBRSHIP_TYPE_DESCRIPTION'"
    return description_column + " = '" + mbrship_type_desc_equals + "'"

In [7]:
#getNotMemberOfPredicate("Atlanta_Legacy_Clubs,Canongate_Clubs,Atlanta_Business_Clubs,01592")

In [8]:
def getCommunityPredicate(communities):
    """Return the SQL condition matching rows that belong to any listed community.

    Args:
        communities: community names separated by '#'.

    Returns:
        A parenthesised condition that is true when either EC_Community.EC_Name
        or EC_Community2.EC_Name is one of the names. Names are trimmed, blank
        entries are dropped, and embedded apostrophes are doubled so each name
        stays a valid SQL string literal. Square brackets inside a name are
        preserved.
    """
    community_names = ", ".join(
        "'" + name.strip().replace("'", "''") + "'"
        for name in communities.split('#')
        if name.strip()
    )
    return ("(df.'Attributes.EC_Community.EC_Name' in (" + community_names
            + ") or df.'Attributes.EC_Community2.EC_Name' in (" + community_names + "))")

In [9]:
def getIgnoreCommunityPredicate(communities):
    """Return the SQL condition rejecting rows that belong to any listed community.

    Args:
        communities: community names separated by '#'.

    Returns:
        A parenthesised condition requiring both EC_Community.EC_Name and
        EC_Community2.EC_Name to be outside the list. Names are trimmed, blank
        entries are dropped, and embedded apostrophes are doubled so each name
        stays a valid SQL string literal. Square brackets inside a name are
        preserved.
    """
    community_names = ", ".join(
        "'" + name.strip().replace("'", "''") + "'"
        for name in communities.split('#')
        if name.strip()
    )
    # NOTE(review): in SQL, NOT IN does not evaluate to true for a NULL column
    # value, so a row with either community name missing fails this condition —
    # confirm that is the intended handling.
    return ("(df.'Attributes.EC_Community.EC_Name' not in (" + community_names
            + ") and df.'Attributes.EC_Community2.EC_Name' not in (" + community_names + "))")

In [10]:
def addPredicate(buffer, predicate):
    """Append `predicate` to `buffer`, inserting " and " when `buffer` already has text.

    Returns the combined string; the arguments themselves are not modified.
    """
    joiner = " and " if len(buffer) != 0 else ""
    return buffer + joiner + predicate

In [11]:
def getBenefitTierQuery(benefit_tire, query_list):
    """Look up the query recorded for a benefit tier.

    `query_list` holds dicts with 'benefit_tier' and 'query' entries. The
    'query' of the first dict whose 'benefit_tier' equals `benefit_tire` is
    returned; when nothing matches the result is "".
    """
    matching_queries = (
        entry.get('query')
        for entry in query_list
        if entry.get('benefit_tier') == benefit_tire
    )
    return next(matching_queries, "")

In [12]:
# Build one SQL query per row of the metadata sheet.
#
# Each (column, builder) pair below turns a non-empty cell into one condition; the
# conditions are AND-ed together in this order to form the WHERE clause.
predicate_builders = [
    ('Community_Flag',
     lambda community_flag: "df.'Attributes.EC_MP_Detail.MBRSHIP_COMMUNITY_FLAG' = '" + community_flag + "'"),
    ('Member_Of', getMemberOfPredicate),
    ('Not_Member_Of', getNotMemberOfPredicate),
    ('Community', getCommunityPredicate),
    ('Ignore_Community', getIgnoreCommunityPredicate),
    ('RS_Ben_Prog_Name', getResortSuitBenefitProgramPredicate),
    ('RS_Ben_Prog_Name_Not', getResortSuitBenefitProgramIgnorePredicate),
    ('ONE', getOnePredicate),
    ('Category_Golf', getCategoryGolfPredicate),
    ('Category_Social', getCategorySocialPredicate),
    ('Mbrship_type_desc_contains', getMbrshipTypeDescContainsPredicate),
    ('Mbrship_type_desc_equals', getMbrshipTypeDescEqualsPredicate),
    ('Mbrship_type_desc_does_not_have', getMbrshipTypeDescDoesNotHavePredicate),
]

# Result: one {"benefit_tier": ..., "query": ...} dict per metadata row, in sheet order.
benefit_tier_and_query = []
for index, row in metadata_df.iterrows():
    # Tier names are stored with double spaces collapsed to a single space.
    benefit_tier = row['Benefit Tier'].replace('  ', ' ')

    query_predicate = ""
    for column_name, build_predicate in predicate_builders:
        cell_value = row[column_name]
        if cell_value:  # blank cells contribute no condition
            query_predicate = addPredicate(query_predicate, build_predicate(cell_value))

    if len(query_predicate) > 0:
        query = "select df.ID from df where " + query_predicate
    else:
        query = ""

    # Exclusions: subtract the queries of the named tiers, which must already have
    # been built by an earlier row.
    exclude_benefit_tiers = row['Exclude_Benefit_Tiers'].strip()
    if exclude_benefit_tiers:
        if not query:
            # " Except ..." with nothing in front of it is not a valid statement.
            raise ValueError("Benefit tier '" + benefit_tier
                             + "' lists exclusions but has no conditions of its own")
        for exclude_benefit_tier in exclude_benefit_tiers.split(','):
            if not exclude_benefit_tier.strip():
                continue  # stray or trailing comma
            # Try the name as written first, then normalised the way stored names are,
            # so "A, B" lists and double-spaced names still resolve.
            excluded_query = ""
            for candidate in (exclude_benefit_tier,
                              exclude_benefit_tier.replace('  ', ' '),
                              exclude_benefit_tier.strip().replace('  ', ' ')):
                excluded_query = getBenefitTierQuery(candidate, benefit_tier_and_query)
                if excluded_query:
                    break
            if not excluded_query:
                # Appending an empty query would silently produce broken SQL.
                raise ValueError("Benefit tier '" + benefit_tier + "' excludes '"
                                 + exclude_benefit_tier.strip()
                                 + "', which has no query defined in an earlier row")
            if " Except " in excluded_query:
                # SQLite groups compound selects left to right, so an excluded query
                # that itself contains Except must be wrapped to be subtracted as a unit.
                excluded_query = "select * from (" + excluded_query + ")"
            query = query + " Except " + excluded_query

    benefit_tier_and_query.append({"benefit_tier": benefit_tier, "query": query})

In [13]:
#benefit_tier_and_query

In [14]:
#benefit_tier_and_query

In [15]:
# One row per benefit tier; the dict keys ('benefit_tier', 'query') become the columns.
query_df = pd.DataFrame(data=benefit_tier_and_query)

In [16]:
#query_df.describe()

In [17]:
# Preview the first five generated queries.
query_df.head(n=5)

Unnamed: 0,benefit_tier,query
0,All Community Programming,select df.ID from df where df.'Attributes.EC_M...
1,Associate Golf,select df.ID from df where df.'Attributes.EC_M...
2,ATL Society for Alliance Clubs,select df.ID from df where df.'Attributes.EC_M...
3,ATL Society for CCoS Golf,select df.ID from df where df.'Attributes.EC_M...
4,ATL Society for CCoS Non Golf,select df.ID from df where df.'Attributes.EC_M...


In [18]:
# Save the generated queries. The target folder is created first so the cell also
# works on a fresh checkout where "output" does not exist yet.
# NOTE(review): the file name is tagged V29 while the metadata workbook loaded at the
# top of the notebook is V30 — confirm the intended version tag.
os.makedirs("output", exist_ok=True)
query_df.to_csv("output/benefit_tier_query_20190919 V29.csv", index=False)

In [19]:
# Reload the saved queries as text. na_filter=False keeps blank cells as "" and stops
# values such as "NA" or "NULL" from being converted to NaN, matching how the metadata
# workbook is read.
query_df = pd.read_csv("output/benefit_tier_query_20190919 V29.csv", dtype=str, na_filter=False)

<b>Test the queries</b>

In [20]:
import sqlite3

In [21]:
# Membership data the generated queries are tested against; every column is read as text.
# The frame is later written to SQLite as the table "df", the name the queries select from.
# NOTE(review): presumably the Reltio export of non-resigned members mentioned in the
# notebook headings — confirm the source and extraction date.
df = pd.read_csv("Datasets/prod_non_resigned_membership_20190911.csv", dtype=str)

In [22]:
#df.head()

<b>Load the generated queries and the Reltio export of non-resigned members into SQLite</b>

In [23]:
%%time
# Connect to the SQLite database file used for testing the generated queries.
conn = sqlite3.connect("membershipQuery.db")
# NOTE(review): this cursor is not used by any other visible cell.
cur = conn.cursor()
# Write both frames as tables, replacing any table of the same name that already exists.
query_df.to_sql("query_df", conn, if_exists="replace")
# "df" is the table name the generated queries select from.
df.to_sql("df", conn, if_exists="replace")

Wall time: 8.04 s


<b>Generate membership counts from a recent Reltio export</b>

In [24]:
# Run every generated query against SQLite and report the tiers that match no rows,
# plus every tier whose name contains 'Champions'.
# `count` is the running total of matched rows over all tiers (a row matched by
# several tiers is counted once per tier).
count = 0
for index, row in query_df.iterrows():
    query_str = row['query']
    # A tier without a usable query (blank text, or NaN after the CSV round trip)
    # cannot be executed; report it and move on instead of failing inside pandas.
    if not isinstance(query_str, str) or not query_str.strip():
        print("No query for " + str(row['benefit_tier']) + "; skipped")
        continue
    rs = pd.read_sql_query(query_str, conn)
    member_count = len(rs.index)
    count = count + member_count
    if member_count == 0 or 'Champions' in row['benefit_tier']:
        print("Membership count for " + row['benefit_tier'] + ": " + str(member_count))

Membership count for Champions: 25
Membership count for Champions YEX ONE - Retired: 170
Membership count for Champions YEX ONE: 9
Membership count for Champions ONE: 489
Membership count for NCS: 0
Membership count for ONE TX Tech - Houston: 0
Membership count for ONE UT Club - Houston: 0
Membership count for Topgolf: 0
Membership count for Premier Golf: 0
Membership count for Full Golf: 0
