In [72]:
import pandas as pd
import re
import hashlib
import sqlite3

In [73]:
# Load the cleansed travel-benefit sheet; dtype=str asks pandas to read every column as text.
travel_benefit = pd.read_csv(
    "output/travel_benefit_cleansed.csv",
    dtype=str,
)

In [74]:
# The text is normalized before it is hashed:
# (1) trim, (2) convert to lower case, (3) replace each run of consecutive
# whitespace characters with a single underscore (_)

pattern = re.compile(r'\s+')
def generateIdBasedOnHash(text):
    """Return the MD5 hex digest of the normalized form of ``text``.

    Normalization strips leading/trailing whitespace, lower-cases the text and
    collapses every whitespace run into one underscore, so inputs that differ
    only in case or spacing produce the same id.

    Parameters
    ----------
    text : str
        The key to derive an id from.

    Returns
    -------
    str
        32-character hexadecimal MD5 digest of the UTF-8 encoded normalized text.
    """
    collapsed = pattern.sub('_', text.strip().lower())
    digest = hashlib.md5(collapsed.encode('utf-8'))
    return digest.hexdigest()

In [75]:
# Sanity check: hash a sample "|"-joined key and display the resulting id.
generateIdBasedOnHash("|".join(('BIKASH', "Anindya")))

'1e601c901923aca854386c45ba831a32'

In [76]:
# Build the normalized lookup sets from the benefit columns of the sheet.
# Every column after the first three is handled as one travel benefit: the
# header minus its last space-separated word is the package name, the last
# word is the benefit type, and each distinct non-null cell value other than
# 'No' is an access rule.
benefit_package_set = set()   # (benefit_package_id, package name)
benefit_set = set()           # (benefit_package_id, benefit_id, benefit name, "<benefit> - <rule>", type, rule)
access_rule_set = set()       # (benefit_id, benefit name, access_rule_id, rule)
access_rule_club_set = set()  # (benefit_id, access_rule_id, entity number, type)
columns = list(travel_benefit.columns)
for item in columns[3:]:

    benefit_package_name = item.rsplit(' ', 1)[0].strip().upper()
    benefit_package_id = generateIdBasedOnHash(benefit_package_name)
    benefit_package_set.add((benefit_package_id, benefit_package_name))

    benefit_name = item.strip().upper()
    # The type depends only on the column header, so compute it once per column.
    benefit_type = benefit_name.split()[-1]

    # Index with the raw header: a stripped header raises KeyError when the
    # header carries surrounding whitespace. 'No' is filtered out rather than
    # removed with list.remove('No'), which raises ValueError for a column
    # that never contains 'No'.
    access_rules = [value for value in travel_benefit[item].dropna().unique() if value != 'No']
    for access_rule_unmodified in access_rules:
        access_rule = access_rule_unmodified.strip().upper()
        # The "hash" suffix keeps benefit_id distinct from access_rule_id,
        # which is derived from the same three parts.
        benefit_id = generateIdBasedOnHash(benefit_package_name + "|" + benefit_name + "|" + access_rule + "hash")
        access_rule_id = generateIdBasedOnHash(benefit_package_name + "|" + benefit_name + "|" + access_rule)
        benefit_set.add((benefit_package_id, benefit_id, benefit_name, benefit_name + " - "+ access_rule, benefit_type, access_rule))
        access_rule_set.add((benefit_id, benefit_name, access_rule_id, access_rule))

        # Entity numbers of the rows whose cell holds exactly this raw value.
        clubs = travel_benefit['Entity Number'][travel_benefit[item] == access_rule_unmodified]
        for club in clubs:
            access_rule_club_set.add((benefit_id, access_rule_id, club, benefit_type))

In [77]:
# Turn each collected set of tuples into a DataFrame with named columns.
benefit_package_df = pd.DataFrame(
    data=list(benefit_package_set),
    columns=['benefit_package_id', 'BenefitPackageName'],
)
benefit_df = pd.DataFrame(
    data=list(benefit_set),
    columns=['benefit_package_id', 'benefit_id', 'TravelBenefitName', 'BenefitName', 'Type', 'BenefitParameter'],
)
access_rule_df = pd.DataFrame(
    data=list(access_rule_set),
    columns=['benefit_id', 'BenefitName', 'access_rule_id', 'AccessRuleName'],
)
access_rule_club_df = pd.DataFrame(
    data=list(access_rule_club_set),
    columns=['benefit_id', 'access_rule_id', 'entity_code', 'Type'],
)

<B>Sort the Data Frames</B>

In [78]:
# Put every frame into a stable order before it is written out. The frames are
# built from Python sets, whose iteration order is not guaranteed to be the
# same from one run to the next, so the sort keys must fully determine the
# row order.
benefit_package_df = benefit_package_df.sort_values('BenefitPackageName')
benefit_df = benefit_df.sort_values('BenefitName')
access_rule_df = access_rule_df.sort_values(['BenefitName','AccessRuleName'])
# access_rule_id repeats once per club; entity_code breaks those ties so the
# exported file has the same row order on every run.
access_rule_club_df = access_rule_club_df.sort_values(['access_rule_id', 'entity_code'])

<B>Save Files</B>

In [79]:
# Write the four lookup tables into the output directory without the index.
# Forward slashes are used because they work on Windows as well as POSIX; a
# backslash separator is only a path separator on Windows and elsewhere
# becomes part of the file name.
benefit_package_df.to_csv("output/travel_benefit_package_option2.csv", index=False)
benefit_df.to_csv("output/travel_benefit_option2.csv", index=False)
access_rule_df.to_csv("output/travel_access_rule_option2.csv", index=False)
access_rule_club_df.to_csv("output/travel_access_rule_and_club_option2.csv", index=False)

In [80]:
# Read the two metadata sheets that are joined onto the benefit table.
max_usage_metadata_df = pd.read_csv("datasets/metadata/travel_benefit_max_usage_meta_data.csv")
advance_metadata_df = pd.read_csv("datasets/metadata/travel_benefit_advance_meta_data.csv")

# Stage all three frames as tables in a SQLite database file so they can be
# joined with SQL; an existing table of the same name is replaced.
conn = sqlite3.connect("travelBenefit.db")
cur = conn.cursor()
benefit_df.to_sql(name="travelBenefit", con=conn, if_exists="replace")
max_usage_metadata_df.to_sql(name="maxUsageMetadata", con=conn, if_exists="replace")
advance_metadata_df.to_sql(name="advanceMetadata", con=conn, if_exists="replace")

In [81]:
# Enrich every benefit row with its max-usage and advance metadata. Both joins
# are left joins on trimmed, upper-cased keys, so benefits without metadata are
# kept with empty metadata columns.
query = """select travelBenefit.benefit_package_id,
         travelBenefit.benefit_id,
         travelBenefit.TravelBenefitName,
         travelBenefit.BenefitName,
         travelBenefit.Type,
         travelBenefit.BenefitParameter,
         maxUsageMetadata.UsageType,
         maxUsageMetadata.IsQuantityUnlimited,
         maxUsageMetadata.Quantity,
         maxUsageMetadata.Period,
         maxUsageMetadata.Scope,
         maxUsageMetadata.MaxPerClub,
         advanceMetadata.Prompt,
         advanceMetadata.PromptScope,
         advanceMetadata.DefaultValue
         from travelBenefit
         left join advanceMetadata on trim(upper(advanceMetadata.benefit)) = trim(upper(travelBenefit.TravelBenefitName))
         left join maxUsageMetadata on trim(upper(maxUsageMetadata.KeyColumn)) = trim(upper(travelBenefit.BenefitParameter))"""

rs_benefit = pd.read_sql_query(query, conn)

# Override the quantity to 4 for the two spouse-golf benefits with usage type CFO.
# TravelBenefitName is stored upper-cased (it is built from the column header
# with .upper()), so the names are compared case-insensitively; an exact
# comparison against the mixed-case names never matches any row.
quantity_override_benefits = [
    name.upper()
    for name in ('Signature Gold Spouse Unlimited Golf', 'Signature Gold Spouse Golf')
]
benefit_name_normalized = rs_benefit.TravelBenefitName.str.strip().str.upper()
mask = (rs_benefit.UsageType == 'CFO') & benefit_name_normalized.isin(quantity_override_benefits)
rs_benefit.loc[mask, 'Quantity'] = 4


In [82]:
# Export the enriched benefit table, then remove the staging tables and
# release the cursor and the connection.
rs_benefit.to_csv("output/travel_benefit_option2_withAllValues.csv", index=False)
for drop_statement in (
    "DROP TABLE travelBenefit",
    "DROP TABLE maxUsageMetadata",
    "DROP TABLE advanceMetadata",
):
    conn.execute(drop_statement)
cur.close()
conn.close()