## Importing Libraries

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import ast
from statistics import mean, median
import matplotlib.pyplot as plt

## Importing CSV

In [2]:
# Load the saved per-user CSV and wrap the result in a DataFrame
user_data = pd.read_csv('../SavedData/Lloyds_users.csv')
user_df = pd.DataFrame(user_data)

# The columns below are read from the CSV as text; ast.literal_eval parses each
# cell back into the Python object it spells out (presumably lists -- the later
# cells call .index() on them and index into them by position).
literal_columns = [
    'user_biz_interaction',
    'user_biz_interaction_frequency',
    'user_biz_spend',
    'user_user_interaction',
    'user_user_interaction_frequency',
    'user_user_spend',
    'user_industry_interaction',
    'user_industry_interaction_frequency',
    'user_industry_spend',
    'user_date_transaction',
    'user_date_transaction_freq',
]
for column_name in literal_columns:
    user_df[column_name] = user_df[column_name].apply(ast.literal_eval)

## Setting the Negative and Positive Items

In [3]:
# Business-category labels whose spend is added to a user's "positive" total.
# The labels are matched verbatim (list.index) against the entries of
# user_biz_interaction, so the spellings must stay exactly as they are.
positive_biz_list = [
    "FASHIONABLE_SPORTSWARE_SHOP", "TRAINER_SHOP", "SPORT_SHOP",
    "RUNNING_SHOP", "GREENGROCER", "GYM",
]

# Business-category labels whose spend is added to a user's "negative" total.
# Same verbatim-match caveat: do not "correct" spellings such as LIQUORE_STORE
# without also checking the values stored in the data.
negative_biz_list = [
    "TAKEAWAY", "CHINESE_TAKEAWAY", "TAKEAWAY_CURRY",
    "LIQUORE_STORE", "WHISKEY_SHOP", "WINE_CELLAR",
    "WINE_BAR", "WHISKEY_BAR", "PUB",
    "G&T_BAR", "COCKTAIL_BAR", "LOCAL_WATERING_HOLE",
    "BAR", "LOCAL_PUB", "KEBAB_SHOP",
]

## Building Insurance Dataframe

### Calculating Positive Spend List

In [4]:
# Total spend per user across every category in positive_biz_list
# (one entry per row of user_df, in row order).
positive_biz_spend_list = []

# Per-category spend: category label -> list with one entry per user.
# Keys are derived from positive_biz_list so the two cannot drift apart.
positive_biz_list_dict = {biz: [] for biz in positive_biz_list}

# Fetch the two columns once instead of re-indexing user_df for every category.
biz_interaction_col = user_df['user_biz_interaction']
biz_spend_col = user_df['user_biz_spend']

for user_index in range(len(user_df)):
    interaction_list = biz_interaction_col.iloc[user_index]
    interaction_spend_list = biz_spend_col.iloc[user_index]
    positive_biz_spend = 0

    for biz in positive_biz_list:
        try:
            # The spend is read at the position of the category's first
            # occurrence in this user's interaction list.
            positive_biz_pos = interaction_list.index(biz)
            biz_spend = float(interaction_spend_list[positive_biz_pos])
        except (ValueError, IndexError, TypeError):
            # ValueError: the user never interacted with this category, or the
            #             stored spend is not a numeric string.
            # IndexError: the spend list is shorter than the interaction list.
            # TypeError:  the stored spend cannot be converted (e.g. None).
            # All of these count as zero spend; any other exception now
            # propagates instead of being silently swallowed by a bare except.
            biz_spend = 0

        positive_biz_spend += biz_spend
        positive_biz_list_dict[biz].append(biz_spend)

    positive_biz_spend_list.append(positive_biz_spend)

### Calculating Negative Spend List

In [None]:
# Total spend per user across every category in negative_biz_list
# (one entry per row of user_df, in row order).
negative_biz_spend_list = []

# Per-category spend: category label -> list with one entry per user.
# Keys are derived from negative_biz_list so the two cannot drift apart.
negative_biz_list_dict = {biz: [] for biz in negative_biz_list}

# Fetch the two columns once instead of re-indexing user_df for every category.
biz_interaction_col = user_df['user_biz_interaction']
biz_spend_col = user_df['user_biz_spend']

for user_index in range(len(user_df)):
    interaction_list = biz_interaction_col.iloc[user_index]
    interaction_spend_list = biz_spend_col.iloc[user_index]
    negative_biz_spend = 0

    for biz in negative_biz_list:
        try:
            # The spend is read at the position of the category's first
            # occurrence in this user's interaction list.
            negative_biz_pos = interaction_list.index(biz)
            biz_spend = float(interaction_spend_list[negative_biz_pos])
        except (ValueError, IndexError, TypeError):
            # ValueError: the user never interacted with this category, or the
            #             stored spend is not a numeric string.
            # IndexError: the spend list is shorter than the interaction list.
            # TypeError:  the stored spend cannot be converted (e.g. None).
            # All of these count as zero spend; any other exception now
            # propagates instead of being silently swallowed by a bare except.
            biz_spend = 0

        negative_biz_spend += biz_spend
        negative_biz_list_dict[biz].append(biz_spend)

    negative_biz_spend_list.append(negative_biz_spend)

# NOTE(review): this prints every per-user value for every category; consider
# replacing it with a short summary before sharing the notebook.
print(negative_biz_list_dict)
    


### Calculating Net Spending List

In [None]:
# Net spend per user: positive total minus negative total, paired by position.
# zip stops at the shorter list, so both lists are expected to have one entry
# per user.
net_pos_neg_spending = []
for positive_total, negative_total in zip(positive_biz_spend_list, negative_biz_spend_list):
    net_pos_neg_spending.append(positive_total - negative_total)

#print(net_pos_neg_spending)

### Creating the Insurance Dataframe

In [7]:
# Column layout of the insurance frame, left to right:
#   user_id | positive categories | positive_spend |
#   negative categories | negative_spend | net_spend
positive_columns = [
    'FASHIONABLE_SPORTSWARE_SHOP',
    'TRAINER_SHOP',
    'SPORT_SHOP',
    'RUNNING_SHOP',
    'GREENGROCER',
    'GYM',
]
negative_columns = [
    'TAKEAWAY',
    'CHINESE_TAKEAWAY',
    'TAKEAWAY_CURRY',
    'LIQUORE_STORE',
    'WHISKEY_SHOP',
    'WINE_CELLAR',
    'WINE_BAR',
    'WHISKEY_BAR',
    'PUB',
    'G&T_BAR',
    'COCKTAIL_BAR',
    'LOCAL_WATERING_HOLE',
    'BAR',
    'LOCAL_PUB',
    'KEBAB_SHOP',
]

# Dicts keep insertion order, so filling the mapping in this order fixes the
# column order of the resulting DataFrame.
insurance_columns = {'user_id': list(user_df['user_id'])}
for column in positive_columns:
    insurance_columns[column] = positive_biz_list_dict[column]
insurance_columns['positive_spend'] = positive_biz_spend_list
for column in negative_columns:
    insurance_columns[column] = negative_biz_list_dict[column]
insurance_columns['negative_spend'] = negative_biz_spend_list
insurance_columns['net_spend'] = net_pos_neg_spending

insurance_df = pd.DataFrame(insurance_columns)

In [16]:
# Order the rows from highest to lowest 'positive_spend'
insurance_df = insurance_df.sort_values('positive_spend', ascending=False)

In [17]:
# Display the frame as left by the preceding sort (highest positive_spend first)
insurance_df

Unnamed: 0,user_id,FASHIONABLE_SPORTSWARE_SHOP,TRAINER_SHOP,SPORT_SHOP,RUNNING_SHOP,GREENGROCER,GYM,positive_spend,TAKEAWAY,CHINESE_TAKEAWAY,...,WHISKEY_BAR,PUB,G&T_BAR,COCKTAIL_BAR,LOCAL_WATERING_HOLE,BAR,LOCAL_PUB,KEBAB_SHOP,negative_spend,net_spend
4578,59828,307.0,449.0,662.0,406.0,1269.02,240.0,3333.02,78.98,215.11,...,515.5,1222.96,523.5,359.0,274.5,790.5,367.15,20.5,5267.85,-1934.83
662,16464,346.0,326.0,567.0,602.0,1186.09,240.0,3267.09,154.29,172.25,...,7.5,1188.07,16.5,12.0,1073.0,881.0,481.21,33.5,4506.78,-1239.69
1044,2069,257.0,255.0,334.0,576.0,1578.95,240.0,3240.95,94.29,190.82,...,0.0,1058.88,0.0,0.0,1002.0,1294.0,393.69,27.0,4485.68,-1244.73
5712,72283,551.0,196.0,674.0,305.0,1251.20,240.0,3217.20,362.25,321.63,...,12.0,1056.74,27.0,6.0,777.5,999.0,335.52,238.0,4433.39,-1216.19
7957,97955,369.0,370.0,431.0,512.0,1289.52,240.0,3211.52,366.94,393.45,...,0.0,1442.68,0.0,0.0,1140.5,1172.5,430.41,5.0,5295.78,-2084.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7140,88754,0.0,0.0,0.0,0.0,0.00,0.0,0.00,467.16,444.48,...,133.5,1699.62,112.0,79.0,1016.0,1239.5,247.55,33.0,6042.01,-6042.01
7871,97069,0.0,0.0,0.0,0.0,0.00,0.0,0.00,215.31,283.90,...,498.5,1067.28,650.5,598.0,337.5,771.5,375.51,8.5,5459.43,-5459.43
1868,29443,0.0,0.0,0.0,0.0,0.00,0.0,0.00,192.65,538.98,...,91.5,1511.95,92.5,127.0,1261.0,1035.0,344.88,281.0,6042.04,-6042.04
4045,53816,0.0,0.0,0.0,0.0,0.00,0.0,0.00,154.89,320.60,...,74.5,1599.27,331.0,125.5,1286.0,1320.5,109.59,199.5,6042.10,-6042.10
