## Import Libraries

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np

## Loading the Dataset

In [2]:
# Read the transaction records from CSV and expose them as a DataFrame.
# The CSV is not available for download from the GitHub repository; copy it into
# the folder this notebook runs from first (the path below is relative).
tx_data = pd.read_csv(filepath_or_buffer='fake_transactional_data_24.csv')
tx_df = pd.DataFrame(data=tx_data)

In [3]:
# Preview the first five rows to check the columns that were loaded.
tx_df.head(n=5)

Unnamed: 0,from_totally_fake_account,monopoly_money_amount,to_randomly_generated_account,not_happened_yet_date
0,10371.0,4.0,CINEMA,01/01/2025
1,88339.0,2.4,40544,01/01/2025
2,18555.0,2.4,85149,01/01/2025
3,18555.0,4.1,HIPSTER_COFFEE_SHOP,01/01/2025
4,80792.0,1.95,18555,01/01/2025


## Data Extraction and Organization

### Creating Senders and Receivers List

In [34]:
# Distinct sender and receiver account identifiers, each as an ascending list.
unique_senders_list = sorted(tx_df['from_totally_fake_account'].unique())
unique_receivers_list = sorted(tx_df['to_randomly_generated_account'].unique())

In [40]:
# Split the receivers into user accounts and business accounts.
# A receiver whose identifier is entirely numeric (e.g. '40544') is treated as a
# user account; anything else (e.g. 'CINEMA') is treated as a business.
# Both lists keep the order of unique_receivers_list.
unique_receivers_user_list = [
    receiver for receiver in unique_receivers_list if receiver.isnumeric()
]
unique_receivers_biz_list = [
    receiver for receiver in unique_receivers_list if not receiver.isnumeric()
]

['ACCESSORY_SHOP', 'A_CAFE', 'A_LOCAL_COFFEE_SHOP', 'A_SUPERMARKET', 'BAR', 'BOOKSHOP', 'BUTCHER', 'BUTCHERS', 'CAFE', 'CHILDRENDS_SHOP', 'CHINESE_RESTAURANT', 'CHINESE_TAKEAWAY', 'CINEMA', 'CLOTHES_SHOP', 'COCKTAIL_BAR', 'COFFEE_SHOP', 'COMIC_BOOK_SHOP', 'COOKSHOP', 'DEPARTMENT_STORE', 'DIY_STORE', 'DVD_SHOP', 'ELECTRONICS_SHOP', 'EXPRESS_SUPERMARKET', 'FASHIONABLE_SPORTSWARE_SHOP', 'FASHION_SHOP', 'FLORIST', 'G&T_BAR', 'GAME_SHOP', 'GOURMET_COFFEE_SHOP', 'GREENGROCER', 'GYM', 'HIPSTER_COFFEE_SHOP', 'HIPSTER_ELECTRONICS_SHOP', 'HOME_IMPROVEMENT_STORE', 'INDIAN_RESTAURANT', 'JEWLLERY_SHOP', 'KEBAB_SHOP', 'KIDS_ACTIVITY_CENTRE', 'KIDS_CLOTHING_SHOP', 'LARGE_SUPERMARKET', 'LIQUOR_STORE', 'LOCAL_BOOKSHOP', 'LOCAL_PUB', 'LOCAL_RESTAURANT', 'LOCAL_WATERING_HOLE', 'LUNCH_PLACE', 'LUNCH_VAN', 'NERDY_BOOK_STORE', 'PET_SHOP', 'PET_TOY_SHOP', 'PRETENTIOUS_COFFEE_SHOP', 'PUB', 'RESTAURANT', 'RESTAURANT_VOUCHER', 'ROASTERIE', 'RUNNING_SHOP', 'SANDWICH_SHOP', 'SCHOOL_SUPPLY_STORE', 'SEAFOOD_RESAURA

### Working with Business Accounts

In [41]:
# Business transaction table: keep only the rows of the original dataframe whose
# receiver appears in unique_receivers_biz_list, ordered by receiver name.
is_biz_receiver = tx_df['to_randomly_generated_account'].isin(unique_receivers_biz_list)
biz_tx_df = tx_df.loc[is_biz_receiver].sort_values(by='to_randomly_generated_account')

In [42]:
# Descriptive statistics of the transaction amount for each receiving business.
# The grouped selection is built once and reused for every statistic instead of
# repeating the same groupby for each one.
biz_amounts_grouped = biz_tx_df.groupby('to_randomly_generated_account')['monopoly_money_amount']

# Volume: number of transactions and total amount received per business.
biz_tx_count = biz_amounts_grouped.count()
biz_tx_sum = biz_amounts_grouped.sum()

# Typical transaction size per business.
biz_tx_mean = biz_amounts_grouped.mean()
biz_tx_median = biz_amounts_grouped.median()

In [43]:
# For each receiving business, collect the sender account of every transaction
# into one list (a sender appears once per transaction, so repeats are kept).
senders_by_business = biz_tx_df.groupby('to_randomly_generated_account')['from_totally_fake_account']
biz_tx_customers = senders_by_business.agg(list)

to_randomly_generated_account
ACCESSORY_SHOP            [46983.0, 75715.0, 14754.0, 26805.0, 18654.0, ...
A_CAFE                    [19143.0, 36391.0, 14754.0, 17151.0, 74767.0, ...
A_LOCAL_COFFEE_SHOP       [66113.0, 45642.0, 89061.0, 7672.0, 74169.0, 6...
A_SUPERMARKET             [2030.0, 79680.0, 97573.0, 89698.0, 21727.0, 4...
BAR                       [27552.0, 13311.0, 71571.0, 38240.0, 22418.0, ...
                                                ...                        
WE_HAVE_BEAN_WEIGHTING    [33833.0, 54900.0, 99335.0, 48098.0, 7831.0, 7...
WHISKEY_BAR               [82958.0, 10454.0, 48894.0, 87388.0, 31105.0, ...
WHISKEY_SHOP              [6889.0, 6508.0, 75049.0, 49782.0, 97922.0, 99...
WINE_BAR                  [38962.0, 16779.0, 95957.0, 51514.0, 36725.0, ...
WINE_CELLAR               [24271.0, 66581.0, 78915.0, 93420.0, 58372.0, ...
Name: from_totally_fake_account, Length: 79, dtype: object


In [27]:
# Getting Descriptive Stats from Customer Details
# TODO: the per-business pass is still unwritten. The original sketch was a bare
# `biz_users` name followed by a `for biz_num in range(0, len(biz_tx_customers)):`
# header with no body, which stopped the cell from parsing; it is kept here as a
# note until the loop is implemented.

# Spot-check the first business. `.iloc` selects by position (the Series is
# labelled by business name, so a plain `[0]` is a label lookup), and sorted()
# returns a new list so the sender list stored inside biz_tx_customers is left
# untouched when this cell is re-run.
test_list = sorted(biz_tx_customers.iloc[0])
print(test_list)

[1357.0, 1357.0, 1357.0, 1357.0, 1357.0, 1357.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1607.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1623.0, 1828.0, 1828.0, 1828.0, 1828.0, 1828.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 1931.0, 2090.0, 2090.0, 2090.0, 2090.0, 2090.0, 2090.0, 2274.0, 2274.0, 2274.0, 2274.0, 2274.0, 2274.0, 2719.0, 2719.0, 2719.0, 2719.0, 2719.0, 2719.0, 2719.0, 2844.0, 2844.0, 2844.0, 2844.0, 2844.0, 2844.0, 2844.0, 3017.0, 3017.0, 3017.0, 3017.0, 3017.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 3287.0, 4559.0, 4559.0, 4559.0, 4559.0, 4559.0, 4559.0, 4559.0, 4559.0, 4559.0, 4818.0, 4818.0, 4818.0, 4818.0, 4818.0, 4818.0, 4818.0, 4818.0, 4818.0, 5283.0, 5283.0, 5283.0, 5283.0, 5283.0, 5283.0, 5283.0, 5283.0, 5283.0, 5299.0, 5299.0, 5299.0, 5299.0, 5299.0, 5299.0, 5299.0, 5299.0, 5299.0, 5501.0, 5501.0,