In [1]:
import pandas as pd

# TODO (later): add a business name column, e.g. "James William Agriculture".
# Load the raw customer table and preview its first rows.
customer_data = pd.read_csv(filepath_or_buffer='data/customer_data.csv')
customer_data.head(n=5)


Unnamed: 0,Customer Name,uid,business sector,location,phone number,national ID,passport,tin,gender,education level,business start date
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,Manufacturing,Entebbe,256788183660,86462298,U8995688,3140853,Female,University,2021-09-15
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,Hospitality,Entebbe,256757288527,25444622,U1349961,7876027,Female,University,2018-03-16
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,Retail,Mbarara,256795669420,22943482,U2754911,5738302,Male,Primary,2013-06-24
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,Fisheries,Kampala,256705734145,62064341,U1887849,1498630,Female,Secondary,2018-04-24
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,Fisheries,Fort Portal,256750464352,92025900,U2976054,6953697,Male,Primary,2010-03-18


In [2]:
# Load the raw transaction log (one row per transaction) and preview it.
transactions_data = pd.read_csv(filepath_or_buffer='data/transactions_data.csv')
transactions_data.head(n=5)

Unnamed: 0,Customer Name,uid,amount,type,reason,date
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,579389.94,Credit,Operating Expenses,2023-06-25
1,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,752544.38,Credit,Refund,2021-12-22
2,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,73243.63,Debit,Purchase,2022-09-08
3,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,672908.41,Debit,Sale,2023-09-30
4,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,473237.81,Debit,Refund,2023-10-08


In [3]:
# Report how many transaction rows were loaded.
print(f"Number of rows in transactions_data: {len(transactions_data)}")

Number of rows in transactions_data: 88145


# Data cleaning
- Remove columns from customer_data df which are not needed for our analysis
- Add a new column to customer_data which shows the business duration
- Add new columns which calculate a value combining business age with the number and amount of transactions (maybe business_score?)

In [4]:

# Drop the identifier columns that are not needed for the analysis.
customer_data_cleaned = customer_data.drop(
    ['phone number', 'national ID', 'passport', 'tin'],
    axis='columns',
)
customer_data_cleaned.head()

Unnamed: 0,Customer Name,uid,business sector,location,gender,education level,business start date
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,Manufacturing,Entebbe,Female,University,2021-09-15
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,Hospitality,Entebbe,Female,University,2018-03-16
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,Retail,Mbarara,Male,Primary,2013-06-24
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,Fisheries,Kampala,Female,Secondary,2018-04-24
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,Fisheries,Fort Portal,Male,Primary,2010-03-18


Add business duration column

In [5]:
from datetime import datetime

# Parse the start dates into a separate Series instead of overwriting the
# column in the raw `customer_data` frame: the loaded data stays untouched and
# this cell produces the same result when it is re-run.
business_start_dates = pd.to_datetime(customer_data_cleaned['business start date'])

# Age of each business in whole days, measured at the moment the cell runs.
# NOTE: because this uses the current time, the durations (and every value
# derived from them further down) change between runs.
duration_in_days = (datetime.now() - business_start_dates).dt.days

# Approximate years as days / 365 (leap days are ignored), rounded to 2 decimals.
duration_in_years = (duration_in_days / 365).round(2)

customer_data_cleaned['business duration (years)'] = duration_in_years

customer_data_cleaned.head()


Unnamed: 0,Customer Name,uid,business sector,location,gender,education level,business start date,business duration (years)
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,Manufacturing,Entebbe,Female,University,2021-09-15,2.1
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,Hospitality,Entebbe,Female,University,2018-03-16,5.6
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,Retail,Mbarara,Male,Primary,2013-06-24,10.33
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,Fisheries,Kampala,Female,Secondary,2018-04-24,5.49
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,Fisheries,Fort Portal,Male,Primary,2010-03-18,13.6


### Add performance index column

Custom formula for *performance index*:

Performance Index =  (λ1 * Net Profit Per Year +  λ2 * Transaction Volume Per Year + λ3 * Lifetime Net Profit)


Where:

- λ1 is the weight for Net Profit Per Year
- λ2 is the weight for Transaction Volume Per Year
- λ3 is the weight for Lifetime Net Profit


In [6]:
# Net profit per customer: total 'Credit' amounts minus total 'Debit' amounts.
# fill_value=0 keeps customers that only have one of the two transaction types.
total_credit = transactions_data[transactions_data['type'] == 'Credit'].groupby('uid')['amount'].sum()
total_debit = transactions_data[transactions_data['type'] == 'Debit'].groupby('uid')['amount'].sum()
net_profit = total_credit.subtract(total_debit, fill_value=0)

# Transaction volume: number of transaction rows per customer.
transaction_volume = transactions_data.groupby('uid').size()

# Left-merge so customers without any transactions are kept.
# Only the newly merged column is zero-filled each time: a frame-wide
# fillna(0) would also overwrite missing values in unrelated columns (a missing
# 'location' would become 0 and later be one-hot encoded as its own category,
# and a missing duration would become 0 and cause a division by zero below).
customer_data_cleaned_with_net_profit = (
    customer_data_cleaned
    .merge(net_profit.rename('Net Profit'), on='uid', how='left')
    .fillna({'Net Profit': 0})
)
customer_data_cleaned_with_txn_vol = (
    customer_data_cleaned_with_net_profit
    .merge(transaction_volume.rename('Transaction Volume'), on='uid', how='left')
    .fillna({'Transaction Volume': 0})
)

# Per-year rates: divide the lifetime totals by the business age in years.
business_years = customer_data_cleaned_with_txn_vol['business duration (years)']
customer_data_cleaned_with_txn_vol['Net Profit Per Year'] = (
    customer_data_cleaned_with_txn_vol['Net Profit'] / business_years
)
customer_data_cleaned_with_txn_vol['Transaction Volume Per Year'] = (
    customer_data_cleaned_with_txn_vol['Transaction Volume'] / business_years
)

# Weights: λ1 -> Net Profit Per Year, λ2 -> Transaction Volume Per Year,
# λ3 -> lifetime Net Profit.
# NOTE(review): the three terms are summed unscaled, so with equal weights the
# profit terms dominate the transaction-volume term — confirm this is intended.
λ1, λ2, λ3 = 1, 1, 1

# Performance index = weighted sum of the three components.
customer_data_cleaned_with_txn_vol['performance index'] = (
    λ1 * customer_data_cleaned_with_txn_vol['Net Profit Per Year']
    + λ2 * customer_data_cleaned_with_txn_vol['Transaction Volume Per Year']
    + λ3 * customer_data_cleaned_with_txn_vol['Net Profit']
)

# Min-max normalise the performance index to the [0, 1] range.
min_val = customer_data_cleaned_with_txn_vol['performance index'].min()
max_val = customer_data_cleaned_with_txn_vol['performance index'].max()
customer_data_cleaned_with_txn_vol['Normalized Performance Index'] = (
    (customer_data_cleaned_with_txn_vol['performance index'] - min_val) / (max_val - min_val)
)

customer_data_cleaned_with_txn_vol.head()


Unnamed: 0,Customer Name,uid,business sector,location,gender,education level,business start date,business duration (years),Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,performance index,Normalized Performance Index
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,Manufacturing,Entebbe,Female,University,2021-09-15,2.1,1193667.71,54,568413.195238,25.714286,1762107.0,0.593091
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,Hospitality,Entebbe,Female,University,2018-03-16,5.6,1866674.2,133,333334.678571,23.75,2200033.0,0.59599
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,Retail,Mbarara,Male,Primary,2013-06-24,10.33,-2933611.18,246,-283989.465634,23.814134,-3217577.0,0.560126
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,Fisheries,Kampala,Female,Secondary,2018-04-24,5.49,1184751.32,220,215801.697632,40.07286,1400593.0,0.590698
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,Fisheries,Fort Portal,Male,Primary,2010-03-18,13.6,-12089320.06,110,-888920.592647,8.088235,-12978230.0,0.495512


# Encoding
Transforming data into machine readable numbers to facilitate the analysis process. 

## Define the values we want to encode
Business Sector, Location, Gender, Education level, business age, some value containing business age and number and amount of transactions

The features we choose should be able to describe groups of rows (as compared to features like phone number)

## Creating dummy variables / encoding
Creating numerical values from categorical features can be approached in several ways. 
The easiest is assigning a number to each value of a feature, e.g.
 Business Sector
    - Agriculture : 1
    - Service : 2
    - Fisheries : 3
However, this is not suitable for clustering algorithms: the numbers imply an order and a distance between categories that do not exist, which leads to inaccurate clusters

A better approach is the use of one-hot encoding, where each possible value gets its own column that is 1 for rows with that value and 0 for all other rows


In [7]:
categorical_columns = ['business sector', 'location', 'gender', 'education level']

# One-hot encode the categorical columns. dtype=int makes get_dummies emit 0/1
# integers directly, so no separate cast of the indicator columns is needed.
customer_data_encoded = pd.get_dummies(
    customer_data_cleaned_with_txn_vol,
    columns=categorical_columns,
    dtype=int,
)

# Names of the generated indicator columns ("<source column>_<category>").
# Matching on the "<source column>_" prefix avoids picking up any unrelated
# column whose name merely contains one of the source column names.
dummy_prefixes = tuple(f"{column}_" for column in categorical_columns)
categorical_dummies = [col for col in customer_data_encoded.columns if col.startswith(dummy_prefixes)]

# Only the normalised index is kept as a feature; drop the raw one.
customer_data_encoded = customer_data_encoded.drop(columns=["performance index"])
customer_data_encoded.head()

# TODO: Standardize continuous columns for ML (kept for reference, not run yet)
# from sklearn.preprocessing import StandardScaler
# continuous_columns = ['business duration (years)', 'Transaction Volume', 'Net Profit']
# scaler = StandardScaler()
# customer_data_encoded[continuous_columns] = scaler.fit_transform(customer_data_encoded[continuous_columns])




Unnamed: 0,Customer Name,uid,business start date,business duration (years),Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,Normalized Performance Index,business sector_Agriculture,...,location_Kisoro,location_Lira,location_Mbale,location_Mbarara,gender_Female,gender_Male,education level_Primary,education level_Secondary,education level_University,education level_Vocational
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,1193667.71,54,568413.195238,25.714286,0.593091,0,...,0,0,0,0,1,0,0,0,1,0
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,1866674.2,133,333334.678571,23.75,0.59599,0,...,0,0,0,0,1,0,0,0,1,0
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,-2933611.18,246,-283989.465634,23.814134,0.560126,0,...,0,0,0,1,0,1,1,0,0,0
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,1184751.32,220,215801.697632,40.07286,0.590698,0,...,0,0,0,0,1,0,0,1,0,0
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,-12089320.06,110,-888920.592647,8.088235,0.495512,0,...,0,0,0,0,0,1,1,0,0,0


## Convert the business duration column into ranges
pseudo code:
find the min value in duration column : e.g. customer_data_encoded['business duration (years)'].min()
find the max value in duration column
create ranges based on your judgement. For example if minimum is 1 and max is 10, then 3 ranges make sense
create a new column in customer_data_encoded called business duration range and assign each row to its range
e.g.: `customer name` `uid` `start date` `duration` `duration range` `other columns...`
        James William   ccds    2013-234          10.58      7 - 10 years


After that, one-hot encode the `duration range` column so that we have something like:
`customer name` `uid` `start date` `duration` `duration range_0 - 3 years` `duration range_3 - 7 years` `duration range_7 - 10 years`
James William   ccds    2013-234          10.58       0                             0                                 1

In [8]:
# 1. Inspect the observed range of "business duration (years)" so the bin edges
#    below can be checked against the data.
min_duration = customer_data_encoded['business duration (years)'].min()
max_duration = customer_data_encoded['business duration (years)'].max()

print("min is " + str(min_duration))
print("max is " + str(max_duration))

# 2. Define the bin edges. With right=False every bin is left-closed and
#    right-open, so the bins are [0, 5), [5, 10), [10, 15) and [15, 20.01).
#    The 20.01 edge therefore keeps a duration of exactly 20.00 years inside
#    the last bin; durations of 20.01 or more fall outside every bin and get NaN.
#    NOTE: durations are measured against the current date, so these edges must
#    be revisited once the oldest business passes 20 years.
bins = [0, 5, 10, 15, 20.01]

# 3. Labels shown for each bin (the last one reads '15 - 20 years' rather than
#    '15 - 20.01 years' for display purposes).
labels = ['0 - 5 years',
          '5 - 10 years',
          '10 - 15 years',
          '15 - 20 years']

# 4. Assign every row to its duration range.
customer_data_encoded['business duration range'] = pd.cut(
    customer_data_encoded['business duration (years)'],
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True,
)

# Number of businesses per range, kept for our own analysis.
# observed=False is passed explicitly: it keeps empty ranges in the result and
# silences the pandas FutureWarning about the changing default for
# categorical groupers.
business_duration_ranges_df = (
    customer_data_encoded
    .groupby('business duration range', observed=False)
    .size()
    .reset_index(name='Number of Businesses')
)

# Reorder the columns: identifying/duration columns first, everything else after.
leading_columns = ['Customer Name', 'uid', 'business start date', 'business duration (years)', 'business duration range']
desired_order = leading_columns + [col for col in customer_data_encoded if col not in leading_columns]
customer_data_encoded = customer_data_encoded[desired_order]

customer_data_encoded.head()





min is 0.04
max is 19.94


  business_duration_ranges_df = customer_data_encoded.groupby('business duration range').size().reset_index(name='Number of Businesses')


Unnamed: 0,Customer Name,uid,business start date,business duration (years),business duration range,Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,Normalized Performance Index,...,location_Kisoro,location_Lira,location_Mbale,location_Mbarara,gender_Female,gender_Male,education level_Primary,education level_Secondary,education level_University,education level_Vocational
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,0 - 5 years,1193667.71,54,568413.195238,25.714286,0.593091,...,0,0,0,0,1,0,0,0,1,0
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,5 - 10 years,1866674.2,133,333334.678571,23.75,0.59599,...,0,0,0,0,1,0,0,0,1,0
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,10 - 15 years,-2933611.18,246,-283989.465634,23.814134,0.560126,...,0,0,0,1,0,1,1,0,0,0
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,5 - 10 years,1184751.32,220,215801.697632,40.07286,0.590698,...,0,0,0,0,1,0,0,1,0,0
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,10 - 15 years,-12089320.06,110,-888920.592647,8.088235,0.495512,...,0,0,0,0,0,1,1,0,0,0


## Encode business ranges column 

In [9]:
# One-hot encode the duration range as 0/1 integer columns.
# The column is indexed directly (instead of .get(..., pd.Series())) so a
# missing column fails here with a clear KeyError rather than silently
# producing an empty set of dummies.
duration_dummies = pd.get_dummies(
    customer_data_encoded['business duration range'],
    prefix='duration range',
    dtype=int,
)

# Build the result without reassigning customer_data_encoded, so re-running
# this cell does not append a second copy of the dummy columns.
customer_data_encoded_range_dropped = pd.concat(
    [customer_data_encoded.drop(columns='business duration range'), duration_dummies],
    axis=1,
)
customer_data_encoded_range_dropped.head()





Unnamed: 0,Customer Name,uid,business start date,business duration (years),Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,Normalized Performance Index,business sector_Agriculture,...,gender_Female,gender_Male,education level_Primary,education level_Secondary,education level_University,education level_Vocational,duration range_0 - 5 years,duration range_5 - 10 years,duration range_10 - 15 years,duration range_15 - 20 years
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,1193667.71,54,568413.195238,25.714286,0.593091,0,...,1,0,0,0,1,0,1,0,0,0
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,1866674.2,133,333334.678571,23.75,0.59599,0,...,1,0,0,0,1,0,0,1,0,0
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,-2933611.18,246,-283989.465634,23.814134,0.560126,0,...,0,1,1,0,0,0,0,0,1,0
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,1184751.32,220,215801.697632,40.07286,0.590698,0,...,1,0,0,1,0,0,0,1,0,0
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,-12089320.06,110,-888920.592647,8.088235,0.495512,0,...,0,1,1,0,0,0,0,0,1,0


## Create transactions volume per year range column

In [10]:
# 1. Print the observed range of "Transaction Volume Per Year" so the bin edges
#    below can be compared with the data.
volume_per_year = customer_data_encoded_range_dropped['Transaction Volume Per Year']
min_transaction = volume_per_year.min()
max_transaction = volume_per_year.max()

print(f"min is {min_transaction}")
print(f"max is {max_transaction}")

# 2. Bin edges and their labels ("<lower> - <upper>"); bins are left-closed.
# NOTE(review): in the recorded run the maximum printed above is 1650.0, below
# the first edge of 2000, so every row lands in '0 - 2000' — consider edges
# that follow the observed distribution.
bins = (0, 2000, 4000, 6000, 8000, 11000)
labels = [f"{lower} - {upper}" for lower, upper in zip(bins[:-1], bins[1:])]

# 3. Assign every row to its transaction volume range.
customer_data_encoded_range_dropped['transaction volume range'] = pd.cut(
    volume_per_year,
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True,
)

customer_data_encoded_range_dropped.head()

min is 3.197969543147208
max is 1650.0


Unnamed: 0,Customer Name,uid,business start date,business duration (years),Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,Normalized Performance Index,business sector_Agriculture,...,gender_Male,education level_Primary,education level_Secondary,education level_University,education level_Vocational,duration range_0 - 5 years,duration range_5 - 10 years,duration range_10 - 15 years,duration range_15 - 20 years,transaction volume range
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,1193667.71,54,568413.195238,25.714286,0.593091,0,...,0,0,0,1,0,1,0,0,0,0 - 2000
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,1866674.2,133,333334.678571,23.75,0.59599,0,...,0,0,0,1,0,0,1,0,0,0 - 2000
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,-2933611.18,246,-283989.465634,23.814134,0.560126,0,...,1,1,0,0,0,0,0,1,0,0 - 2000
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,1184751.32,220,215801.697632,40.07286,0.590698,0,...,0,0,1,0,0,0,1,0,0,0 - 2000
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,-12089320.06,110,-888920.592647,8.088235,0.495512,0,...,1,1,0,0,0,0,0,1,0,0 - 2000


## Encode transactions volume per year range

In [11]:
# One-hot encode the transaction volume range as 0/1 integer columns.
# The dummies get their own name (they are not duration dummies), and the
# column is indexed directly so a missing column fails here with a clear
# KeyError instead of silently producing an empty set of dummies.
transaction_dummies = pd.get_dummies(
    customer_data_encoded_range_dropped['transaction volume range'],
    prefix='transaction range',
    dtype=int,
)

# Replace the categorical range column with its indicator columns.
customer_data_encoded_range_dropped = pd.concat(
    [customer_data_encoded_range_dropped.drop(columns='transaction volume range'), transaction_dummies],
    axis=1,
)
customer_data_encoded_range_dropped.head()


Unnamed: 0,Customer Name,uid,business start date,business duration (years),Net Profit,Transaction Volume,Net Profit Per Year,Transaction Volume Per Year,Normalized Performance Index,business sector_Agriculture,...,education level_Vocational,duration range_0 - 5 years,duration range_5 - 10 years,duration range_10 - 15 years,duration range_15 - 20 years,transaction range_0 - 2000,transaction range_2000 - 4000,transaction range_4000 - 6000,transaction range_6000 - 8000,transaction range_8000 - 11000
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,1193667.71,54,568413.195238,25.714286,0.593091,0,...,0,1,0,0,0,1,0,0,0,0
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,1866674.2,133,333334.678571,23.75,0.59599,0,...,0,0,1,0,0,1,0,0,0,0
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,-2933611.18,246,-283989.465634,23.814134,0.560126,0,...,0,0,0,1,0,1,0,0,0,0
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,1184751.32,220,215801.697632,40.07286,0.590698,0,...,0,0,1,0,0,1,0,0,0,0
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,-12089320.06,110,-888920.592647,8.088235,0.495512,0,...,0,0,0,1,0,1,0,0,0,0


Drop the transaction volume per year, net profit per year, transaction volume and net profit columns (the transaction range column was already dropped in the previous step). Name the new data frame `dropped_data_encoded_with_transactions`.

In [12]:

# Remove the raw transaction/profit figures; the normalised index and the
# one-hot range columns stay in the frame.
dropped_data_encoded_with_transactions = customer_data_encoded_range_dropped.drop(
    ['Transaction Volume Per Year', 'Net Profit Per Year', 'Transaction Volume', 'Net Profit'],
    axis='columns',
)
dropped_data_encoded_with_transactions.head()

Unnamed: 0,Customer Name,uid,business start date,business duration (years),Normalized Performance Index,business sector_Agriculture,business sector_Fisheries,business sector_Hospitality,business sector_IT,business sector_Manufacturing,...,education level_Vocational,duration range_0 - 5 years,duration range_5 - 10 years,duration range_10 - 15 years,duration range_15 - 20 years,transaction range_0 - 2000,transaction range_2000 - 4000,transaction range_4000 - 6000,transaction range_6000 - 8000,transaction range_8000 - 11000
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,0.593091,0,0,0,0,1,...,0,1,0,0,0,1,0,0,0,0
1,Florence Sarah,eb89a5e6-644f-4f10-8886-31b4d37fc6a8,2018-03-16,5.6,0.59599,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
2,James Jane,34d1c009-65a9-44d5-a990-c8b4c5e7fb9f,2013-06-24,10.33,0.560126,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
3,Nakinobe Jane,72db3880-aab0-4782-bc73-a47f787d30d1,2018-04-24,5.49,0.590698,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,0
4,Bridget Jane,57493f5c-80a0-4a41-9381-3d9241de74f4,2010-03-18,13.6,0.495512,0,1,0,0,0,...,0,0,0,1,0,1,0,0,0,0


## Export encoded data into a CSV file

In [13]:
# Write the encoded frame to disk without the row index.
dropped_data_encoded_with_transactions.to_csv(path_or_buf='encoded_data.csv', index=False)


In [14]:
# Show a single row as a quick check of the final column layout.
dropped_data_encoded_with_transactions.head(n=1)

Unnamed: 0,Customer Name,uid,business start date,business duration (years),Normalized Performance Index,business sector_Agriculture,business sector_Fisheries,business sector_Hospitality,business sector_IT,business sector_Manufacturing,...,education level_Vocational,duration range_0 - 5 years,duration range_5 - 10 years,duration range_10 - 15 years,duration range_15 - 20 years,transaction range_0 - 2000,transaction range_2000 - 4000,transaction range_4000 - 6000,transaction range_6000 - 8000,transaction range_8000 - 11000
0,William Alice,8b37287f-cf0f-4305-97be-cbb21fa68b33,2021-09-15,2.1,0.593091,0,0,0,0,1,...,0,1,0,0,0,1,0,0,0,0
