# Credit Risk Modeling – Feature Engineering & Modeling

This notebook is part of an end-to-end credit risk modeling project completed
during my Data Science Internship at HDFC Capital Advisors Ltd.

⚠️ Note: Due to data confidentiality, raw datasets are not included.
The notebook demonstrates methodology, feature engineering logic,
modeling approach, and evaluation techniques.

In [38]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [39]:
pd.set_option('display.max_columns', 100)

In [40]:
LOAD_DATA = False  # Set to True only in secure local environment

if LOAD_DATA:
    with open('./data/senior_ds_test/data/train/enquiry_data_train.json', 'r') as f:
        df = json.load(f)
else:
    df = []
    
flat_list = [item for sublist in df for item in sublist]
df_enq = pd.DataFrame(flat_list)


In [41]:
df_enq.head()

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid
0,Interbank credit,168839,2020-11-08,AAA08065248
1,Mobile operator loan,268392,2020-09-20,AAA08065248
2,Mobile operator loan,36082,2020-06-19,AAA08065248
3,Interbank credit,180467,2019-10-22,AAA08065248
4,Cash loan (non-earmarked),227459,2020-05-24,AAA08065248


In [42]:
df_enq.columns

Index(['enquiry_type', 'enquiry_amt', 'enquiry_date', 'uid'], dtype='object')

In [43]:
df_enq.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1909926 entries, 0 to 1909925
Data columns (total 4 columns):
 #   Column        Dtype 
---  ------        ----- 
 0   enquiry_type  object
 1   enquiry_amt   int64 
 2   enquiry_date  object
 3   uid           object
dtypes: int64(1), object(3)
memory usage: 58.3+ MB


In [44]:
df_enq.shape

(1909926, 4)

In [45]:
df_enq["enquiry_date"] = pd.to_datetime(df_enq["enquiry_date"],format= "%Y-%m-%d")

In [46]:
df_enq["uid"].value_counts().to_frame().sort_values(by='uid')

Unnamed: 0_level_0,count
uid,Unnamed: 1_level_1
AAA08065248,11
AAA09044550,26
AAA10545297,14
AAA14112888,15
AAA20326915,1
...,...
ZZZ74526004,8
ZZZ78449185,6
ZZZ79008454,1
ZZZ81253108,29


In [47]:
df_enq["enquiry_type"].nunique()

17

In [48]:
df_enq.loc[df_enq.duplicated(subset=["uid","enquiry_amt","enquiry_date"])].sort_values(by="enquiry_date")
       

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid
546191,Cash loans,169000,2018-03-12,HLA72886346
367003,Unknown type of loan,161000,2018-10-24,EZZ13603477
1797730,Credit card,65000,2018-11-24,YMT71817987
199268,Consumer credit,25000,2019-04-05,CSR18214725
1406611,Loan for working capital replenishment,8000,2019-06-21,TDJ67096851
...,...,...,...,...
300577,Real estate loan,35000,2020-12-19,EDB71045421
268792,Cash loans,196000,2020-12-21,DRL25878603
1469853,Mobile operator loan,79000,2020-12-24,TZK82111626
1670667,Consumer credit,162000,2020-12-27,WSY68658971


In [49]:
#No. of enquiries made by each customer for different categories. 
enq_counts = df_enq.groupby(["uid","enquiry_type"])["enquiry_type"].size().to_frame()

enq_counts.head(40)

Unnamed: 0_level_0,Unnamed: 1_level_0,enquiry_type
uid,enquiry_type,Unnamed: 2_level_1
AAA08065248,Cash loan (non-earmarked),1
AAA08065248,Interbank credit,3
AAA08065248,Loan for purchase of shares (margin lending),2
AAA08065248,Mobile operator loan,2
AAA08065248,Mortgage,1
AAA08065248,Revolving loans,2
AAA09044550,Another type of loan,3
AAA09044550,Car loan,3
AAA09044550,Cash loan (non-earmarked),1
AAA09044550,Cash loans,7


In [50]:
df_enq.shape

(1909926, 4)

## Features

In [51]:
df_enq['enquiry_year']=df_enq["enquiry_date"].dt.year

#month enquired
df_enq['enquiry_month']=df_enq["enquiry_date"].dt.month

# day enquired
df_enq['enquiry_day']=df_enq["enquiry_date"].dt.day

In [52]:
df_enq["Avg_amount_enquired"] = df_enq["uid"].map(df_enq.groupby('uid')['enquiry_amt'].mean().round(2))
df_enq["max_amount_enquired"] = df_enq["uid"].map(df_enq.groupby('uid')['enquiry_amt'].max())
df_enq["min_amount_enquired"] = df_enq["uid"].map(df_enq.groupby('uid')['enquiry_amt'].min())

In [53]:
df_enq['total_enquiries_per_customer'] = df_enq.groupby('uid')['enquiry_type'].transform('count')

In [25]:
df_enq.head(10)

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid,enquiry_year,enquiry_month,enquiry_day,Avg_amount_enquired,max_amount_enquired,min_amount_enquired,total_enquiries_per_customer
0,Interbank credit,168839,2020-11-08,AAA08065248,2020,11,8,187696.18,364751,36082,11
1,Mobile operator loan,268392,2020-09-20,AAA08065248,2020,9,20,187696.18,364751,36082,11
2,Mobile operator loan,36082,2020-06-19,AAA08065248,2020,6,19,187696.18,364751,36082,11
3,Interbank credit,180467,2019-10-22,AAA08065248,2019,10,22,187696.18,364751,36082,11
4,Cash loan (non-earmarked),227459,2020-05-24,AAA08065248,2020,5,24,187696.18,364751,36082,11
5,Mortgage,44841,2019-11-03,AAA08065248,2019,11,3,187696.18,364751,36082,11
6,Revolving loans,364751,2020-02-26,AAA08065248,2020,2,26,187696.18,364751,36082,11
7,Interbank credit,127618,2020-10-04,AAA08065248,2020,10,4,187696.18,364751,36082,11
8,Loan for purchase of shares (margin lending),315591,2019-06-12,AAA08065248,2019,6,12,187696.18,364751,36082,11
9,Revolving loans,169056,2019-03-01,AAA08065248,2019,3,1,187696.18,364751,36082,11


In [54]:
# count of enquiry types per customer.
df_enq['unique_enquiry_types_per_customer'] = df_enq.groupby('uid')['enquiry_type'].transform('nunique')


In [55]:
df_enq.shape

(1909926, 12)

In [56]:
df_enq[df_enq['uid'] == 'AAA09044550'][['enquiry_type', 'uid']].value_counts()

enquiry_type                        uid        
Cash loans                          AAA09044550    7
Another type of loan                AAA09044550    3
Car loan                            AAA09044550    3
Interbank credit                    AAA09044550    2
Microloan                           AAA09044550    2
Cash loan (non-earmarked)           AAA09044550    1
Consumer credit                     AAA09044550    1
Credit card                         AAA09044550    1
Loan for the purchase of equipment  AAA09044550    1
Mobile operator loan                AAA09044550    1
Mortgage                            AAA09044550    1
Real estate loan                    AAA09044550    1
Revolving loans                     AAA09044550    1
Unknown type of loan                AAA09044550    1
Name: count, dtype: int64

In [57]:
df_enq['total_enquiries_per_type'] = df_enq.groupby(['enquiry_type'])['enquiry_type'].transform('count')

In [58]:
#Percentage of each type of enquiries per customer by the total no of enquiries for that type

df_enq['enquiry_percentage'] = (df_enq['total_enquiries_per_customer'] / df_enq['total_enquiries_per_type']) * 100

In [59]:
grouped = df_enq.groupby(['uid', 'enquiry_type']).size().reset_index(name='count')

# Pivot the DataFrame
pivot_table = grouped.pivot(index='uid', columns='enquiry_type', values='count').fillna(0).astype(int)

# Flatten the pivot table columns
pivot_table.columns = [f'{col}_total_enquiries' for col in pivot_table.columns]

# Reset index to prepare for merge
pivot_table.reset_index(inplace=True)

# Merge the pivoted data back into the main DataFrame (df_enq)
df_enq = pd.merge(df_enq, pivot_table, on='uid', how='left')


In [61]:
enquiry_percentage_aggregates = df_enq.groupby('enquiry_type')['enquiry_percentage'].agg(['mean', 'sum', 'min', 'max']).reset_index()
enquiry_percentage_aggregates.columns = ['enquiry_type', 'mean_enquiry_percentage', 'total_enquiry_percentage', 'min_enquiry_percentage', 'max_enquiry_percentage']

# Merge aggregated statistics back into df_enq
df_enq = df_enq.merge(enquiry_percentage_aggregates, on='enquiry_type', how='left')

In [62]:
df_enq.shape

(1909926, 35)

In [34]:
df_enq.head()

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid,enquiry_year,enquiry_month,enquiry_day,Avg_amount_enquired,max_amount_enquired,min_amount_enquired,total_enquiries_per_customer,unique_enquiry_types_per_customer,total_enquiries_per_type,enquiry_percentage,Another type of loan_total_enquiries,Car loan_total_enquiries,Cash loan (non-earmarked)_total_enquiries,Cash loans_total_enquiries,Consumer credit_total_enquiries,Credit card_total_enquiries,Interbank credit_total_enquiries,Loan for business development_total_enquiries,Loan for purchase of shares (margin lending)_total_enquiries,Loan for the purchase of equipment_total_enquiries,Loan for working capital replenishment_total_enquiries,Microloan_total_enquiries,Mobile operator loan_total_enquiries,Mortgage_total_enquiries,Real estate loan_total_enquiries,Revolving loans_total_enquiries,Unknown type of loan_total_enquiries
0,Interbank credit,168839,2020-11-08,AAA08065248,2020,11,8,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0
1,Mobile operator loan,268392,2020-09-20,AAA08065248,2020,9,20,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0
2,Mobile operator loan,36082,2020-06-19,AAA08065248,2020,6,19,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0
3,Interbank credit,180467,2019-10-22,AAA08065248,2019,10,22,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0
4,Cash loan (non-earmarked),227459,2020-05-24,AAA08065248,2020,5,24,187696.18,364751,36082,11,6,98831,0.01113,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0


In [63]:
# first enquiry date per type
df_enq['first_enquiry_date'] = df_enq.groupby(['uid', 'enquiry_type'])['enquiry_date'].transform('min')


In [64]:
# last enquiry date per type
df_enq['last_enquiry_date'] = df_enq.groupby(['uid', 'enquiry_type'])['enquiry_date'].transform('max')

In [65]:
max_date = df_enq['enquiry_date'].max()

In [66]:
max_date

Timestamp('2021-01-01 00:00:00')

In [69]:
df_enq.shape

(1909926, 38)

###### COUNT OF ENQUIRIES ACC TO NO. OF MONTHS

In [68]:
#count of enquiries in the last 1 month
a_month_ago = max_date - pd.DateOffset(months=1)
df_enq['enquiries_last_1_month'] = df_enq.groupby('uid')['enquiry_date'].transform(lambda x: x.gt(a_month_ago).sum())

In [70]:
#count of enquiries in the last 3 months
three_months_ago = max_date - pd.DateOffset(months=3)
df_enq['enquiries_last_3_months'] = df_enq.groupby('uid')['enquiry_date'].transform(lambda x: x.gt(three_months_ago).sum())

In [71]:
#count of enquiries in the last 6 months
six_months_ago = max_date - pd.DateOffset(months=6)
df_enq['enquiries_last_6_months'] = df_enq.groupby('uid')['enquiry_date'].transform(lambda x: x.gt(six_months_ago).sum())

In [72]:
#count of enquiries in the last 9 months
nine_months_ago = max_date - pd.DateOffset(months=9)
df_enq['enquiries_last_9_months'] = df_enq.groupby('uid')['enquiry_date'].transform(lambda x: x.gt(nine_months_ago).sum())

In [75]:
#count of enquiries in the last year 
max_date = df_enq['enquiry_date'].max()
one_year_ago = max_date - pd.DateOffset(years=1)
df_enq['enquiries_last_year'] = df_enq.groupby('uid')['enquiry_date'].transform(lambda x: x.gt(one_year_ago).sum())

In [76]:
df_enq.head()

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid,enquiry_year,enquiry_month,enquiry_day,Avg_amount_enquired,max_amount_enquired,min_amount_enquired,total_enquiries_per_customer,unique_enquiry_types_per_customer,total_enquiries_per_type,enquiry_percentage,Another type of loan_total_enquiries,Car loan_total_enquiries,Cash loan (non-earmarked)_total_enquiries,Cash loans_total_enquiries,Consumer credit_total_enquiries,Credit card_total_enquiries,Interbank credit_total_enquiries,Loan for business development_total_enquiries,Loan for purchase of shares (margin lending)_total_enquiries,Loan for the purchase of equipment_total_enquiries,Loan for working capital replenishment_total_enquiries,Microloan_total_enquiries,Mobile operator loan_total_enquiries,Mortgage_total_enquiries,Real estate loan_total_enquiries,Revolving loans_total_enquiries,Unknown type of loan_total_enquiries,mean_enquiry_percentage,total_enquiry_percentage,min_enquiry_percentage,max_enquiry_percentage,first_enquiry_date,last_enquiry_date,enquiries_last_1_month,enquiries_last_3_months,enquiries_last_6_months,enquiries_last_9_months,enquiries_last_year
0,Interbank credit,168839,2020-11-08,AAA08065248,2020,11,8,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014149,1401.666616,0.001009,0.069653,2019-10-22,2020-11-08,1,3,4,6,7
1,Mobile operator loan,268392,2020-09-20,AAA08065248,2020,9,20,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014064,1399.078197,0.001005,0.069361,2020-06-19,2020-09-20,1,3,4,6,7
2,Mobile operator loan,36082,2020-06-19,AAA08065248,2020,6,19,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014064,1399.078197,0.001005,0.069361,2020-06-19,2020-09-20,1,3,4,6,7
3,Interbank credit,180467,2019-10-22,AAA08065248,2019,10,22,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014149,1401.666616,0.001009,0.069653,2019-10-22,2020-11-08,1,3,4,6,7
4,Cash loan (non-earmarked),227459,2020-05-24,AAA08065248,2020,5,24,187696.18,364751,36082,11,6,98831,0.01113,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014194,1402.856391,0.001012,0.069816,2020-05-24,2020-05-24,1,3,4,6,7


In [78]:
df_enq.shape

(1909926, 42)

###### ENQUIRY AMOUNT

In [79]:
pivot_mean = pd.pivot_table(df_enq, 
                            index='uid', 
                            columns='enquiry_type', 
                            values='enquiry_amt', 
                            aggfunc='mean', 
                            fill_value=0)

# Flatten the columns of the pivot table
pivot_mean.columns = [f'{credit}_mean_enquiry_amt' for credit in pivot_mean.columns]

# Reset the index to merge with the original dataframe
pivot_mean.reset_index(inplace=True)

df_enq = df_enq.merge(pivot_mean, on='uid', how='left')

In [84]:
df_enq.shape


(1909926, 93)

In [81]:
pivot_median = pd.pivot_table(df_enq, 
                            index='uid', 
                            columns='enquiry_type', 
                            values='enquiry_amt', 
                            aggfunc='median', 
                            fill_value=0)

# Flatten the columns of the pivot table
pivot_median.columns = [f'{credit}_median_enquiry_amt' for credit in pivot_median.columns]

# Reset the index to merge with the original dataframe
pivot_median.reset_index(inplace=True)

df_enq = df_enq.merge(pivot_median, on='uid', how='left')

In [83]:
pivot_sum = pd.pivot_table(df_enq, 
                            index='uid', 
                            columns='enquiry_type', 
                            values='enquiry_amt', 
                            aggfunc='sum', 
                            fill_value=0)

# Flatten the columns of the pivot table
pivot_sum.columns = [f'{credit}_total_enquiry_amt' for credit in pivot_sum.columns]

# Reset the index to merge with the original dataframe
pivot_sum.reset_index(inplace=True)

df_enq = df_enq.merge(pivot_sum, on='uid', how='left')

In [106]:
df_enq.head()

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid,enquiry_year,enquiry_month,enquiry_day,Avg_amount_enquired,max_amount_enquired,min_amount_enquired,total_enquiries_per_customer,unique_enquiry_types_per_customer,total_enquiries_per_type,enquiry_percentage,Another type of loan_total_enquiries,Car loan_total_enquiries,Cash loan (non-earmarked)_total_enquiries,Cash loans_total_enquiries,Consumer credit_total_enquiries,Credit card_total_enquiries,Interbank credit_total_enquiries,Loan for business development_total_enquiries,Loan for purchase of shares (margin lending)_total_enquiries,Loan for the purchase of equipment_total_enquiries,Loan for working capital replenishment_total_enquiries,Microloan_total_enquiries,Mobile operator loan_total_enquiries,Mortgage_total_enquiries,Real estate loan_total_enquiries,Revolving loans_total_enquiries,Unknown type of loan_total_enquiries,first_enquiry_date,last_enquiry_date,enquiries_last_1_month,enquiries_last_3_months,enquiries_last_6_months,enquiries_last_9_months,Another type of loan_mean_enquiry_amt,Car loan_mean_enquiry_amt,Cash loan (non-earmarked)_mean_enquiry_amt,Cash loans_mean_enquiry_amt,Consumer credit_mean_enquiry_amt,Credit card_mean_enquiry_amt,Interbank credit_mean_enquiry_amt,Loan for business development_mean_enquiry_amt,Loan for purchase of shares (margin lending)_mean_enquiry_amt,Loan for the purchase of equipment_mean_enquiry_amt,Loan for working capital replenishment_mean_enquiry_amt,Microloan_mean_enquiry_amt,Mobile operator loan_mean_enquiry_amt,Mortgage_mean_enquiry_amt,Real estate loan_mean_enquiry_amt,Revolving loans_mean_enquiry_amt,Unknown type of loan_mean_enquiry_amt,Another type of loan_median_enquiry_amt,Car loan_median_enquiry_amt,Cash loan (non-earmarked)_median_enquiry_amt,Cash loans_median_enquiry_amt,Consumer credit_median_enquiry_amt,Credit card_median_enquiry_amt,Interbank credit_median_enquiry_amt,Loan for business development_median_enquiry_amt,Loan for purchase of shares (margin lending)_median_enquiry_amt,Loan for the purchase of equipment_median_enquiry_amt,Loan for working capital replenishment_median_enquiry_amt,Microloan_median_enquiry_amt,Mobile operator loan_median_enquiry_amt,Mortgage_median_enquiry_amt,Real estate loan_median_enquiry_amt,Revolving loans_median_enquiry_amt,Unknown type of loan_median_enquiry_amt,Another type of loan_total_enquiry_amt,Car loan_total_enquiry_amt,Cash loan (non-earmarked)_total_enquiry_amt,Cash loans_total_enquiry_amt,Consumer credit_total_enquiry_amt,Credit card_total_enquiry_amt,Interbank credit_total_enquiry_amt,Loan for business development_total_enquiry_amt,Loan for purchase of shares (margin lending)_total_enquiry_amt,Loan for the purchase of equipment_total_enquiry_amt,Loan for working capital replenishment_total_enquiry_amt,Microloan_total_enquiry_amt,Mobile operator loan_total_enquiry_amt,Mortgage_total_enquiry_amt,Real estate loan_total_enquiry_amt,Revolving loans_total_enquiry_amt,Unknown type of loan_total_enquiry_amt
0,Interbank credit,168839,2020-11-08,AAA08065248,2020,11,8,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,2019-10-22,2020-11-08,1,3,4,6,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,168839.0,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0,0,227459,0,0,0,476924,0,477153,0,0,0,304474,44841,0,533807,0.0
1,Mobile operator loan,268392,2020-09-20,AAA08065248,2020,9,20,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,2020-06-19,2020-09-20,1,3,4,6,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,168839.0,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0,0,227459,0,0,0,476924,0,477153,0,0,0,304474,44841,0,533807,0.0
2,Mobile operator loan,36082,2020-06-19,AAA08065248,2020,6,19,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,2020-06-19,2020-09-20,1,3,4,6,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,168839.0,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0,0,227459,0,0,0,476924,0,477153,0,0,0,304474,44841,0,533807,


In [None]:
df_enq.loc[df_enq['uid'] == 'AAA08065248', ['enquiry_type', 'total_enquiries_per_type', 'uid']]

In [85]:
df_enq.shape

(1909926, 93)

In [86]:
def get_user_info(df, user_id):
    user_df = df[df['uid'] == user_id]
    return user_df.to_dict(orient='records')

# Get the information for user ID 'AAA09044550'
user_info = get_user_info(df, 'AAA08065248')

user_info

TypeError: list indices must be integers or slices, not str

### FEATURE DROPS

In [43]:
df_enq.loc[1:3]

Unnamed: 0,enquiry_type,enquiry_amt,enquiry_date,uid,enquiry_year,enquiry_month,enquiry_day,Avg_amount_enquired,max_amount_enquired,min_amount_enquired,total_enquiries_per_customer,unique_enquiry_types_per_customer,total_enquiries_per_type,enquiry_percentage,Another type of loan_total_enquiries,Car loan_total_enquiries,Cash loan (non-earmarked)_total_enquiries,Cash loans_total_enquiries,Consumer credit_total_enquiries,Credit card_total_enquiries,Interbank credit_total_enquiries,Loan for business development_total_enquiries,Loan for purchase of shares (margin lending)_total_enquiries,Loan for the purchase of equipment_total_enquiries,Loan for working capital replenishment_total_enquiries,Microloan_total_enquiries,Mobile operator loan_total_enquiries,Mortgage_total_enquiries,Real estate loan_total_enquiries,Revolving loans_total_enquiries,Unknown type of loan_total_enquiries,mean_enquiry_percentage,total_enquiry_percentage,min_enquiry_percentage,max_enquiry_percentage,first_enquiry_date,last_enquiry_date,enquiries_last_1_month,enquiries_last_3_months,enquiries_last_6_months,enquiries_last_9_months,enquiries_last_year,Another type of loan_mean_enquiry_amt,Car loan_mean_enquiry_amt,Cash loan (non-earmarked)_mean_enquiry_amt,Cash loans_mean_enquiry_amt,Consumer credit_mean_enquiry_amt,Credit card_mean_enquiry_amt,Interbank credit_mean_enquiry_amt,Loan for business development_mean_enquiry_amt,Loan for purchase of shares (margin lending)_mean_enquiry_amt,Loan for the purchase of equipment_mean_enquiry_amt,Loan for working capital replenishment_mean_enquiry_amt,Microloan_mean_enquiry_amt,Mobile operator loan_mean_enquiry_amt,Mortgage_mean_enquiry_amt,Real estate loan_mean_enquiry_amt,Revolving loans_mean_enquiry_amt,Unknown type of loan_mean_enquiry_amt,Another type of loan_median_enquiry_amt,Car loan_median_enquiry_amt,Cash loan (non-earmarked)_median_enquiry_amt,Cash loans_median_enquiry_amt,Consumer credit_median_enquiry_amt,Credit card_median_enquiry_amt,Interbank credit_median_enquiry_amt,Loan for business development_median_enquiry_amt,Loan for purchase of shares (margin lending)_median_enquiry_amt,Loan for the purchase of equipment_median_enquiry_amt,Loan for working capital replenishment_median_enquiry_amt,Microloan_median_enquiry_amt,Mobile operator loan_median_enquiry_amt,Mortgage_median_enquiry_amt,Real estate loan_median_enquiry_amt,Revolving loans_median_enquiry_amt,Unknown type of loan_median_enquiry_amt,Another type of loan_total_enquiry_amt,Car loan_total_enquiry_amt,Cash loan (non-earmarked)_total_enquiry_amt,Cash loans_total_enquiry_amt,Consumer credit_total_enquiry_amt,Credit card_total_enquiry_amt,Interbank credit_total_enquiry_amt,Loan for business development_total_enquiry_amt,Loan for purchase of shares (margin lending)_total_enquiry_amt,Loan for the purchase of equipment_total_enquiry_amt,Loan for working capital replenishment_total_enquiry_amt,Microloan_total_enquiry_amt,Mobile operator loan_total_enquiry_amt,Mortgage_total_enquiry_amt,Real estate loan_total_enquiry_amt,Revolving loans_total_enquiry_amt,Unknown type of loan_total_enquiry_amt
1,Mobile operator loan,268392,2020-09-20,AAA08065248,2020,9,20,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014064,1399.078197,0.001005,0.069361,2020-06-19,2020-09-20,1,3,4,6,7,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,168839.0,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,476924.0,0.0,477153.0,0.0,0.0,0.0,304474.0,44841.0,0.0,533807.0,0.0
2,Mobile operator loan,36082,2020-06-19,AAA08065248,2020,6,19,187696.18,364751,36082,11,6,99479,0.011058,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014064,1399.078197,0.001005,0.069361,2020-06-19,2020-09-20,1,3,4,6,7,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,168839.0,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,476924.0,0.0,477153.0,0.0,0.0,0.0,304474.0,44841.0,0.0,533807.0,0.0
3,Interbank credit,180467,2019-10-22,AAA08065248,2019,10,22,187696.18,364751,36082,11,6,99063,0.011104,0,0,1,0,0,0,3,0,2,0,0,0,2,1,0,2,0,0.014149,1401.666616,0.001009,0.069653,2019-10-22,2020-11-08,1,3,4,6,7,0.0,0.0,227459.0,0.0,0.0,0.0,158974.666667,0.0,238576.5,0.0,0.0,0.0,152237.0,44841.0,0.0,266903.5,0.0,0.0,0.0,227459.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [87]:
columns_to_drop_enq =[
    "enquiry_type",
    "enquiry_amt",
    "enquiry_date",
    "enquiry_year",
    "enquiry_month",
    "enquiry_day",
    "total_enquiries_per_type",
    "enquiry_percentage",
    "mean_enquiry_percentage",
    "total_enquiry_percentage",
    "min_enquiry_percentage",
    "max_enquiry_percentage",
    "first_enquiry_date",
    "last_enquiry_date"   
]
df_enq = df_enq.drop(columns=columns_to_drop_enq)

In [88]:
df_enq.columns.to_list()

['uid',
 'Avg_amount_enquired',
 'max_amount_enquired',
 'min_amount_enquired',
 'total_enquiries_per_customer',
 'unique_enquiry_types_per_customer',
 'Another type of loan_total_enquiries',
 'Car loan_total_enquiries',
 'Cash loan (non-earmarked)_total_enquiries',
 'Cash loans_total_enquiries',
 'Consumer credit_total_enquiries',
 'Credit card_total_enquiries',
 'Interbank credit_total_enquiries',
 'Loan for business development_total_enquiries',
 'Loan for purchase of shares (margin lending)_total_enquiries',
 'Loan for the purchase of equipment_total_enquiries',
 'Loan for working capital replenishment_total_enquiries',
 'Microloan_total_enquiries',
 'Mobile operator loan_total_enquiries',
 'Mortgage_total_enquiries',
 'Real estate loan_total_enquiries',
 'Revolving loans_total_enquiries',
 'Unknown type of loan_total_enquiries',
 'enquiries_last_1_month',
 'enquiries_last_3_months',
 'enquiries_last_6_months',
 'enquiries_last_9_months',
 'enquiries_last_year',
 'Another type of l

In [89]:
df_enq.shape


(1909926, 79)

In [90]:
df_enq["uid"].nunique()

261383

In [91]:
df_enq["uid"].value_counts()

uid
PLY03601601    69
TMC11112974    63
JSE97180253    63
VYX32349248    62
AST46250024    60
               ..
FXC06846551     1
FXD12959861     1
FXD27170586     1
FXD28922874     1
TEH25811659     1
Name: count, Length: 261383, dtype: int64

In [92]:
df_enq.duplicated(subset="uid")

0          False
1           True
2           True
3           True
4           True
           ...  
1909921     True
1909922     True
1909923     True
1909924     True
1909925     True
Length: 1909926, dtype: bool

In [93]:
df_enq = df_enq.drop_duplicates(subset="uid",keep="first")

In [None]:
df_enq.shape

In [94]:
enq_prefix = 'enqList_'

# Add prefix to all columns in df_enq except 'uid'
df_enq = df_enq.rename(columns=lambda x: enq_prefix + x if x != 'uid' else x)

In [95]:
df_enq.to_csv('df_enq.csv', index=False)

In [96]:
user_dict = df_enq[df_enq["uid"]=='AAA09044550'].to_dict('records')

In [97]:
user_dict

[{'uid': 'AAA09044550',
  'enqList_Avg_amount_enquired': 102269.23,
  'enqList_max_amount_enquired': 197000,
  'enqList_min_amount_enquired': 5000,
  'enqList_total_enquiries_per_customer': 26,
  'enqList_unique_enquiry_types_per_customer': 14,
  'enqList_Another type of loan_total_enquiries': 3,
  'enqList_Car loan_total_enquiries': 3,
  'enqList_Cash loan (non-earmarked)_total_enquiries': 1,
  'enqList_Cash loans_total_enquiries': 7,
  'enqList_Consumer credit_total_enquiries': 1,
  'enqList_Credit card_total_enquiries': 1,
  'enqList_Interbank credit_total_enquiries': 2,
  'enqList_Loan for business development_total_enquiries': 0,
  'enqList_Loan for purchase of shares (margin lending)_total_enquiries': 0,
  'enqList_Loan for the purchase of equipment_total_enquiries': 1,
  'enqList_Loan for working capital replenishment_total_enquiries': 0,
  'enqList_Microloan_total_enquiries': 2,
  'enqList_Mobile operator loan_total_enquiries': 1,
  'enqList_Mortgage_total_enquiries': 1,
  'enq

In [55]:
df_enq.columns.to_list()

['uid',
 'enqList_Avg_amount_enquired',
 'enqList_max_amount_enquired',
 'enqList_min_amount_enquired',
 'enqList_total_enquiries_per_customer',
 'enqList_unique_enquiry_types_per_customer',
 'enqList_Another type of loan_total_enquiries',
 'enqList_Car loan_total_enquiries',
 'enqList_Cash loan (non-earmarked)_total_enquiries',
 'enqList_Cash loans_total_enquiries',
 'enqList_Consumer credit_total_enquiries',
 'enqList_Credit card_total_enquiries',
 'enqList_Interbank credit_total_enquiries',
 'enqList_Loan for business development_total_enquiries',
 'enqList_Loan for purchase of shares (margin lending)_total_enquiries',
 'enqList_Loan for the purchase of equipment_total_enquiries',
 'enqList_Loan for working capital replenishment_total_enquiries',
 'enqList_Microloan_total_enquiries',
 'enqList_Mobile operator loan_total_enquiries',
 'enqList_Mortgage_total_enquiries',
 'enqList_Real estate loan_total_enquiries',
 'enqList_Revolving loans_total_enquiries',
 'enqList_Unknown type of l