# Importing Pandas and reading the CSV file

In [2]:
import pandas as pd

# Load the borrower dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='5k_borrowers_data.csv')

# Print a plain-text preview of the first 10 rows
print(df.head(n=10))

               Name Date of Birth  Gender Marital Status  Phone Number  \
0      Khushi Balan    19-02-1986    Male         Single    2169182416   
1  Umang Chatterjee    13-08-1980    Male        Married    4521712306   
2        Adira Dara    05-08-1995  Female        Widowed    4615155004   
3        Anvi Saini    16-11-2001    Male        Married  914836846677   
4       Kartik Kade    09-07-1979    Male        Widowed  910186397558   
5      Rohan Sekhon    31-07-1980    Male        Widowed    5864147671   
6    Purab Randhawa    19-10-1966  Female         Single    5187626325   
7     Vardaniya Roy    03-08-1998  Female        Widowed    3958629885   
8         Piya Ravi    06-10-2001    Male       Divorced  913408234563   
9  Rati Swaminathan    01-01-1992  Female        Widowed    7508064439   

             Email Address                                   Mailing Address  \
0      xbhakta@example.com                       29/74, Mander, Kulti 156906   
1      ivaidya@example.or

In [4]:
# Report, for every column, whether at least one of its values is missing
df.isnull().any(axis=0)

Name                            False
Date of Birth                   False
Gender                          False
Marital Status                  False
Phone Number                    False
Email Address                   False
Mailing Address                 False
Language Preference             False
Geographical Location           False
Credit Score                    False
Loan Type                       False
Loan Amount                     False
Loan Term                       False
Interest Rate                   False
Loan Purpose                    False
EMI                             False
IP Address                      False
Geolocation                     False
Repayment History               False
Days Left to Pay Current EMI    False
Delayed Payment                 False
dtype: bool

# Duplicating the original data so that cleaning and standardization are performed on the copy

In [8]:
# Write the current frame to a second CSV file, leaving out the index column
df.to_csv(path_or_buf='Duplicate_5k_borrowers_data.csv', index=False)

In [11]:
# Rebind df to the data read back from the duplicate CSV file
df = pd.read_csv(filepath_or_buffer='Duplicate_5k_borrowers_data.csv')

# Print a plain-text preview of the first 5 rows
print(df.head(n=5))

               Name Date of Birth  Gender Marital Status  Phone Number  \
0      Khushi Balan    19-02-1986    Male         Single    2169182416   
1  Umang Chatterjee    13-08-1980    Male        Married    4521712306   
2        Adira Dara    05-08-1995  Female        Widowed    4615155004   
3        Anvi Saini    16-11-2001    Male        Married  914836846677   
4       Kartik Kade    09-07-1979    Male        Widowed  910186397558   

             Email Address                        Mailing Address  \
0      xbhakta@example.com            29/74, Mander, Kulti 156906   
1      ivaidya@example.org    73/885\nSharma Marg\nSolapur 386449   
2  loyalvihaan@example.net   H.No. 468\nRaval Zila\nNanded 490253   
3        tgaba@example.org    04/25, Mandal Path, Guntakal 305639   
4     sanakaur@example.net  55/13, Srivastava Path\nRaipur-801775   

  Language Preference Geographical Location  Credit Score  ... Loan Amount  \
0             Marathi               Danapur           491  ...

# Converting Data types

In [12]:
# Parse 'Date of Birth' from day-month-year text into datetime values;
# with errors='coerce', entries that cannot be parsed become NaT instead of raising
df['Date of Birth'] = pd.to_datetime(
    df['Date of Birth'],
    errors='coerce',
    format='%d-%m-%Y',
)

In [17]:
#Adding the country code '+91' before the phone number where '+91' is not present 

#Creating a Function to check and add country code
def country_code_addition(phone):
    """Return ``phone`` normalised to a '+91'-prefixed string of digits.

    Parameters
    ----------
    phone : int, float or str
        Phone number in any form. It is converted with ``str`` and every
        non-digit character is discarded.

    Returns
    -------
    str
        '+' followed by the digits, with '91' prepended unless the digits
        already carry it as a country code. An input that contains no
        digits at all yields '+91'.
    """
    # An integral float such as 2169182416.0 would stringify with a trailing
    # '.0' and pick up a spurious '0' digit, so collapse it to an int first
    if isinstance(phone, float) and phone.is_integer():
        phone = int(phone)
    #Converting phone to string and removing non-numeric characters
    digits = ''.join(filter(str.isdigit, str(phone)))
    # A leading '91' only counts as the country code when more than 10 digits
    # are present; a number of 10 digits or fewer that merely starts with
    # '91' (e.g. 9123456789) still needs the prefix
    already_prefixed = len(digits) > 10 and digits.startswith('91')
    if not already_prefixed:
        digits = '91' + digits
    return '+' + digits

# Run country_code_addition over every entry of the 'Phone Number' column
df['Phone Number'] = df['Phone Number'].map(country_code_addition)

# Preview the first 10 rows
df.head(n=10)

Unnamed: 0,Name,Date of Birth,Gender,Marital Status,Phone Number,Email Address,Mailing Address,Language Preference,Geographical Location,Credit Score,...,Loan Amount,Loan Term,Interest Rate,Loan Purpose,EMI,IP Address,Geolocation,Repayment History,Days Left to Pay Current EMI,Delayed Payment
0,Khushi Balan,1986-02-19,Male,Single,912169182416,xbhakta@example.com,"29/74, Mander, Kulti 156906",Marathi,Danapur,491,...,34398,15,0.16,Medical Emergency,2545.36,10.65.217.95,"-49.3275015, 58.067192","[{'Payment Date': datetime.date(2023, 5, 3), '...",2,No
1,Umang Chatterjee,1980-08-13,Male,Married,914521712306,ivaidya@example.org,73/885\nSharma Marg\nSolapur 386449,Malayalam,Bangalore,325,...,96856,9,0.2,Home Renovation,11678.35,192.168.179.132,"14.951437, -136.491335","[{'Payment Date': datetime.date(2023, 5, 1), '...",4,Yes
2,Adira Dara,1995-08-05,Female,Widowed,914615155004,loyalvihaan@example.net,H.No. 468\nRaval Zila\nNanded 490253,Malayalam,Bijapur,624,...,91301,10,0.16,Home Renovation,9812.94,10.166.34.134,"64.013123, 175.275587","[{'Payment Date': datetime.date(2023, 5, 7), '...",7,No
3,Anvi Saini,2001-11-16,Male,Married,914836846677,tgaba@example.org,"04/25, Mandal Path, Guntakal 305639",Gujarati,Korba,346,...,78981,14,0.28,Education Fees,6678.01,10.107.161.197,"-11.6130395, 37.595772","[{'Payment Date': datetime.date(2023, 5, 10), ...",7,Yes
4,Kartik Kade,1979-07-09,Male,Widowed,910186397558,sanakaur@example.net,"55/13, Srivastava Path\nRaipur-801775",Malayalam,Jorhat,321,...,89953,1,0.29,Wedding Expenses,92126.86,192.168.255.28,"-71.7186905, 49.352990",[],11,No
5,Rohan Sekhon,1980-07-31,Male,Widowed,915864147671,orama@example.org,"53/19, Lal Street\nPondicherry-688297",Hindi,Davanagere,760,...,14147,2,0.29,Home Renovation,7330.93,10.56.169.62,"46.0948625, 4.226201","[{'Payment Date': datetime.date(2023, 5, 25), ...",9,No
6,Purab Randhawa,1966-10-19,Female,Single,915187626325,mandaarnav@example.org,89/03\nKrish Nagar\nAmbarnath-455869,English,Lucknow,658,...,69922,10,0.29,Home Renovation,7954.84,10.177.235.49,"-2.3971695, -31.699453","[{'Payment Date': datetime.date(2023, 5, 12), ...",6,Yes
7,Vardaniya Roy,1998-08-03,Female,Widowed,913958629885,sanghavidur@example.com,"H.No. 35, Rege Path, Kirari Suleman Nagar 292256",Marathi,Thrissur,441,...,75349,6,0.15,Medical Emergency,13113.27,172.17.100.164,"78.914761, -43.072960","[{'Payment Date': datetime.date(2023, 5, 6), '...",7,Yes
8,Piya Ravi,2001-10-06,Male,Divorced,913408234563,ivana98@example.org,"H.No. 239, Ben Ganj, Ballia 420501",Tamil,Berhampore,355,...,68574,13,0.28,Medical Emergency,6176.18,192.168.218.223,"19.605321, 75.110705","[{'Payment Date': datetime.date(2023, 5, 22), ...",14,Yes
9,Rati Swaminathan,1992-01-01,Female,Widowed,917508064439,ivansoman@example.net,"63/29\nSrivastava Zila, Bidhannagar-812274",English,Amritsar,502,...,39789,10,0.26,Wedding Expenses,4468.29,192.168.244.120,"-35.8522285, -162.788675","[{'Payment Date': datetime.date(2023, 5, 17), ...",1,No


In [20]:
# Lower-case every value of the 'Email Address' column
email_column = 'Email Address'
df[email_column] = df[email_column].str.lower()

# Checking for Duplicates

In [21]:
# Boolean mask: True for each row that fully repeats an earlier row
duplicates = df.duplicated(keep='first')

# Print how many rows were flagged (each True counts as 1)
print(duplicates.sum())

0


# Standardizing text values

In [30]:
# Lower-case the values of both categorical text columns
for column in ('Gender', 'Marital Status'):
    df[column] = df[column].str.lower()

# Preview the first 10 rows
df.head(n=10)

Unnamed: 0,Name,Date of Birth,Gender,Marital Status,Phone Number,Email Address,Mailing Address,Language Preference,Geographical Location,Credit Score,...,Loan Term,Interest Rate,Loan Purpose,EMI,IP Address,Repayment History,Days Left to Pay Current EMI,Delayed Payment,Latitude,Longitude
0,Khushi Balan,1986-02-19,male,single,912169182416,xbhakta@example.com,"29/74, Mander, Kulti 156906",Marathi,Danapur,491,...,15,0.16,Medical Emergency,2545.36,10.65.217.95,"[{'Payment Date': datetime.date(2023, 5, 3), '...",2,No,-49.327501,58.067192
1,Umang Chatterjee,1980-08-13,male,married,914521712306,ivaidya@example.org,73/885\nSharma Marg\nSolapur 386449,Malayalam,Bangalore,325,...,9,0.2,Home Renovation,11678.35,192.168.179.132,"[{'Payment Date': datetime.date(2023, 5, 1), '...",4,Yes,14.951437,-136.491335
2,Adira Dara,1995-08-05,female,widowed,914615155004,loyalvihaan@example.net,H.No. 468\nRaval Zila\nNanded 490253,Malayalam,Bijapur,624,...,10,0.16,Home Renovation,9812.94,10.166.34.134,"[{'Payment Date': datetime.date(2023, 5, 7), '...",7,No,64.013123,175.275587
3,Anvi Saini,2001-11-16,male,married,914836846677,tgaba@example.org,"04/25, Mandal Path, Guntakal 305639",Gujarati,Korba,346,...,14,0.28,Education Fees,6678.01,10.107.161.197,"[{'Payment Date': datetime.date(2023, 5, 10), ...",7,Yes,-11.613039,37.595772
4,Kartik Kade,1979-07-09,male,widowed,910186397558,sanakaur@example.net,"55/13, Srivastava Path\nRaipur-801775",Malayalam,Jorhat,321,...,1,0.29,Wedding Expenses,92126.86,192.168.255.28,[],11,No,-71.71869,49.35299
5,Rohan Sekhon,1980-07-31,male,widowed,915864147671,orama@example.org,"53/19, Lal Street\nPondicherry-688297",Hindi,Davanagere,760,...,2,0.29,Home Renovation,7330.93,10.56.169.62,"[{'Payment Date': datetime.date(2023, 5, 25), ...",9,No,46.094862,4.226201
6,Purab Randhawa,1966-10-19,female,single,915187626325,mandaarnav@example.org,89/03\nKrish Nagar\nAmbarnath-455869,English,Lucknow,658,...,10,0.29,Home Renovation,7954.84,10.177.235.49,"[{'Payment Date': datetime.date(2023, 5, 12), ...",6,Yes,-2.397169,-31.699453
7,Vardaniya Roy,1998-08-03,female,widowed,913958629885,sanghavidur@example.com,"H.No. 35, Rege Path, Kirari Suleman Nagar 292256",Marathi,Thrissur,441,...,6,0.15,Medical Emergency,13113.27,172.17.100.164,"[{'Payment Date': datetime.date(2023, 5, 6), '...",7,Yes,78.914761,-43.07296
8,Piya Ravi,2001-10-06,male,divorced,913408234563,ivana98@example.org,"H.No. 239, Ben Ganj, Ballia 420501",Tamil,Berhampore,355,...,13,0.28,Medical Emergency,6176.18,192.168.218.223,"[{'Payment Date': datetime.date(2023, 5, 22), ...",14,Yes,19.605321,75.110705
9,Rati Swaminathan,1992-01-01,female,widowed,917508064439,ivansoman@example.net,"63/29\nSrivastava Zila, Bidhannagar-812274",English,Amritsar,502,...,10,0.26,Wedding Expenses,4468.29,192.168.244.120,"[{'Payment Date': datetime.date(2023, 5, 17), ...",1,No,-35.852229,-162.788675


# Splitting and Deleting a Column

In [33]:
# Splitting the 'Geolocation' column into 'Latitude' and 'Longitude' for better understanding.
# The guard keeps this cell safe to re-run: once 'Geolocation' has been dropped,
# a second execution would otherwise raise a KeyError on df['Geolocation'].
if 'Geolocation' in df.columns:
    # Each value is text of the form "<latitude>, <longitude>"; split it on the comma
    df[['Latitude', 'Longitude']] = df['Geolocation'].str.split(',', expand=True)

    # Converting 'Latitude' and 'Longitude' to numeric types
    df['Latitude'] = pd.to_numeric(df['Latitude'])
    df['Longitude'] = pd.to_numeric(df['Longitude'])

    # Deleting the 'Geolocation' column (reassignment instead of inplace=True)
    df = df.drop(columns='Geolocation')

df.head(10)

Unnamed: 0,Name,Date of Birth,Gender,Marital Status,Phone Number,Email Address,Mailing Address,Language Preference,Geographical Location,Credit Score,...,Loan Term,Interest Rate,Loan Purpose,EMI,IP Address,Repayment History,Days Left to Pay Current EMI,Delayed Payment,Latitude,Longitude
0,Khushi Balan,1986-02-19,male,single,912169182416,xbhakta@example.com,"29/74, Mander, Kulti 156906",Marathi,Danapur,491,...,15,0.16,Medical Emergency,2545.36,10.65.217.95,"[{'Payment Date': datetime.date(2023, 5, 3), '...",2,No,-49.327501,58.067192
1,Umang Chatterjee,1980-08-13,male,married,914521712306,ivaidya@example.org,73/885\nSharma Marg\nSolapur 386449,Malayalam,Bangalore,325,...,9,0.2,Home Renovation,11678.35,192.168.179.132,"[{'Payment Date': datetime.date(2023, 5, 1), '...",4,Yes,14.951437,-136.491335
2,Adira Dara,1995-08-05,female,widowed,914615155004,loyalvihaan@example.net,H.No. 468\nRaval Zila\nNanded 490253,Malayalam,Bijapur,624,...,10,0.16,Home Renovation,9812.94,10.166.34.134,"[{'Payment Date': datetime.date(2023, 5, 7), '...",7,No,64.013123,175.275587
3,Anvi Saini,2001-11-16,male,married,914836846677,tgaba@example.org,"04/25, Mandal Path, Guntakal 305639",Gujarati,Korba,346,...,14,0.28,Education Fees,6678.01,10.107.161.197,"[{'Payment Date': datetime.date(2023, 5, 10), ...",7,Yes,-11.613039,37.595772
4,Kartik Kade,1979-07-09,male,widowed,910186397558,sanakaur@example.net,"55/13, Srivastava Path\nRaipur-801775",Malayalam,Jorhat,321,...,1,0.29,Wedding Expenses,92126.86,192.168.255.28,[],11,No,-71.71869,49.35299
5,Rohan Sekhon,1980-07-31,male,widowed,915864147671,orama@example.org,"53/19, Lal Street\nPondicherry-688297",Hindi,Davanagere,760,...,2,0.29,Home Renovation,7330.93,10.56.169.62,"[{'Payment Date': datetime.date(2023, 5, 25), ...",9,No,46.094862,4.226201
6,Purab Randhawa,1966-10-19,female,single,915187626325,mandaarnav@example.org,89/03\nKrish Nagar\nAmbarnath-455869,English,Lucknow,658,...,10,0.29,Home Renovation,7954.84,10.177.235.49,"[{'Payment Date': datetime.date(2023, 5, 12), ...",6,Yes,-2.397169,-31.699453
7,Vardaniya Roy,1998-08-03,female,widowed,913958629885,sanghavidur@example.com,"H.No. 35, Rege Path, Kirari Suleman Nagar 292256",Marathi,Thrissur,441,...,6,0.15,Medical Emergency,13113.27,172.17.100.164,"[{'Payment Date': datetime.date(2023, 5, 6), '...",7,Yes,78.914761,-43.07296
8,Piya Ravi,2001-10-06,male,divorced,913408234563,ivana98@example.org,"H.No. 239, Ben Ganj, Ballia 420501",Tamil,Berhampore,355,...,13,0.28,Medical Emergency,6176.18,192.168.218.223,"[{'Payment Date': datetime.date(2023, 5, 22), ...",14,Yes,19.605321,75.110705
9,Rati Swaminathan,1992-01-01,female,widowed,917508064439,ivansoman@example.net,"63/29\nSrivastava Zila, Bidhannagar-812274",English,Amritsar,502,...,10,0.26,Wedding Expenses,4468.29,192.168.244.120,"[{'Payment Date': datetime.date(2023, 5, 17), ...",1,No,-35.852229,-162.788675
