# In [None]:
import pandas as pd
import numpy as np

# Columns pulled from each CSV. The two lists are positionally aligned:
# entry i of cols_accepted is the accepted-file counterpart of entry i of
# cols_rejected, which is what the rename below relies on.
cols_rejected = [
    'Amount Requested',
    'Risk_Score',
    'Debt-To-Income Ratio',
    'Employment Length',
    'State',
]
cols_accepted = [
    'loan_amnt',
    'fico_range_low',
    'dti',
    'emp_length',
    'addr_state',
]

# Read only the needed columns from each file.
df_reject = pd.read_csv('rejected_2007_to_2018Q4.csv', usecols=cols_rejected)
df_accept = pd.read_csv('accepted_2007_to_2018Q4.csv', usecols=cols_accepted)

# Give the accepted frame the same column names as the rejected frame.
df_accept = df_accept.rename(columns=dict(zip(cols_accepted, cols_rejected)))

# Target label: 1 for rows from the accepted file, 0 for rows from the
# rejected file.
df_accept['Approved'] = 1
df_reject['Approved'] = 0

# In the rejected frame, remove any '%' characters from the ratio and convert
# it to a number; values that cannot be parsed become NaN.
df_reject['Debt-To-Income Ratio'] = pd.to_numeric(
    df_reject['Debt-To-Income Ratio'].astype(str).str.replace('%', ''),
    errors='coerce',
)

def clean_emp_length(x):
    """Convert an employment-length value to a whole number of years.

    The markers ``' years'``, ``' year'``, ``'+'`` and ``'< '`` are removed
    from the text form of the value and the remainder is parsed as a number,
    so ``'10+ years'`` gives 10 and ``'3 years'`` gives 3.

    Args:
        x: Raw cell value: a string, a number, or a missing value.

    Returns:
        int: The parsed number of years (fractional values are truncated).
        Missing values and values that cannot be parsed as a number give 0.
    """
    if pd.isna(x):
        return 0

    # NOTE(review): removing '< ' makes '< 1 year' parse as 1, the same result
    # as '1 year'. Kept as-is so existing outputs do not change; confirm this
    # is the intended encoding.
    text = (
        str(x)
        .replace(' years', '')
        .replace(' year', '')
        .replace('+', '')
        .replace('< ', '')
    )

    try:
        return int(text)
    except ValueError:
        pass

    # Fall back to float parsing so numeric cells such as 5.0 (rendered as
    # '5.0') still convert. Anything that is still not a finite number
    # (including the literal text 'nan') is treated like a missing value
    # instead of aborting the whole column transformation.
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        return 0

# Normalise employment length to integer years in each source frame.
df_accept['Employment Length'] = (
    df_accept['Employment Length'].apply(clean_emp_length)
)
df_reject['Employment Length'] = (
    df_reject['Employment Length'].apply(clean_emp_length)
)

# Stack the accepted rows on top of the rejected rows, then discard every row
# that lacks a risk score or a debt-to-income ratio.
df_final = pd.concat([df_accept, df_reject]).dropna(
    subset=['Risk_Score', 'Debt-To-Income Ratio']
)

# Write the combined dataset to disk without the index column.
df_final.to_csv('cleaned_loan_approval_data.csv', index=False)

# Report the final row count and show the first few rows.
print(f"Success! Dataset created with {len(df_final)} rows.")
print(df_final.head())