In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px
import plotly.graph_objects as go # para hacer graficos interactivos
from scipy import stats
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')

# Initial configuration
# Plots: apply the 'seaborn-v0_8' matplotlib style sheet.
plt.style.use('seaborn-v0_8')
# Tables: set pandas' column-count and width display options to None
# (presumably so wide frames are shown without truncation).
pd.options.display.max_columns = None
pd.options.display.width = None

In [9]:
# First look at the raw file using read_csv's default settings; only the
# column labels are printed to check how the header row was parsed.
# The recorded output lists a single column whose label is every field name
# joined by ';', i.e. the file is not comma-delimited.
df = pd.read_csv(filepath_or_buffer='Bankloan.csv')
print(df.columns)


Index(['age;ed;employ;address;income;debtinc;creddebt;othdebt;default'], dtype='object')


In [30]:
# Load data
def load_data(file_path, sep=';', encoding='utf-8-sig'):
    """Read the bank-loan CSV and return a cleaned DataFrame.

    Steps, in order:

    1. Read the file with the given separator and encoding.
    2. Coerce the nine expected columns to numeric; cells that cannot be
       parsed become NaN.
    3. Fill missing values in every numeric column except ``default`` with
       that column's median.
    4. Drop rows whose ``default`` value is missing, then drop exact
       duplicate rows. The original index labels are kept (no reset).
    5. Add ``default_status`` (text label mapped from ``default``) and
       ``ratio_debtinc`` (``debtinc`` divided by ``income``, with an income
       of 0 replaced by 1 to avoid dividing by zero).

    A one-line summary of the resulting shape is printed before returning.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    sep : str, default ';'
        Field delimiter passed to ``pandas.read_csv``.
    encoding : str, default 'utf-8-sig'
        Text encoding passed to ``pandas.read_csv``; 'utf-8-sig' also
        accepts files that start with a byte-order mark.

    Returns
    -------
    pandas.DataFrame
        The cleaned data with the two derived columns appended.

    Raises
    ------
    KeyError
        If any of the expected columns is absent from the file.
    """
    numeric_cols = ['age', 'ed', 'employ', 'address', 'income',
                    'debtinc', 'creddebt', 'othdebt', 'default']
    target_col = 'default'

    df = pd.read_csv(file_path, sep=sep, encoding=encoding)

    # Fail early with the full list of missing columns instead of a bare
    # KeyError on whichever column happens to be looked up first.
    missing_cols = [col for col in numeric_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f'Faltan columnas esperadas en {file_path!r}: {missing_cols}'
        )

    # Data cleaning
    # Convert the expected columns to numeric; bad cells become NaN.
    df = df.assign(
        **{col: pd.to_numeric(df[col], errors='coerce') for col in numeric_cols}
    )

    # Impute features only. The target is deliberately left out: a median of
    # a 0/1 column can be 0.5 (which has no label below), and filling it in
    # would invent outcomes for rows that have none.
    feature_medians = df.drop(columns=[target_col]).median(numeric_only=True)
    df = df.fillna(feature_medians)

    # Rows without a target value cannot be labelled, so they are removed.
    n_unlabelled = int(df[target_col].isna().sum())
    if n_unlabelled:
        print(f'Filas sin valor en default descartadas: {n_unlabelled}')
        df = df.dropna(subset=[target_col])

    # Remove exact duplicate rows.
    df = df.drop_duplicates()

    # NOTE(review): 'default' presumably flags a loan default (1 = defaulted),
    # while the labels read like an approval decision - confirm the wording.
    # NOTE(review): 'debtinc' looks like it is already a debt-to-income
    # figure; dividing it by income again is kept as-is - confirm the intent.
    df = df.assign(
        default_status=df[target_col].map({0: 'Aprobado', 1: 'No Aprobado'}),
        ratio_debtinc=df['debtinc'] / df['income'].replace(0, 1),
    )

    print(f'Dataset cargado: {df.shape[0]} filas y {df.shape[1]} columnas')

    return df

# Load and clean the dataset; load_data also prints the resulting shape.
df = load_data(file_path='Bankloan.csv')



Dataset cargado: 700 filas y 11 columnas
