In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
base_credit=pd.read_csv('credit_data.csv')

In [3]:
base_credit['age'].fillna(base_credit['age'].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  base_credit['age'].fillna(base_credit['age'].mean(), inplace=True)


In [4]:
base_credit.loc[pd.isnull(base_credit['age'])]

Unnamed: 0,clientid,income,age,loan,default


In [5]:
base_credit.loc[base_credit['clientid'].isin([29, 31, 32])]

Unnamed: 0,clientid,income,age,loan,default
28,29,59417.805406,40.807559,2082.625938,0
30,31,48528.852796,40.807559,6155.78467,0
31,32,23526.302555,40.807559,2862.010139,0


In [6]:
type(base_credit)

pandas.core.frame.DataFrame

In [26]:
X_credit = base_credit.iloc[:, 1:4].values

In [27]:
X_credit

array([[6.61559251e+04, 5.90170151e+01, 8.10653213e+03],
       [3.44151540e+04, 4.81171531e+01, 6.56474502e+03],
       [5.73171701e+04, 6.31080495e+01, 8.02095330e+03],
       ...,
       [4.43114493e+04, 2.80171669e+01, 5.52278669e+03],
       [4.37560566e+04, 6.39717958e+01, 1.62272260e+03],
       [6.94365796e+04, 5.61526170e+01, 7.37883360e+03]])

In [9]:
type(X_credit)

numpy.ndarray

In [28]:
y_credit = base_credit.iloc[:, 4].values

In [29]:
y_credit

array([0, 0, 0, ..., 1, 0, 0])

In [12]:
type(y_credit)

numpy.ndarray

In [19]:
media_idades_validas = base_credit['age'][base_credit['age'] > 0].mean()

In [20]:
base_credit.loc[base_credit['age'] < 0, 'age'] = 40.92

In [25]:
base_credit.loc[base_credit['age'] < 0]

Unnamed: 0,clientid,income,age,loan,default


In [32]:
X_credit[:, 0].min(), X_credit[:,1].min(), X_credit[:,2].min()

(np.float64(20014.4894700497),
 np.float64(18.055188510566897),
 np.float64(1.37762959325451))

In [33]:
X_credit[:, 0].max(), X_credit[:, 1].max(), X_credit[:,2].max()

(np.float64(69995.6855783239),
 np.float64(63.971795841120205),
 np.float64(13766.0512393337))

In [35]:
from sklearn.preprocessing import StandardScaler
scaler_credit = StandardScaler()
X_credit = scaler_credit.fit_transform(X_credit)

In [36]:
X_credit[:, 0].min(), X_credit[:,1].min(), X_credit[:,2].min()

(np.float64(-1.7676158019964077),
 np.float64(-1.7264008335456944),
 np.float64(-1.4592791099462408))

In [37]:
X_credit[:, 0].max(), X_credit[:, 1].max(), X_credit[:,2].max()

(np.float64(1.7220222385319197),
 np.float64(1.7393808868191305),
 np.float64(3.0616609141708273))

In [38]:
X_credit

array([[ 1.45393393,  1.36539444,  1.20281942],
       [-0.76217555,  0.54267377,  0.69642695],
       [ 0.83682073,  1.67418538,  1.17471147],
       ...,
       [-0.07122592, -0.97447153,  0.35420081],
       [-0.11000289,  1.73938089, -0.92675625],
       [ 1.682986  ,  1.14918992,  0.96381038]])