# Python AI Project: Artificial Intelligence and Predictions

### Case: Customers' Credit Score

You have been hired by a bank to determine the credit score of its customers. You need to analyze all of the bank's customers and, based on this analysis, build a model that reads a customer's information and automatically classifies their credit score as Bad, Okay, or Good.


In [None]:
!pip install pandas numpy scikit-learn

In [19]:
# Load the customer data from the CSV file into a DataFrame.

import pandas as pd

table = pd.read_csv("customers.csv")

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only appends a stray "None" to the output.
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 25 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   id_cliente                100000 non-null  int64  
 1   mes                       100000 non-null  int64  
 2   idade                     100000 non-null  float64
 3   profissao                 100000 non-null  object 
 4   salario_anual             100000 non-null  float64
 5   num_contas                100000 non-null  float64
 6   num_cartoes               100000 non-null  float64
 7   juros_emprestimo          100000 non-null  float64
 8   num_emprestimos           100000 non-null  float64
 9   dias_atraso               100000 non-null  float64
 10  num_pagamentos_atrasados  100000 non-null  float64
 11  num_verificacoes_credito  100000 non-null  float64
 12  mix_credito               100000 non-null  object 
 13  divida_total              100000 non-null  fl

None

In [20]:
# Encode the categorical (object-dtype) columns as integers, because machine
# learning algorithms require numerical inputs. The target column
# "score_credito" is deliberately skipped and keeps its original text labels.

from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

# NOTE: the single encoder is re-fitted for every column, so once the loop ends
# it only holds the mapping of the last column that was encoded.
for column in table.columns:
    if table[column].dtype == "object" and column != "score_credito":
        table[column] = encoder.fit_transform(table[column])

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only appends a stray "None" to the output.
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 25 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   id_cliente                100000 non-null  int64  
 1   mes                       100000 non-null  int64  
 2   idade                     100000 non-null  float64
 3   profissao                 100000 non-null  int32  
 4   salario_anual             100000 non-null  float64
 5   num_contas                100000 non-null  float64
 6   num_cartoes               100000 non-null  float64
 7   juros_emprestimo          100000 non-null  float64
 8   num_emprestimos           100000 non-null  float64
 9   dias_atraso               100000 non-null  float64
 10  num_pagamentos_atrasados  100000 non-null  float64
 11  num_verificacoes_credito  100000 non-null  float64
 12  mix_credito               100000 non-null  int32  
 13  divida_total              100000 non-null  fl

None

In [21]:
# Separate the target from the features.
#   y: the column we want to predict ("score_credito").
#   x: the columns the models learn from, i.e. everything except the target
#      itself and "id_cliente", which is excluded because it is just a random
#      number.

non_feature_columns = ["score_credito", "id_cliente"]

x = table.drop(columns=non_feature_columns)

y = table["score_credito"]

In [22]:
# Split the data into training and testing sets.

from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle, and therefore every score computed
# from this split, reproducible when the notebook is re-run from a fresh kernel.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [23]:
# Create the two models to compare: a random forest and a k-nearest-neighbours
# classifier.

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# The random forest is built with randomness (bootstrap samples and feature
# subsets), so it is seeded to give the same model on every run.
treedecision_model = RandomForestClassifier(random_state=42)
knn_model = KNeighborsClassifier()

In [24]:
# Train both models on the training set.

treedecision_model.fit(x_train, y_train)

# The KNN model is later asked to predict on a plain NumPy array
# (x_test.to_numpy()), so it is fitted on a NumPy array as well. Fitting on the
# DataFrame and predicting on an array makes scikit-learn warn that the input
# has no valid feature names.
knn_model.fit(x_train.to_numpy(), y_train)

In [25]:
# Predict the credit score of every test-set customer with both trained models.

# The KNN model receives the test features as a plain NumPy array, while the
# random forest receives the DataFrame itself.
x_test_values = x_test.to_numpy()

knn_prediciton = knn_model.predict(x_test_values)
treedecision_prediction = treedecision_model.predict(x_test)



In [26]:
# Accuracy of each model on the test set, printed one per line:
# random forest first, then KNN.

from sklearn.metrics import accuracy_score

for predicted_scores in (treedecision_prediction, knn_prediciton):
    print(accuracy_score(y_test, predicted_scores))

0.82816
0.74504


In [28]:
# Predict the credit score of new customers stored in a separate CSV file.

table_new_customers = pd.read_csv("new_customers.csv")

# Categorical columns must receive the SAME integer codes the model saw during
# training. Re-fitting an encoder on the new file would number the categories
# from scratch using only the values present there, so the codes would silently
# disagree with the training ones. The encoders are therefore fitted on the
# original training file and only applied (transform) to the new data, which
# raises ValueError for a category that never appeared during training.
categorical_columns = [
    column
    for column in table_new_customers.columns
    if table_new_customers[column].dtype == "object" and column != "score_credito"
]

if categorical_columns:
    # Only the needed columns are read back from the training file.
    training_categories = pd.read_csv("customers.csv", usecols=categorical_columns)
    for column in categorical_columns:
        column_encoder = LabelEncoder().fit(training_categories[column])
        table_new_customers[column] = column_encoder.transform(table_new_customers[column])

# Keep exactly the feature columns the model was trained on, in the same order;
# any extra column in the new file (e.g. an id or a label) is left out.
previsions = treedecision_model.predict(table_new_customers[x.columns])
print(previsions)

['Poor' 'Good' 'Good']
