# AI Model Project
**Analyzing the client score**

*This is a basic data analysis and model-testing project, intended as a starting point for more complex projects.*

---

* **Import Client Data**

In [None]:
import pandas as pd

# Read the client dataset from disk and render it for a first visual check.
clients_csv = './data/clientes.csv'
table = pd.read_csv(clients_csv)
display(table)

* **Data Preprocessing**

In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
table.info()

# Encode non-numeric columns: every object-dtype column is replaced in place by
# integer codes. The target column 'score_credito' is left untouched.
# NOTE: the same encoder object is re-fitted for each column, so after the loop
# it only remembers the classes of the last column it encoded.
for column in table.columns:
    if table[column].dtype == 'object' and column != 'score_credito':
        table[column] = encoder.fit_transform(table[column])

display(table)


* **Get Data for Training Model**

In [None]:
from sklearn.model_selection import train_test_split

# Target: the 'score_credito' column the models are trained to predict.
y = table['score_credito']
# Features: everything except the target and the two columns excluded from training.
x = table.drop(columns=['id_cliente', 'score_credito', 'mes'])

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

* **Train Model**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# AI models
# The random forest is seeded (same seed as the train/test split) so that its
# accuracy and feature importances are reproducible across runs; the later
# importance-threshold cell depends on those importances.
treeModel = RandomForestClassifier(random_state=1)
knnModel = KNeighborsClassifier()

# train models
treeModel.fit(x_train, y_train)
knnModel.fit(x_train, y_train)

* **Use Model**

In [None]:
from sklearn.metrics import accuracy_score

# Class distribution of the target, for context when reading the accuracies below.
scores = table['score_credito'].value_counts()
print(scores)

# Predict on the held-out split; KNN receives the features as a plain NumPy array.
tree_predict = treeModel.predict(x_test)
knn_predict = knnModel.predict(x_test.to_numpy())

# Accuracy per model: random forest first, then KNN.
for predicted in (tree_predict, knn_predict):
    print(accuracy_score(y_test, predicted))

* **Defining Column Importance**

In [None]:
# Scale the fitted forest's feature importances by 100 and label each value
# with its feature column name (the single data column is named 0).
columns = x_test.columns.tolist()
importance = pd.DataFrame(data=treeModel.feature_importances_, index=columns).mul(100)
print(importance)

* **New Model Based on Column Importance**

In [None]:
# Columns always excluded from the features (the same three dropped for the
# first model, including the target). This is a list of column names, so it is
# annotated as a list rather than as `str`.
toDrop: list = ['id_cliente', 'mes', 'score_credito']

# `importance` holds feature_importances_ scaled by 100, so the threshold below
# additionally discards every feature whose importance is under 2 (percent).
for key, row in importance[0].items():
    if row < 2:
        toDrop.append(key)


x = table.drop(toDrop, axis=1)
y = table['score_credito']


# Same split parameters as the first model so the two runs are comparable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)


# Re-fit the existing estimator objects on the reduced feature set.
treeModel.fit(x_train, y_train)
knnModel.fit(x_train, y_train)

tree_predict = treeModel.predict(x_test)
# KNN is given a plain NumPy array, as in the earlier evaluation cell.
knn_predict  = knnModel.predict(x_test.to_numpy())

print(accuracy_score(y_test, tree_predict))
print(accuracy_score(y_test, knn_predict))

* **Use Model for new Clients**

In [None]:
# Load the clients to score and predict their 'score_credito' with both models.
new_clients = pd.read_csv('./data/clientes.csv')

# Keep only the feature columns that survived the importance filter.
x = new_clients.drop(columns=toDrop)

# Integer-encode the remaining object-dtype columns.
# NOTE(review): the encoder is re-fitted on this data rather than reusing the
# mapping learned during preprocessing; the codes only line up with training if
# each column contains the same set of values here — confirm for real new data.
text_columns = [
    name for name in x.columns
    if name != 'score_credito' and x[name].dtype == 'object'
]
for name in text_columns:
    x[name] = encoder.fit_transform(x[name])

y = new_clients['score_credito']

tree_predict = treeModel.predict(x)
knn_predict = knnModel.predict(x.to_numpy())

display(tree_predict, knn_predict)

* **Set score with AI model**

In [None]:
# Overwrite the stored score with the random-forest prediction for every row.
# One whole-column assignment replaces the element-by-element chained write
# (new_clients[c][i] = ...), which pandas flags with SettingWithCopyWarning and
# which does not update the frame when Copy-on-Write is enabled.
# The membership check keeps the cell a no-op when the column is absent.
if 'score_credito' in new_clients.columns:
    new_clients['score_credito'] = tree_predict

In [None]:
# Render the client table so the 'score_credito' column can be inspected.
display(new_clients)