# Instalação e importação de bibliotecas

In [104]:
import pandas as pd

In [105]:
!pip3 install pycaret[full]



# Importação dos dados

In [106]:
# Load the mall-customers CSV into a DataFrame.
# NOTE(review): the path is absolute ('/content/...'), so the file must exist at
# exactly that location in the runtime — confirm before running elsewhere.
dataframe = pd.read_csv('/content/Mall_Customers.csv')

In [107]:
# Display the freshly loaded frame to inspect its columns and values.
dataframe

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18


# Faixa etária a partir da idade

In [108]:
def faixa_etaria(valor):
    """Map a numeric age to its age-group label.

    Groups (upper bounds inclusive):
        <= 12 -> 'Criança'
        13-18 -> 'Adolescente'
        19-30 -> 'Jovem-Adulto'
        31-65 -> 'Adulto'
        > 65  -> 'Idoso'

    Returns None when no comparison holds (e.g. for NaN).
    """
    # Thresholds are checked in ascending order, so the first upper bound that
    # the value does not exceed decides the group.
    limites_superiores = (
        (12, 'Criança'),
        (18, 'Adolescente'),
        (30, 'Jovem-Adulto'),
        (65, 'Adulto'),
    )
    for limite, rotulo in limites_superiores:
        if valor <= limite:
            return rotulo

    # Explicit comparison (rather than a bare fallback) keeps NaN unlabelled.
    if valor > 65:
        return 'Idoso'
    return None

# Derive the categorical 'Age group' column by labelling each age element-wise.
dataframe['Age group'] = dataframe['Age'].map(faixa_etaria)

In [109]:
# The raw age is no longer needed once 'Age group' exists; remove the column
# from the frame in place.
dataframe.drop(columns=['Age'], inplace=True)

# Show the frame after the column removal.
dataframe

Unnamed: 0,CustomerID,Gender,Annual Income (k$),Spending Score (1-100),Age group
0,1,Male,15,39,Jovem-Adulto
1,2,Male,15,81,Jovem-Adulto
2,3,Female,16,6,Jovem-Adulto
3,4,Female,16,77,Jovem-Adulto
4,5,Female,17,40,Adulto
...,...,...,...,...,...
195,196,Female,120,79,Adulto
196,197,Female,126,28,Adulto
197,198,Male,126,74,Adulto
198,199,Male,137,18,Adulto


# Qualitativos -> Quantitativos

In [110]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Replace the textual Gender labels with the integer codes assigned by the encoder.
dataframe['Gender'] = le.fit_transform(dataframe['Gender'])

# Report the code of every label the encoder learned. Iterating over
# `classes_` (whose positions are the assigned codes) avoids assuming that
# exactly two labels exist and removes the copy-pasted print statements.
for codigo, rotulo in enumerate(le.classes_):
    print(f"Valor Numérico {codigo} corresponde ao rótulo '{rotulo}'")

Valor Numérico 0 corresponde ao rótulo 'Female'
Valor Numérico 1 corresponde ao rótulo 'Male'


In [111]:
# Replace the age-group labels with integer codes.
# NOTE: this refits the same `le` instance that encoded Gender, so after this
# cell `le` only knows the age-group mapping.
dataframe['Age group'] = le.fit_transform(dataframe['Age group'])

# Report the code of every label actually present in the data. The number of
# age groups depends on the ages in the dataset (faixa_etaria can produce up to
# five labels), so iterate over `classes_` instead of hard-coding indices 0-3:
# a fifth label would otherwise go unreported, and fewer than four labels
# would make the lookup of a missing index fail.
for codigo, rotulo in enumerate(le.classes_):
    print(f"Valor Numérico {codigo} corresponde ao rótulo '{rotulo}'")

Valor Numérico 0 corresponde ao rótulo 'Adolescente'
Valor Numérico 1 corresponde ao rótulo 'Adulto'
Valor Numérico 2 corresponde ao rótulo 'Idoso'
Valor Numérico 3 corresponde ao rótulo 'Jovem-Adulto'


# Normalização min-max (0 a 1) das colunas numéricas

In [112]:
# Min-max scale the annual income column so its values span 0 to 1.
coluna_alvo = 'Annual Income (k$)'

serie_original = dataframe[coluna_alvo]
valor_minimo, valor_maximo = serie_original.min(), serie_original.max()

# (x - min) / (max - min): the smallest value maps to 0 and the largest to 1.
amplitude = valor_maximo - valor_minimo
dataframe[coluna_alvo] = (serie_original - valor_minimo) / amplitude

print(dataframe)


     CustomerID  Gender  Annual Income (k$)  Spending Score (1-100)  Age group
0             1       1            0.000000                      39          3
1             2       1            0.000000                      81          3
2             3       0            0.008197                       6          3
3             4       0            0.008197                      77          3
4             5       0            0.016393                      40          1
..          ...     ...                 ...                     ...        ...
195         196       0            0.860656                      79          1
196         197       0            0.909836                      28          1
197         198       1            0.909836                      74          1
198         199       1            1.000000                      18          1
199         200       1            1.000000                      83          3

[200 rows x 5 columns]


In [113]:
# Min-max scale the spending score column so its values span 0 to 1.
coluna_alvo = 'Spending Score (1-100)'

valores_brutos = dataframe[coluna_alvo]
valor_minimo = valores_brutos.min()
valor_maximo = valores_brutos.max()

# Shift by the minimum, then divide by the observed range.
deslocados = valores_brutos - valor_minimo
dataframe[coluna_alvo] = deslocados / (valor_maximo - valor_minimo)

print(dataframe)

     CustomerID  Gender  Annual Income (k$)  Spending Score (1-100)  Age group
0             1       1            0.000000                0.387755          3
1             2       1            0.000000                0.816327          3
2             3       0            0.008197                0.051020          3
3             4       0            0.008197                0.775510          3
4             5       0            0.016393                0.397959          1
..          ...     ...                 ...                     ...        ...
195         196       0            0.860656                0.795918          1
196         197       0            0.909836                0.275510          1
197         198       1            0.909836                0.744898          1
198         199       1            1.000000                0.173469          1
199         200       1            1.000000                0.836735          3

[200 rows x 5 columns]


# Renomeia colunas

In [114]:
# Show the frame with its original column names, before they are renamed.
dataframe

Unnamed: 0,CustomerID,Gender,Annual Income (k$),Spending Score (1-100),Age group
0,1,1,0.000000,0.387755,3
1,2,1,0.000000,0.816327,3
2,3,0,0.008197,0.051020,3
3,4,0,0.008197,0.775510,3
4,5,0,0.016393,0.397959,1
...,...,...,...,...,...
195,196,0,0.860656,0.795918,1
196,197,0,0.909836,0.275510,1
197,198,1,0.909836,0.744898,1
198,199,1,1.000000,0.173469,1


In [115]:
# Give the derived columns short camelCase names (no spaces or symbols),
# applying all three renames in a single in-place call.
novos_nomes = {
    'Annual Income (k$)': 'annualIncome',
    'Spending Score (1-100)': 'spendingScore',
    'Age group': 'ageGroup',
}
dataframe.rename(columns=novos_nomes, inplace=True)

In [116]:
# Show the frame again to confirm the new column names.
dataframe

Unnamed: 0,CustomerID,Gender,annualIncome,spendingScore,ageGroup
0,1,1,0.000000,0.387755,3
1,2,1,0.000000,0.816327,3
2,3,0,0.008197,0.051020,3
3,4,0,0.008197,0.775510,3
4,5,0,0.016393,0.397959,1
...,...,...,...,...,...
195,196,0,0.860656,0.795918,1
196,197,0,0.909836,0.275510,1
197,198,1,0.909836,0.744898,1
198,199,1,1.000000,0.173469,1


# Pycaret

In [117]:
from pycaret.classification import *

# Initialise the PyCaret classification experiment:
#   - `ageGroup` is the label to predict;
#   - `CustomerID` is excluded from the features;
#   - `session_id` fixes the random seed so the train/test split and the
#     model scores are reproducible across runs. 1049 is the session id
#     shown in the recorded output of this cell.
s = setup(
    data=dataframe,
    target='ageGroup',
    ignore_features=['CustomerID'],
    session_id=1049,
)

Unnamed: 0,Description,Value
0,Session id,1049
1,Target,ageGroup
2,Target type,Multiclass
3,Original data shape,"(200, 5)"
4,Transformed data shape,"(200, 4)"
5,Transformed train set shape,"(140, 4)"
6,Transformed test set shape,"(60, 4)"
7,Ignore features,1
8,Numeric features,3
9,Preprocess,True


In [118]:
# Run PyCaret's model comparison and keep the returned model; the results table
# in the recorded output is ordered by Accuracy.
# NOTE(review): in the recorded run the Dummy Classifier reaches 0.6286
# accuracy, so the top model (0.6857) is only marginally above the baseline.
melhor_modelo = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.6857,0.0554,0.6857,0.6423,0.631,0.2613,0.3101,0.097
knn,K Neighbors Classifier,0.6714,0.0624,0.6714,0.6406,0.6218,0.2487,0.2965,0.107
ridge,Ridge Classifier,0.6714,0.0,0.6714,0.6198,0.6046,0.2083,0.2543,0.127
lr,Logistic Regression,0.6643,0.0566,0.6643,0.5792,0.5818,0.1703,0.215,0.13
nb,Naive Bayes,0.6643,0.0605,0.6643,0.6481,0.6142,0.2237,0.2743,0.095
qda,Quadratic Discriminant Analysis,0.6286,0.0574,0.6286,0.622,0.5772,0.1544,0.2008,0.157
dummy,Dummy Classifier,0.6286,0.05,0.6286,0.3959,0.4856,0.0,0.0,0.1
lightgbm,Light Gradient Boosting Machine,0.6214,0.0581,0.6214,0.6087,0.5977,0.2176,0.2445,0.61
rf,Random Forest Classifier,0.6143,0.0588,0.6143,0.6648,0.608,0.2282,0.2539,1.343
et,Extra Trees Classifier,0.6,0.0621,0.6,0.6398,0.5899,0.2167,0.2502,0.446


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

In [119]:
# Display the estimator selected by the comparison step.
melhor_modelo

In [120]:
# Open the interactive evaluation widget (plot-type selector) for the selected model.
evaluate_model(melhor_modelo)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [121]:
# Persist the transformation pipeline together with the selected model under
# the name 'Modelo'.
# NOTE(review): presumably written as 'Modelo.pkl' in the working directory — confirm.
save_model(melhor_modelo, 'Modelo')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=/tmp/joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['Gender', 'annualIncome',
                                              'spendingScore'],
                                     transformer=SimpleImputer(add_indicator=False,
                                                               copy=True,
                                                               fill_value=None,
                                                               keep_empty_features=False,
                                                               missing_values=nan,
                                                               strategy='mean',
                                                               verbose='deprecated'))),
                 ('categorical_imputer',
                  Transformer...e=[],
                                     transformer=SimpleImputer(ad