# Aprendizado por regra

## Importando as libs

In [1]:
# Optional one-time setup: uncomment the line below to install Orange3.
# !pip install Orange3

In [2]:
import Orange
import numpy as np
import pandas as pd
import pickle

## Base risco de crédito - Exemplo didático

### Importando a base de dados

In [3]:
# Load the small credit-risk teaching dataset into an Orange Table
# (path is relative to the notebook's working directory).
base_risco_credito = Orange.data.Table('./data/04.risco_credito_regras.csv')

In [4]:
# Preview the table; each row is displayed as `attribute values | class value`.
base_risco_credito

[[ruim, alta, nenhuma, 0_15 | alto],
 [desconhecida, alta, nenhuma, 15_35 | alto],
 [desconhecida, baixa, nenhuma, 15_35 | moderado],
 [desconhecida, baixa, nenhuma, acima_35 | alto],
 [desconhecida, baixa, nenhuma, acima_35 | baixo],
 ...
]

In [5]:
# Show the domain: the attribute names followed by the class variable after the "|".
base_risco_credito.domain

[historia, divida, garantias, renda | risco]

### Experimentando o modelo

In [6]:
# Create a CN2 rule learner and call it on the table to obtain the rule-based model
# (its induced rules are available afterwards through `rule_list`).
cn2 = Orange.classification.rules.CN2Learner()
regras_risco_credito = cn2(base_risco_credito)

In [7]:
# Print every induced rule, one per line, in the order stored in the model.
for regra in regras_risco_credito.rule_list:
    print(regra)

IF renda==0_15 THEN risco=alto 
IF historia==boa AND divida!=alta THEN risco=baixo 
IF historia==boa AND garantias!=nenhuma THEN risco=baixo 
IF historia==boa AND renda!=15_35 THEN risco=baixo 
IF historia==boa THEN risco=moderado 
IF divida==alta THEN risco=alto 
IF historia!=desconhecida THEN risco=moderado 
IF garantias==adequada THEN risco=baixo 
IF renda==15_35 THEN risco=moderado 
IF historia==desconhecida THEN risco=baixo 
IF TRUE THEN risco=alto 


In [8]:
# Classify two hand-written applicants (attribute order: historia, divida, garantias, renda):
#   1) good history, high debt, no collateral, income above 35
#   2) bad history, high debt, adequate collateral, income below 15
# The model returns one class index per applicant.
y_pred = regras_risco_credito([
    ['boa', 'alta', 'nenhuma', 'acima_35'],
    ['ruim', 'alta', 'adequada', '0_15'],
])
y_pred

array([1, 0])

### Análise dos resultados

In [9]:
# Class labels in index order; the predicted values are indices into this tuple.
base_risco_credito.domain.class_var.values

('alto', 'baixo', 'moderado')

In [10]:
# Translate each predicted class index into its human-readable label.
for indice_classe in y_pred:
    print(base_risco_credito.domain.class_var.values[indice_classe])

baixo
alto


## Base credit data - Resultado da análise: 97.2%

### Importando a base de dados

In [11]:
# Load the credit dataset into an Orange Table
# (path is relative to the notebook's working directory).
base_credit = Orange.data.Table('./data/05.credit_data_regras.csv')

In [12]:
# Show the domain: the attribute names followed by the class variable after the "|".
base_credit.domain

[income, age, loan | default]

In [13]:
# Split the data into two tables: the first holds the sampled fraction
# (n=0.25 -> 25% of the rows) and the second holds the remaining rows.
# A fixed random_state makes the split, and every metric derived from it,
# reproducible across kernel restarts; without it each run draws a new split.
base_dividida = Orange.evaluation.testing.sample(base_credit, n=0.25, random_state=42)
base_dividida

([[33159.2, 42.3432, 2135.53 | 0],
  [66054.5, 39.0771, 10321.1 | 0],
  [57746.6, 63.6253, 727.195 | 0],
  [63910.3, 56.6356, 8986.72 | 0],
  [43777.5, 20.0109, 3601.3 | 0],
  ...
 ],
 [[31659.7, 31.9282, 858.511 | 0],
  [39970.2, 40.3681, 7867.62 | 0],
  [65588.4, 22.9182, 7879.74 | 1],
  [61427.4, 20.108, 2163.31 | 0],
  [36367, 47.1914, 371.041 | 0],
  ...
 ])

In [14]:
# First element of the split: the sampled part (used as the test set below).
base_dividida[0]

[[33159.2, 42.3432, 2135.53 | 0],
 [66054.5, 39.0771, 10321.1 | 0],
 [57746.6, 63.6253, 727.195 | 0],
 [63910.3, 56.6356, 8986.72 | 0],
 [43777.5, 20.0109, 3601.3 | 0],
 ...
]

In [15]:
# Second element of the split: the larger part (used as the training set below).
base_dividida[1]

[[31659.7, 31.9282, 858.511 | 0],
 [39970.2, 40.3681, 7867.62 | 0],
 [65588.4, 22.9182, 7879.74 | 1],
 [61427.4, 20.108, 2163.31 | 0],
 [36367, 47.1914, 371.041 | 0],
 ...
]

In [16]:
# The split is a pair (sampled part, other part): the sampled 25% is held out
# for testing and the other part is used for training.
base_teste, base_treinamento = base_dividida

In [17]:
# Sanity check: number of rows in the training set and in the test set.
len(base_treinamento), len(base_teste)

(1500, 500)

### Experimentando o modelo

In [18]:
# Create a fresh CN2 rule learner and induce the rule model from the training split only.
cn2 = Orange.classification.rules.CN2Learner()
regras_credit = cn2(base_treinamento)

In [19]:
# Print every induced rule, one per line, in the order stored in the model.
for regra in regras_credit.rule_list:
    print(regra)

IF age>=34.9257164876908 THEN default=0 
IF loan<=2495.13299137587 AND income>=20145.9885970689 THEN default=0 
IF income<=31722.7309499867 AND loan>=3105.4430213977303 THEN default=1 
IF loan>=7718.479795185201 AND loan>=9698.582169129 THEN default=1 
IF loan>=7718.479795185201 AND age>=20.9909665295854 THEN default=1 
IF income>=58132.4712652713 AND age>=19.5239827041514 THEN default=0 
IF age<=18.075335860718 AND income>=50501.7266888171 THEN default=0 
IF income>=46801.274286117405 AND age>=33.5510297357105 THEN default=0 
IF age<=18.413736339658502 AND age>=18.413736339658502 THEN default=0 
IF age<=18.477425018791102 AND age>=18.477425018791102 THEN default=0 
IF loan>=5473.98555060076 AND income<=50052.292929031 AND loan>=5617.178645345511 THEN default=1 
IF loan>=6536.96636294544 AND age>=19.372464833315497 THEN default=1 
IF income<=22089.8374845274 AND age>=21.3656869572587 THEN default=1 
IF income>=45311.831838917 AND income>=53493.4860118665 THEN default=0 
IF loan<=4285.3

In [20]:
# Evaluate the already-trained rule model on the held-out test split.
# Instead of a learner, a callable that ignores its argument and returns the
# fitted `regras_credit` is passed, so the model is not trained a second time.
# NOTE(review): presumably TestOnTestData calls it with the training data;
# confirm against the Orange version in use.
# Despite its name, `y_pred` holds an evaluation Results object, not raw predictions.
y_pred = Orange.evaluation.testing.TestOnTestData(
    base_treinamento,
    base_teste,
    [lambda _dados_treinamento: regras_credit],
)
y_pred

<Orange.evaluation.testing.Results at 0x7fe6e9e15fa0>

### Análise dos resultados

In [21]:
# Compute the CA score from the evaluation results (one value per evaluated model).
Orange.evaluation.CA(y_pred)

array([0.972])

## Base census - 78.90% (executado na interface gráfica do Orange)

# Classificador base - Majority learner

## Base credit data - Resultado da análise: 85.85%

In [22]:
from collections import Counter

### Importando a base de dados

In [23]:
# Load the credit dataset for the baseline section
# (same file that was loaded earlier in the notebook).
base_credit = Orange.data.Table('./data/05.credit_data_regras.csv')

In [24]:
# Show the domain: the attribute names followed by the class variable after the "|".
base_credit.domain

[income, age, loan | default]

### Experimentando o modelo

In [25]:
# Baseline learner. Presumably it always predicts the most frequent class:
# the accuracy obtained below equals the majority-class share of the data.
majority = Orange.classification.MajorityLearner()

In [26]:
# Evaluate the baseline using the same table as both training and test data.
previsoes = Orange.evaluation.testing.TestOnTestData(base_credit, base_credit, [majority])

### Análise dos resultados

In [27]:
# Compute the CA score of the baseline from the evaluation results.
Orange.evaluation.CA(previsoes)

array([0.8585])

In [28]:
# Disabled debug helper: would print the class value of every record in the table.
# for registro in base_credit:
#     print(registro.get_class())

In [29]:
# Count how many records fall into each class (class values rendered as strings).
Counter(
    str(instancia.get_class())
    for instancia in base_credit
)

Counter({'0': 1717, '1': 283})

In [30]:
# Majority-class share computed by hand from the counts above ('0': 1717, '1': 283);
# it matches the CA of the majority baseline.
1717 / (1717 + 283)

0.8585

## Base census - Resultado da análise: 75.9%

### Importando a base de dados

In [31]:
# Load the census dataset into an Orange Table
# (path is relative to the notebook's working directory).
base_census = Orange.data.Table('./data/06.census_regras.csv')

In [32]:
# Show the domain: the attribute names followed by the class variable after the "|".
base_census.domain

[age, workclass, final-weight, education, education-num, marital-status, occupation, relationship, race, sex, capital-gain, capital-loos, hour-per-week, native-country | income]

### Experimentando o modelo

In [33]:
# Baseline learner for the census data. Presumably it always predicts the most
# frequent class: the accuracy obtained below equals the majority-class share.
majority = Orange.classification.MajorityLearner()

In [34]:
# Evaluate the baseline using the same table as both training and test data.
previsoes = Orange.evaluation.testing.TestOnTestData(base_census, base_census, [majority])

### Análise dos resultados

In [35]:
# Compute the CA score of the baseline from the evaluation results.
Orange.evaluation.CA(previsoes)

array([0.75919044])

In [36]:
# Count how many records fall into each class (class values rendered as strings).
Counter(
    str(instancia.get_class())
    for instancia in base_census
)

Counter({'<=50K': 24720, '>50K': 7841})

In [37]:
# Majority-class share computed by hand from the counts above ('<=50K': 24720, '>50K': 7841);
# it matches the CA of the majority baseline.
24720 / (24720 + 7841)

0.7591904425539756