# IMPORTAR PACOTES

In [0]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report

# LEITURA DO CONJUNTO DE DADOS 'landsat_full'

In [0]:
# Read the landsat_full.csv file (path is relative to the working directory)
df = pd.read_csv('landsat_full.csv')

In [3]:
# Check the dimensions (expected: 6435 rows and 37 columns)
df.shape

(6435, 37)

In [5]:
# Summary statistics per column, transposed so that each attribute becomes a row
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TL1,6435.0,69.4,13.605871,39.0,60.0,68.0,80.0,104.0
TL2,6435.0,83.594872,22.882234,27.0,71.0,87.0,103.0,137.0
TL3,6435.0,99.290598,16.645944,53.0,85.0,101.0,113.0,140.0
TL4,6435.0,82.592696,18.897674,33.0,69.0,81.0,92.0,154.0
TM1,6435.0,69.150272,13.561197,39.0,60.0,68.0,80.0,104.0
TM2,6435.0,83.243512,22.886495,27.0,71.0,85.0,103.0,137.0
TM3,6435.0,99.110645,16.664088,50.0,85.0,101.0,113.0,145.0
TM4,6435.0,82.497125,18.940923,29.0,69.0,81.0,92.0,157.0
TR1,6435.0,68.912354,13.470599,40.0,60.0,67.0,79.0,104.0
TR2,6435.0,82.893085,22.862255,27.0,71.0,85.0,102.0,130.0


# PASSOS PARA A CONSTRUÇÃO DOS MODELOS

## Construir Conjunto de Atributos Preditores (X) e Atributo Meta (y)

In [0]:
# Target attribute: the ID column
y = df['ID']

# Predictor attributes: every column except ID
X = df.drop(columns=['ID'])

## Construir Conjuntos de Treino e Teste (70/30) ESTRATIFICADO pelo atributo y.
* random_state = 2020

In [0]:
# Train/test split WITHOUT stratification (70/30, random_state=2020).
# NOTE(review): the next cell repeats the split with stratify=y and rebinds these same
# four names, so this result is discarded when the notebook runs top to bottom.
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 2020, test_size=0.3)

In [0]:
# Train/test split (70/30), stratified by the target so both partitions keep
# the same class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=2020,
)

In [11]:
# Class distribution (as proportions) of the target attribute in the training set
y_train.value_counts(normalize=True)

A    0.238233
F    0.234236
C    0.211146
E    0.109902
B    0.109236
D    0.097247
Name: ID, dtype: float64

In [12]:
# Class distribution (as proportions) of the target attribute in the test set
y_test.value_counts(normalize=True)

A    0.238219
F    0.234593
C    0.210772
E    0.109788
B    0.109270
D    0.097359
Name: ID, dtype: float64

## Construir Modelos

### XGB (eXtreme Gradient Boosting)

#### Importar pacote XGBClassifier

In [0]:
# Importar pacote XGBClassifier
from xgboost import XGBClassifier

#### Modelo 1 (Mod1_XGB): Sem otimização de hiperparâmetros

In [0]:
# Build the classifier with the library's default hyperparameters
# NOTE(review): y_train holds string class labels ('A'..'F'); newer xgboost releases
# appear to require integer-encoded labels — confirm against the installed version.
Classif_XGB_1 = XGBClassifier()

# Fit the model to the training set
Mod1_XGB = Classif_XGB_1.fit(X=X_train, y=y_train)

# Predict the classes of the test set
y_mod1_XGB = Mod1_XGB.predict(X=X_test)

#### Métricas para avaliação do Modelo

In [15]:
# Confusion matrix: rows are the observed classes, columns the predicted classes
MC_Mod1_XGB = pd.DataFrame(
    data=confusion_matrix(y_test, y_mod1_XGB, labels=list('ABCDEF')),
    index=['Obs ' + cls for cls in 'ABCDEF'],
    columns=['Pred ' + cls for cls in 'ABCDEF'],
)
print(MC_Mod1_XGB)

       Pred A  Pred B  Pred C  Pred D  Pred E  Pred F
Obs A     447       0      10       0       3       0
Obs B       0     200       1       3       7       0
Obs C       0       0     388      15       0       4
Obs D       1       2      35     112       4      34
Obs E      12       2       0       2     185      11
Obs F       0       1       6      42      14     390


In [16]:
# Overall accuracy of Mod1_XGB on the test set
acc_1_XGB = accuracy_score(y_true=y_test, y_pred=y_mod1_XGB)
print('Acurácia Mod1_XGB:', acc_1_XGB)

Acurácia Mod1_XGB: 0.891765924391507


In [21]:
# Precision of Mod1_XGB; average=None yields one score per class
prec_1_XGB = precision_score(y_true=y_test, y_pred=y_mod1_XGB, average=None)
print('Precisão Mod1_XGB:', prec_1_XGB)

Precisão Mod1_XGB: [0.97173913 0.97560976 0.88181818 0.64367816 0.8685446  0.88838269]


In [20]:
# Recall of Mod1_XGB; average=None yields one score per class
recall_1_XGB = recall_score(y_true=y_test, y_pred=y_mod1_XGB, average=None)
print('Recall Mod1_XGB:', recall_1_XGB)

Recall Mod1_XGB: [0.97173913 0.9478673  0.95331695 0.59574468 0.87264151 0.86092715]


In [22]:
# F1 score of Mod1_XGB; average=None yields one score per class
f1_1_XGB = f1_score(y_true=y_test, y_pred=y_mod1_XGB, average=None)
print('f-1 Score Mod1_XGB:', f1_1_XGB)

f-1 Score Mod1_XGB: [0.97173913 0.96153846 0.91617473 0.61878453 0.87058824 0.87443946]


#### Usando o 'classification_report' do sklearn

In [23]:
# Per-class precision, recall and F1 score of Mod1_XGB in a single text report
print(classification_report(y_true=y_test, y_pred=y_mod1_XGB))

              precision    recall  f1-score   support

           A       0.97      0.97      0.97       460
           B       0.98      0.95      0.96       211
           C       0.88      0.95      0.92       407
           D       0.64      0.60      0.62       188
           E       0.87      0.87      0.87       212
           F       0.89      0.86      0.87       453

    accuracy                           0.89      1931
   macro avg       0.87      0.87      0.87      1931
weighted avg       0.89      0.89      0.89      1931



#### Fazendo "na unha". Construindo um DataFrame e preenchendo os valores

In [0]:
# Build a table (DataFrame) with one row per class and one column per metric;
# no data is supplied, so every cell starts out as NaN
classes = ['A', 'B', 'C', 'D', 'E', 'F']
metricas = ['Precisão', 'Recall', 'F-1']
df_1_XGB = pd.DataFrame(index=classes, columns=metricas)

In [28]:
# On a fresh top-to-bottom run the table is still empty (all NaN) at this point;
# its metric columns are only filled in by a later cell.
df_1_XGB

Unnamed: 0,Precisão,Recall,F-1
A,0.971739,0.971739,0.971739
B,0.97561,0.947867,0.961538
C,0.881818,0.953317,0.916175
D,0.643678,0.595745,0.618785
E,0.868545,0.872642,0.870588
F,0.888383,0.860927,0.874439


In [0]:
# Fill the table columns with the per-class metric arrays computed earlier
# (assumes each array follows the same A-F class order as the table index — TODO confirm)
df_1_XGB['Precisão'] = prec_1_XGB
df_1_XGB['Recall'] = recall_1_XGB
df_1_XGB['F-1']= f1_1_XGB

In [27]:
# Report the overall accuracy of Model 1 (XGB without hyperparameter tuning)
print('Acurácia Geral XGB_1 =', acc_1_XGB, '\n')

# Report the per-class metrics table of Model 1 (XGB without hyperparameter tuning)
print('Métricas XGB_1', '\n')
print(df_1_XGB)

Acurácia Geral XGB_1 = 0.891765924391507 

Métricas XGB_1 

   Precisão    Recall       F-1
A  0.971739  0.971739  0.971739
B  0.975610  0.947867  0.961538
C  0.881818  0.953317  0.916175
D  0.643678  0.595745  0.618785
E  0.868545  0.872642  0.870588
F  0.888383  0.860927  0.874439


#### Atividade complementar: 
* Otimizar hiperparâmetros 'max_depth' e 'n_estimators' (Sugestão: usar RandomizedSearchCV com cv=5)
* Repetir os passos realizados anteriormente, desta vez com os hiperparâmetros otimizados
* Em todos os lugares onde os nomes foram identificados como _1 _XGB, trocar por _2 _XGB

### ANN (Artificial Neural Networks)

#### Importar pacote MLPClassifier (Multi Layer Perceptron)

In [0]:
# Importar pacote MLPClassifier (Multi Layer Perceptron)
from sklearn.neural_network import MLPClassifier

#### Modelo 1 (Mod1_MLP): Sem otimização de hiperparâmetros e com dados originais

In [37]:
# Modelo 1 (Mod1_MLP): Sem otimização de hiperparâmetros e com dados originais
%%time

Classif_MLP_1 = MLPClassifier(random_state=2020)

# Ajuste do Modelo ao Conjunto de Treino
Mod1_MLP = Classif_MLP_1.fit(X_train, y_train)

# Predição do Modelo no Conjunto de Teste
y_mod1_MLP = Mod1_MLP.predict(X_test)

CPU times: user 2.44 s, sys: 1.55 s, total: 3.99 s
Wall time: 2.04 s


#### Métricas para avaliação do Modelo

In [38]:
# Confusion matrix: rows are the observed classes, columns the predicted classes
MC_Mod1_MLP = pd.DataFrame(
    data=confusion_matrix(y_test, y_mod1_MLP, labels=list('ABCDEF')),
    index=['Obs ' + cls for cls in 'ABCDEF'],
    columns=['Pred ' + cls for cls in 'ABCDEF'],
)
print(MC_Mod1_MLP)

       Pred A  Pred B  Pred C  Pred D  Pred E  Pred F
Obs A     436       0      21       0       3       0
Obs B       0     197       0       1      10       3
Obs C       0       0     396       3       1       7
Obs D       0       1     142       4       3      38
Obs E      16       3      11       0     143      39
Obs F       0       0     138       3       8     304


In [39]:
# Overall accuracy of Mod1_MLP on the test set
acc_1_MLP = accuracy_score(y_true=y_test, y_pred=y_mod1_MLP)
print('Acurácia Mod1_MLP:', acc_1_MLP)

Acurácia Mod1_MLP: 0.7664422578974625


In [40]:
# Precision of Mod1_MLP; average=None yields one score per class
prec_1_MLP = precision_score(y_true=y_test, y_pred=y_mod1_MLP, average=None)
print('Precisão Mod1_MLP:', prec_1_MLP)

Precisão Mod1_MLP: [0.96460177 0.9800995  0.55932203 0.36363636 0.85119048 0.77749361]


In [41]:
# Recall of Mod1_MLP; average=None yields one score per class
recall_1_MLP = recall_score(y_true=y_test, y_pred=y_mod1_MLP, average=None)
print('Recall Mod1_MLP:', recall_1_MLP)

Recall Mod1_MLP: [0.94782609 0.93364929 0.97297297 0.0212766  0.6745283  0.67108168]


In [42]:
# F1 score of Mod1_MLP; average=None yields one score per class
f1_1_MLP = f1_score(y_true=y_test, y_pred=y_mod1_MLP, average=None)
print('f-1 Score Mod1_MLP:', f1_1_MLP)

f-1 Score Mod1_MLP: [0.95614035 0.95631068 0.7103139  0.04020101 0.75263158 0.72037915]


#### Usando o 'classification_report' do sklearn

In [43]:
# Per-class precision, recall and F1 score of Mod1_MLP in a single text report
print(classification_report(y_true=y_test, y_pred=y_mod1_MLP))

              precision    recall  f1-score   support

           A       0.96      0.95      0.96       460
           B       0.98      0.93      0.96       211
           C       0.56      0.97      0.71       407
           D       0.36      0.02      0.04       188
           E       0.85      0.67      0.75       212
           F       0.78      0.67      0.72       453

    accuracy                           0.77      1931
   macro avg       0.75      0.70      0.69      1931
weighted avg       0.77      0.77      0.74      1931



#### Modelo 2 (Mod2_MLP): Sem otimização de hiperparâmetros, mas com dados normalizados (Z-score)

In [0]:
# Importar StandardScaler
from sklearn.preprocessing import StandardScaler

In [0]:
# Standardize (z-score) the features; the scaler statistics come from the training set only
def_scaler = StandardScaler()
def_scaler.fit(X_train)
# The transformed array is wrapped without column names, so columns get integer labels
X_train_norm = pd.DataFrame(data=def_scaler.transform(X_train))

In [0]:
# Inspect the first rows (head) of X_train_norm
X_train_norm.head()

In [55]:
# Per-column mean of the standardized training features (expected to be close to 0)
X_train_norm.mean()

0     4.362999e-17
1    -2.582327e-16
2    -3.789893e-16
3    -8.094965e-17
4    -3.182278e-16
5     1.822723e-16
6     4.501037e-17
7     2.872184e-16
8    -2.981629e-16
9     1.683021e-16
10    2.123819e-16
11   -2.115931e-16
12    1.461481e-16
13   -8.689023e-19
14   -1.662438e-16
15   -9.805162e-16
16    1.666752e-16
17   -1.030420e-16
18    1.205371e-16
19   -7.579786e-17
20    2.882722e-16
21   -5.300921e-17
22    4.880643e-18
23   -1.534445e-16
24   -1.970991e-16
25    3.282356e-16
26    1.129622e-15
27   -3.882638e-16
28    3.785210e-16
29    1.396283e-16
30   -3.383912e-16
31   -6.221094e-16
32    3.018172e-16
33   -1.409964e-17
34   -4.209678e-16
35    6.408925e-19
dtype: float64

In [0]:
# Apply to X_test the scaler that was fitted on X_train (the scaler is not refitted here)
X_test_norm = pd.DataFrame(def_scaler.transform(X_test))

In [54]:
# Inspect the first rows (head) of X_test_norm
X_test_norm.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35
0,0.045929,-0.198673,-1.030254,-1.082931,0.062497,-0.183003,-0.841225,-1.075305,0.079743,0.048618,-1.0108,-1.276498,0.20235,-0.106829,-0.797958,-0.930338,-0.076213,-0.090775,-1.022409,-0.921213,-0.063756,-0.081713,-1.011894,-0.914435,0.137377,-0.017827,-0.734614,-0.666236,0.150713,-0.006824,-0.723116,-0.815519,-0.056397,-0.168683,-0.947763,-0.811132
1,-1.712735,-2.072333,0.043964,1.27876,-1.926017,-2.275443,0.5316,1.802274,-2.146018,-2.347647,0.54286,2.00141,-1.564612,-2.160624,0.279148,1.333917,-1.554832,-2.008215,0.287569,1.531216,-1.847339,-2.347879,0.593471,2.000366,-1.709186,-2.25059,0.285831,1.48501,-1.924257,-2.239599,0.288847,1.634568,-1.69183,-2.134119,0.303294,1.845805
2,-0.760126,-0.590835,-1.448005,-1.187895,0.136145,0.034959,-0.542785,-0.552109,0.450703,0.353597,0.124567,-0.391983,-0.460261,-0.718597,-1.755386,-1.298937,-0.445867,-0.352244,-1.022409,-0.921213,0.38214,0.223348,-0.595688,-0.341884,-0.084211,-0.630742,-1.514954,-1.13846,-0.071605,-0.444623,-1.020753,-0.919778,-0.056397,-0.081331,-0.769041,-0.759036
3,0.63215,0.367782,-0.373787,-0.348183,1.167226,0.601662,0.113784,-0.342831,0.970048,0.397166,-0.114457,-0.131832,0.791337,0.636034,-0.079887,-0.35111,0.810959,0.475741,-0.069698,-0.347241,0.828036,0.484829,-0.060567,-0.341884,0.50669,0.332411,0.045726,-0.456358,0.817668,0.168296,-0.485007,-0.450612,0.835657,0.355433,0.064997,-0.238068
4,-0.173904,1.021384,1.118182,0.701457,-0.158449,1.037586,1.42692,0.703562,-0.142833,1.224966,1.439203,0.700653,-0.239391,0.330149,0.279148,-0.245795,-0.445867,0.562898,0.763925,0.122374,-0.435336,1.138531,1.366424,0.699116,-0.453523,0.332411,-0.134352,-0.194011,-0.442136,0.212076,-0.127844,-0.189965,-0.130735,0.704844,0.899035,0.178707


In [59]:
# Per-column standard deviation of the test features after applying the training-set scaler
X_test_norm.std()

0     0.989970
1     0.990151
2     0.977846
3     0.972335
4     0.995873
5     0.992227
6     0.982035
7     0.969576
8     0.998032
9     0.986848
10    0.980273
11    0.957779
12    1.004884
13    0.994948
14    0.991271
15    0.989633
16    1.002824
17    0.993798
18    0.984726
19    0.977329
20    1.000777
21    0.990983
22    0.982185
23    0.975116
24    1.010258
25    0.994301
26    0.990578
27    0.988139
28    0.999623
29    1.000826
30    0.981060
31    0.975569
32    0.987411
33    0.988490
34    0.981943
35    0.975294
dtype: float64

In [61]:
# Modelo 2 (Mod2_MLP): Sem otimização de hiperparâmetros e com dados normalizados (Z-score)
%%time

Classif_MLP_2 = MLPClassifier(max_iter=1000,
                              random_state = 2020)

Mod2_MLP = Classif_MLP_2.fit(X_train_norm, y_train)

# Predição
y_mod2_MLP = Mod2_MLP.predict(X_test_norm)

CPU times: user 35.1 s, sys: 22.8 s, total: 57.9 s
Wall time: 29.3 s


#### Métricas para avaliação do Modelo

In [62]:
# Confusion matrix: rows are the observed classes, columns the predicted classes
MC_Mod2_MLP = pd.DataFrame(
    data=confusion_matrix(y_test, y_mod2_MLP, labels=list('ABCDEF')),
    index=['Obs ' + cls for cls in 'ABCDEF'],
    columns=['Pred ' + cls for cls in 'ABCDEF'],
)
print(MC_Mod2_MLP)

       Pred A  Pred B  Pred C  Pred D  Pred E  Pred F
Obs A     446       1       9       0       4       0
Obs B       0     202       0       4       3       2
Obs C       0       0     373      22       0      12
Obs D       1       1      34     125       2      25
Obs E       6       2       2       1     182      19
Obs F       0       2       6      21      12     412


In [63]:
# Overall accuracy of Mod2_MLP on the test set
acc_2_MLP = accuracy_score(y_true=y_test, y_pred=y_mod2_MLP)
print('Acurácia Mod2_MLP:', acc_2_MLP)

Acurácia Mod2_MLP: 0.9010875194199897


In [64]:
# Precision of Mod2_MLP; average=None yields one score per class
prec_2_MLP = precision_score(y_true=y_test, y_pred=y_mod2_MLP, average=None)
print('Precisão Mod2_MLP:', prec_2_MLP)

Precisão Mod2_MLP: [0.98454746 0.97115385 0.87971698 0.72254335 0.89655172 0.87659574]


In [65]:
# Recall of Mod2_MLP; average=None yields one score per class
recall_2_MLP = recall_score(y_true=y_test, y_pred=y_mod2_MLP, average=None)
print('Recall Mod2_MLP:', recall_2_MLP)

Recall Mod2_MLP: [0.96956522 0.95734597 0.91646192 0.66489362 0.85849057 0.90949227]


In [66]:
# F1 score of Mod2_MLP; average=None yields one score per class
f1_2_MLP = f1_score(y_true=y_test, y_pred=y_mod2_MLP, average=None)
print('f-1 Score Mod2_MLP:', f1_2_MLP)

f-1 Score Mod2_MLP: [0.9769989  0.96420048 0.8977136  0.69252078 0.87710843 0.89274106]


#### Usando o 'classification_report' do sklearn

In [67]:
# Per-class precision, recall and F1 score of Mod2_MLP in a single text report
print(classification_report(y_true=y_test, y_pred=y_mod2_MLP))

              precision    recall  f1-score   support

           A       0.98      0.97      0.98       460
           B       0.97      0.96      0.96       211
           C       0.88      0.92      0.90       407
           D       0.72      0.66      0.69       188
           E       0.90      0.86      0.88       212
           F       0.88      0.91      0.89       453

    accuracy                           0.90      1931
   macro avg       0.89      0.88      0.88      1931
weighted avg       0.90      0.90      0.90      1931



#### Modelo 3 (Mod3_MLP): Sem otimização de hiperparâmetros, mas com dados normalizados (0-1)

In [0]:
# Import MinMaxScaler (rescales features to a fixed range, 0-1 by default)
from sklearn.preprocessing import MinMaxScaler

In [0]:
# Rescale the training features to the 0-1 range; the scaler is fitted on X_train here
minmax_scaler = MinMaxScaler()
X_train_norm_0_1 = pd.DataFrame(
    data=minmax_scaler.fit_transform(X_train),
    columns=X_train.columns,
)


In [71]:
# Inspect the first rows of the min-max scaled training set
X_train_norm_0_1.head()

Unnamed: 0,TL1,TL2,TL3,TL4,TM1,TM2,TM3,TM4,TR1,TR2,TR3,TR4,ML1,ML2,ML3,ML4,MM1,MM2,MM3,MM4,MR1,MR2,MR3,MR4,BL1,BL2,BL3,BL4,BM1,BM2,BM3,BM4,BR1,BR2,BR3,BR4
0,0.276923,0.490909,0.430233,0.355372,0.3125,0.574257,0.463158,0.390625,0.3125,0.524272,0.421053,0.421875,0.333333,0.538462,0.484211,0.459016,0.375,0.582524,0.561798,0.460938,0.384615,0.554455,0.554348,0.443548,0.296875,0.538462,0.555556,0.432,0.369231,0.594059,0.526316,0.453125,0.369231,0.554455,0.554348,0.435484
1,0.430769,0.418182,0.22093,0.198347,0.359375,0.425743,0.231579,0.21875,0.359375,0.446602,0.231579,0.242188,0.444444,0.548077,0.368421,0.295082,0.59375,0.679612,0.52809,0.367188,0.661538,0.742574,0.565217,0.379032,0.75,0.807692,0.677778,0.496,0.815385,0.831683,0.642105,0.484375,0.753846,0.742574,0.673913,0.435484
2,0.753846,0.727273,0.697674,0.454545,0.75,0.841584,0.757895,0.492188,0.75,0.776699,0.663158,0.492188,0.777778,0.730769,0.621053,0.442623,0.75,0.776699,0.662921,0.453125,0.753846,0.792079,0.652174,0.435484,0.71875,0.701923,0.644444,0.416,0.723077,0.722772,0.568421,0.40625,0.723077,0.762376,0.597826,0.419355
3,0.507692,0.563636,0.674419,0.479339,0.5625,0.613861,0.589474,0.421875,0.5,0.563107,0.505263,0.390625,0.650794,0.653846,0.715789,0.54918,0.625,0.660194,0.764045,0.492188,0.569231,0.554455,0.51087,0.362903,0.546875,0.653846,0.655556,0.536,0.553846,0.673267,0.663158,0.523438,0.615385,0.712871,0.608696,0.403226
4,0.369231,0.263636,0.406977,0.338843,0.40625,0.356436,0.4,0.320312,0.40625,0.427184,0.315789,0.234375,0.444444,0.413462,0.263158,0.229508,0.359375,0.417476,0.280899,0.21875,0.369231,0.425743,0.206522,0.16129,0.359375,0.346154,0.188889,0.176,0.323077,0.39604,0.210526,0.171875,0.369231,0.39604,0.228261,0.177419


In [0]:
# Scale X_test with the min-max scaler already fitted on X_train
X_test_norm_0_1 = pd.DataFrame(
    data=minmax_scaler.transform(X_test),
    columns=X_train.columns,
)


In [73]:
# Modelo 3 (Mod3_MLP): Sem otimização de hiperparâmetros e com dados normalizados (Z-score)
%%time

Classif_MLP_3 = MLPClassifier(activation='relu', 
                              solver='adam',
                              hidden_layer_sizes = (100),
                              max_iter=1000,
                              random_state = 2020)

Mod3_MLP = Classif_MLP_3.fit(X_train_norm_0_1, y_train)

# Predição
y_mod3_MLP = Mod3_MLP.predict(X_test_norm_0_1)

CPU times: user 13 s, sys: 8.4 s, total: 21.4 s
Wall time: 10.8 s


#### Métricas para avaliação do Modelo

In [74]:
# Confusion matrix: rows are the observed classes, columns the predicted classes
MC_Mod3_MLP = pd.DataFrame(
    data=confusion_matrix(y_test, y_mod3_MLP, labels=list('ABCDEF')),
    index=['Obs ' + cls for cls in 'ABCDEF'],
    columns=['Pred ' + cls for cls in 'ABCDEF'],
)
print(MC_Mod3_MLP)

       Pred A  Pred B  Pred C  Pred D  Pred E  Pred F
Obs A     447       0       7       0       6       0
Obs B       0     202       0       4       5       0
Obs C       1       1     375      19       1      10
Obs D       1       1      27      88       2      69
Obs E       9       4       0       0     182      17
Obs F       0       1       7       7      13     425


In [75]:
# Overall accuracy of Mod3_MLP on the test set
acc_3_MLP = accuracy_score(y_true=y_test, y_pred=y_mod3_MLP)
print('Acurácia Mod3_MLP:', acc_3_MLP)

Acurácia Mod3_MLP: 0.8902123252200932


In [76]:
# Precision of Mod3_MLP; average=None yields one score per class
prec_3_MLP = precision_score(y_true=y_test, y_pred=y_mod3_MLP, average=None)
print('Precisão Mod3_MLP:', prec_3_MLP)

Precisão Mod3_MLP: [0.97598253 0.96650718 0.90144231 0.74576271 0.8708134  0.81573896]


In [77]:
# Recall of Mod3_MLP; average=None yields one score per class
recall_3_MLP = recall_score(y_true=y_test, y_pred=y_mod3_MLP, average=None)
print('Recall Mod3_MLP:', recall_3_MLP)

Recall Mod3_MLP: [0.97173913 0.95734597 0.92137592 0.46808511 0.85849057 0.93818985]


In [78]:
# F1 score of Mod3_MLP; average=None yields one score per class
f1_3_MLP = f1_score(y_true=y_test, y_pred=y_mod3_MLP, average=None)
print('f-1 Score Mod3_MLP:', f1_3_MLP)

f-1 Score Mod3_MLP: [0.97385621 0.96190476 0.91130012 0.5751634  0.86460808 0.87268994]


#### Usando o 'classification_report' do sklearn

In [79]:
# Per-class precision, recall and F1 score of Mod3_MLP in a single text report
print(classification_report(y_true=y_test, y_pred=y_mod3_MLP))

              precision    recall  f1-score   support

           A       0.98      0.97      0.97       460
           B       0.97      0.96      0.96       211
           C       0.90      0.92      0.91       407
           D       0.75      0.47      0.58       188
           E       0.87      0.86      0.86       212
           F       0.82      0.94      0.87       453

    accuracy                           0.89      1931
   macro avg       0.88      0.85      0.86      1931
weighted avg       0.89      0.89      0.88      1931



# ATIVIDADE PRÁTICA - AULA 09

## Exercício 1

*   Utilize o arquivo "landsat_full.csv"
*   Considere um problema para classificação de ‘Algodão’ (S) em contraposição a ‘Não Algodão’ (N)
*   Utilize as técnicas KNN, RandomForest, XGBoost e Redes Neurais (ANN)

##### 1.A) Para cada uma das técnicas e cada hiperparâmetro otimizado, apresente o intervalo (range) utilizado e o valor ótimo encontrado e utilizado para fazer a classificação. Construa uma tabela (DataFrame) semelhante ao quadro abaixo.

* OBS 1: Utilize RandomizedSearch do Scikit-Learn  
* OBS 2: Para Neural Networks, considere todas camadas com o mesmo número de neurônios

In [0]:
# Build a table (DataFrame) with one row per technique and one column per reported value;
# no data is supplied, so every cell starts out as NaN
linhas = ['KNN', 'Random Forest', 'XGBoost', 'Neural Networks']
colunas = ['Técnica', 'Hiperparâmetro', 'Valor Mínimo', 'Valor Máximo', 'Ótimo']
df_HyperOpt = pd.DataFrame(index=linhas, columns=colunas)
print(df_HyperOpt)

                Técnica Hiperparâmetro Valor Mínimo Valor Máximo Ótimo
KNN                 NaN            NaN          NaN          NaN   NaN
Random Forest       NaN            NaN          NaN          NaN   NaN
XGBoost             NaN            NaN          NaN          NaN   NaN
Neural Networks     NaN            NaN          NaN          NaN   NaN


##### 1.B) Utilize as técnicas KNN, RandomForest, XGBoost e Redes Neurais (ANN) para fazer a classificação. Utilize os dados normalizados.

* Apresente os resultados da Acurácia, Precisão, Recall e f-1 Score para cada técnica SEM e COM otimização de hiperparâmetros.

* Quais as duas melhores (que você recomendaria a utilização)? Por quê? Apresente os argumentos e, quando achar necessário/conveniente, acrescente algum recurso para subsidiar sua recomendação. Por exemplo, Matriz de Confusão, gráfico, etc.

## Exercício 2

* Repita todo o procedimento realizado no Exercício 1, utilizando o conjunto de dados do arquivo "landsat_central.csv", que representa apenas os pontos centrais de cada linha do conjunto de dados anterior.

##### 2.A) Qual dos modelos você recomendaria para ser utilizado para classificação de algodão utilizando o conjunto de dados "*landsat_central*"? Justifique.

* Siga as mesmas recomendações do Exercício 1

##### 2.B) Discuta os resultados obtidos utilizando apenas os dados do pixel central com os resultados obtidos utilizando os pixels ao redor do pixel central.

## Exercício 3 (Bônus)

* Considere o conjunto de dados "*landsat_central.csv*"

* Frequentemente, quando se trabalha com imagens de satélite, são criados índices, que são combinações de bandas. O NDVI é um exemplo de índice que combina as bandas do infravermelho próximo (NIR, Near Infra Red) e do vermelho (Red).

> $NDVI = \frac{NIR - Red}{NIR + Red}$

* Assuma que no conjunto de dados, a Banda 3 seja a banda "**Red**" e a Banda 4 seja a banda "**NIR (Near InfraRed)**"

* Construa um atributo NDVI para o conjunto de dados

* Escolha a técnica com melhor resultado no Exercício 2 e execute-a novamente, desta vez, incluindo o atributo NDVI.

* Discuta sobre os novos resultados comparando-os com os resultados anteriores