
# Projeto Nadir
Desenvolvimento do Projeto de Conclusão de Curso da Pós-Graduação BI-Master da PUC-Rio, lecionada pelo Laboratório ICA. O projeto será chamado de Nadir.<br><br>
O objetivo a ser atingido é:


*   Criar uma Rede Neural capaz de classificar imagens de constelações

Por se tratar de uma Prova de Conceito, serão abordadas apenas 4 constelações:
1. Órion
2. Cruzeiro do Sul
3. Cão Maior
4. Ursa Menor







In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import imutils
from imutils import paths
from keras.models import Sequential, Model, Input
from keras.layers import Dense, Flatten, Dropout, Activation, Conv2D, MaxPooling2D, AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input,decode_predictions
from keras.optimizers import Adam, SGD
from keras.utils import np_utils
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import LabelBinarizer, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
# Mount Google Drive inside the Colab runtime so the notebook can read/write files stored there.
from google.colab import drive
drive.mount('/content/drive')

# MODIFY THIS: path to your own workspace. Relative file names used later resolve against it.
workdir_path = '/content/drive/My Drive/Colab Notebooks/TCC' #MODIFY!!! #Path to your workspace
os.chdir(workdir_path)

## Definição de Funções de Apoio para o desenvolvimento do projeto

In [None]:
# Maps a class index (position of the largest score in a label/prediction vector)
# to the constellation name shown in plots.
dicionario = dict(enumerate((
    'canis major',
    'crux',
    'orion',
    'ursa minor',
)))

In [None]:
#Tests a single image that has already been loaded and pre-processed
def testImage(model, image, label_real, dicionario = dicionario):
    """Show one image with its true class as title and the predicted class as x-label.

    Args:
        model: object whose ``predict`` takes a batch of images and returns
            one score vector per image.
        image: a single image array without a batch axis, drawable by
            ``plt.imshow``.
        label_real: score / one-hot vector of the true class; the index of its
            largest entry is looked up in ``dicionario``.
        dicionario: mapping from class index to display name.
    """
    # Add the batch axis explicitly instead of reshaping to a hard-coded
    # (-1, 224, 224, 3): the result is identical for 224x224x3 inputs, and any
    # other image size is no longer silently re-cut into a wrong batch.
    batch = np.expand_dims(image, axis=0)
    predicted_index = np.argmax(model.predict(batch))

    plt.imshow(image)
    plt.title(dicionario[np.argmax(label_real)])
    plt.xlabel("Prediction: " + dicionario[predicted_index])
    plt.show()

In [None]:
#Tests a set of up to 8 images at once
def gridImage8(model, image, label_real, inicio = 0, dicionario = dicionario):
    """Plot a 2x4 grid of images with true-class titles and predicted-class x-labels.

    Args:
        model: object whose ``predict`` takes a batch of images and returns
            one score vector per image.
        image: indexable collection of image arrays (each without a batch axis).
        label_real: indexable collection of score / one-hot vectors aligned
            with ``image``.
        inicio: index of the first image to show.
        dicionario: mapping from class index to display name.

    Raises:
        IndexError: if no image is available at position ``inicio``.
    """
    # The loop used to run 8 times unconditionally and raised IndexError when
    # fewer than 8 images remained after `inicio`; show what is available.
    quantidade = min(8, len(image) - inicio)
    if quantidade <= 0:
        raise IndexError(f"no images available from index {inicio}")

    plt.figure(figsize=(25,10))
    for i in range(quantidade):
        atual = image[i + inicio]
        plt.subplot(2, 4, i + 1)
        plt.imshow(atual)
        plt.title(dicionario[np.argmax(label_real[i + inicio])])
        # Add the batch axis instead of reshaping to a hard-coded (224, 224, 3).
        previsao = np.argmax(model.predict(np.expand_dims(atual, axis=0)))
        plt.xlabel("Prediction: " + dicionario[previsao])

    plt.show()

In [None]:
def matrix(labels, prediction):
    """Print a classification report and plot the confusion matrix.

    Args:
        labels: iterable of true score / one-hot vectors, one per sample.
        prediction: iterable of predicted score vectors, one per sample.

    Side effects:
        Prints the report, writes ``confusion_matrix.png`` relative to the
        current working directory and shows the figure.
    """
    # Collapse every score vector to the index of its largest entry.
    y = [np.argmax(ans) for ans in labels]
    pred = [np.argmax(ans) for ans in prediction]

    print(classification_report(y, pred))
    print('--------------------------------------')
    plt.figure(figsize=(5,5))
    plt.title('Matriz de Confusão')
    # fmt='d': seaborn's default annotation format ('.2g') renders counts of
    # 100 or more in scientific notation (e.g. 1.2e+02).
    sns.heatmap(confusion_matrix(y, pred), annot=True, fmt='d', cbar=False, square=True)
    plt.savefig('confusion_matrix.png')
    plt.show()

# Lendo os Dados

In [None]:
# Folder (inside the mounted Drive) whose entries are listed to obtain the class labels.
# NOTE(review): the name `all` shadows Python's built-in all(); it is kept because other cells read it.
all = '/content/drive/My Drive/Colab Notebooks/TCC/dataset/all/'

Lendo as labels existentes e registrando

In [None]:
# One entry per item found in the dataset folder; each entry name is used as a class label.
# os.listdir returns entries in arbitrary order, so sort for a reproducible label order.
labels = sorted(os.listdir(all))

In [None]:
# Quick check: display the first label
labels[0]

Lendo os dados e armazenando em variáveis

In [None]:
# Read every image found under each label folder, convert it to RGB and
# resize it to 224x224; X collects the pixels and y the matching label.
X = []
y = []
for label in labels:
    # The dataset path ends with '/', so concatenation yields the label folder.
    for caminho in paths.list_images(all + label):
        image = cv2.imread(caminho)
        if image is None:
            # cv2.imread returns None (it does not raise) when a file cannot be
            # decoded; skip it instead of crashing inside cvtColor.
            print(f'Skipping unreadable image: {caminho}')
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224))

        y.append(label)
        X.append(image)

Convertendo para os formatos necessários

In [None]:
# Stack the images into one array scaled by 1/255, and turn the labels into an array.
X = np.array(X) / 255.0
y = np.array(y)

In [None]:
# Sanity check: print the label of sample 0 and draw its image
print(y[0])

plt.figure(figsize=(10, 10))
plt.imshow(X[0])

print('--------------------------')

In [None]:
# Sanity check: print the label of sample 5 and draw its image
print(y[5])

plt.figure(figsize=(10, 10))
plt.imshow(X[5])

print('--------------------------')

# Data Augmentation

Devido ao baixo número de imagens obtidas para o treinamento e teste da Rede Neural, se faz necessário gerar novas imagens sintéticas a partir das imagens originais obtidas

Montando o gerador de imagens:

In [None]:
# Data augmentation: random rotations, zooms, horizontal/vertical shifts and horizontal flips.
# featurewise_center / featurewise_std_normalization were removed: they only take effect
# after ImageDataGenerator.fit(...) has computed dataset statistics, and no such call is made
# in the visible notebook, so they changed nothing and only made Keras emit a warning.
DataGen = ImageDataGenerator(
    rotation_range=30,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
)

In [None]:
# Number of images per batch produced by the augmentation generator
batch_size = 16

# Stream shuffled, augmented batches of (images, one-hot labels)
data_generator = DataGen.flow(
    X,
    pd.get_dummies(y).to_numpy(),
    batch_size=batch_size,
    shuffle=True,
)

Teste do gerador de imagens:

In [None]:
# Draw one batch (images, labels) from the generator
x_, y_ = next(data_generator)

In [None]:
# Preview the first rows of the one-hot encoding of the labels
pd.get_dummies(y).head()

In [None]:
# First label vector of the sampled batch
y_[0]

In [None]:
# Draw the first image of the sampled batch
plt.imshow(x_[0]);

Gerando imagens novas:

In [None]:
# Batches needed to cover X.shape[0] images (ceiling division). The previous
# `X.shape[0]//batch_size + 1` asked for one batch too many whenever the image
# count was an exact multiple of batch_size.
train_steps_per_epoch = (X.shape[0] + batch_size - 1) // batch_size

# Multiplier applied to the number of batches drawn from the generator.
increase_by = 40
steps_per_epoch = train_steps_per_epoch * increase_by

train_features = []
train_labels = []
for i in range(steps_per_epoch):
    # '\r' together with end="" rewrites a single progress line; the former
    # trailing '\n' printed a new line per batch instead.
    print("\rTraining Batches: {}/{}".format(i + 1, steps_per_epoch), end="")
    x_, y_ = next(data_generator)
    train_features.append(x_)
    train_labels.append(y_)

# Merge the per-batch arrays into one array of images and one of labels.
train_features = np.concatenate(train_features, axis=0)
train_labels = np.concatenate(train_labels, axis=0)


print('\nCreation of vectors image finished!')

In [None]:
# Report how many generated samples were collected (rows of train_labels)
print(f'Quantidade de imagens geradas a serem usadas é: {train_labels.shape[0]}')

In [None]:
# Stratified 80/20 split of the generated samples, with a fixed seed.
# NOTE(review): the split is done on the generator output, so augmented variants of the
# same source image may land on both sides — confirm this is acceptable for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    train_features,
    train_labels,
    train_size=0.8,
    stratify=train_labels,
    random_state=42,
)

# Montando o Modelo

Será usado Transfer Learning, tendo como base a rede VGG16

In [None]:
# VGG16 with ImageNet weights and without its top layers, for 224x224 RGB input
model_vgg = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
model_vgg.summary()

In [None]:
# Training hyper-parameters
INIT_LR = 0.001  # initial learning rate
EPOCHS = 1_000   # number of training epochs
BS = 32          # batch size

In [None]:
# Freeze every VGG16 layer so only the new head is trained.
for layer in model_vgg.layers:
    layer.trainable = False

# Classification head on top of the VGG16 output:
# flatten -> Dense(128) -> Dropout -> Dense(64) -> Dropout -> 4-way softmax.
headmodel = Flatten(name="flatten")(model_vgg.output)
headmodel = Dense(128, activation='relu')(headmodel)
headmodel = Dropout(0.5)(headmodel)
headmodel = Dense(64, activation='relu')(headmodel)
headmodel = Dropout(0.5)(headmodel)
headmodel = Dense(4, activation='softmax')(headmodel)

model = Model(inputs=model_vgg.input, outputs=headmodel, name='Nadir')

# SGD whose decay is the initial learning rate divided by the number of epochs.
opt = SGD(learning_rate=INIT_LR, decay=INIT_LR/EPOCHS)

model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the assembled model
model.summary()

In [None]:
# Training callbacks:
#  - halve the learning rate after 20 epochs without improvement;
#  - keep the best model seen so far on disk as 'best.model';
#  - stop after 40 epochs without improvement.
# restore_best_weights=True makes early stopping put the best weights back into the
# model; without it the in-memory model keeps the weights from 40 epochs past the best.
callbacks = [
    ReduceLROnPlateau(patience=20, factor=0.5, verbose=True),
    ModelCheckpoint('best.model', save_best_only=True),
    EarlyStopping(patience=40, verbose=True, restore_best_weights=True)
]


In [None]:
# Carve a stratified 20% validation set out of the training data.
# random_state is fixed so the split is reproducible, matching the train/test split.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    stratify=y_train,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train the model; `h` keeps the per-epoch history returned by fit().
h = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=BS,
    epochs=EPOCHS,
    callbacks=callbacks,
)

# Avaliação do modelo

In [None]:
# Per-epoch metrics recorded during training, one row per epoch.
df_history = pd.DataFrame(h.history)

# DataFrame.plot opens its own figure when no `ax` is passed, so the former
# plt.figure() call only produced an extra blank figure and an unused variable.
ax = df_history[['val_loss', 'loss']].plot(figsize=(20, 10))
# Learning rate drawn on a secondary y-axis that shares the epoch axis.
df_history['lr'].plot(ax=ax.twinx(), color='gray')
plt.savefig('loss_val_loss_lr.png')

In [None]:
# Training vs. validation accuracy per epoch. DataFrame.plot opens its own figure,
# so the former plt.figure() call only produced an extra blank figure.
ax = df_history[['val_accuracy', 'accuracy']].plot(figsize=(20, 10))
plt.savefig('accuracy_val_accuracy.png')

In [None]:
# Show the last rows of the training history
df_history.tail()

In [None]:
# Grid of test images with true and predicted classes, starting at index 10
gridImage8(model, X_test, y_test, inicio=10)

In [None]:
# True vs. predicted class for test sample 0
testImage(model, X_test[0], y_test[0])

In [None]:
# True vs. predicted class for test sample 1
testImage(model, X_test[1], y_test[1])

In [None]:
# Run the model on the whole test split
pred = model.predict(X_test)

In [None]:
# Classification report and confusion matrix for the test split
matrix(y_test, pred)

In [None]:
# Write the training history to a CSV file (path relative to the current working directory)
df_history.to_csv('df_history.csv')

In [None]:
# Save the model to an .h5 file (path relative to the current working directory)
model.save('oficial_nadir_model.h5')