<a href="https://colab.research.google.com/github/deiveleal/multi_modal_machine_learning/blob/main/PrimeiroModelo/MultiModalAlgorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Multimodal Algorithm

Data &#8594; Pre-processing &#8594; Separate training per modality &#8594; Fusion &#8594; Metrics

### Hello world (Iris)

### Processa imagem (cria um vetor com a imagem)

In [34]:
import os

import numpy as np
from sklearn import preprocessing
from sklearn.cluster import KMeans
from PIL import Image
import cv2
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler


# Alterar as opções de impressão
# np.set_printoptions(threshold=np.inf)

In [35]:
def normalize_features(features):
    """Min-max scale every column of ``features``.

    A fresh ``MinMaxScaler`` is fitted on the given frame on every call, so
    the scaling is relative to the minimum and maximum of this frame only.

    Args:
        features: DataFrame whose columns are expected to be numeric.

    Returns:
        DataFrame holding the scaled values, with the same columns and index
        as ``features``.
    """
    scaled_values = MinMaxScaler().fit_transform(features)
    return pd.DataFrame(
        data=scaled_values,
        index=features.index,
        columns=features.columns,
    )


In [2]:
def vetorizar_imagem(caminho_imagem):
    """Read an image file and return its pixels as a one-dimensional array.

    Args:
        caminho_imagem: Path of the image file to read.

    Returns:
        1-D array with the flattened pixel values, as loaded by
        ``cv2.imread`` with its default flags.

    Raises:
        ValueError: If OpenCV cannot read the file (for example a missing
            path or an unsupported/corrupt image).
    """
    imagem = cv2.imread(caminho_imagem)

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on ``flatten``.
    if imagem is None:
        raise ValueError(f"Could not read image: {caminho_imagem!r}")

    # Collapse the pixel grid into a single flat vector.
    return imagem.flatten()

In [3]:
def vetorizar_imagens(diretorio):
    """Load every JPG/PNG image under ``diretorio`` as a flat grayscale vector.

    The directory tree is walked recursively. Each matching file is read in
    grayscale and flattened to one dimension. The extension check is
    case-insensitive.

    Args:
        diretorio: Root directory to search for images.

    Returns:
        Dict mapping each file name without its extension to a 1-D array of
        pixel values. Entries are inserted in sorted order so the result is
        reproducible across runs and file systems. Files in different
        sub-directories that share a name overwrite one another (the last
        one visited wins).

    Raises:
        ValueError: If a matching file cannot be decoded by OpenCV.
    """
    extensoes = (".jpg", ".png")
    vetor_imagens = {}
    for root, dirs, files in os.walk(diretorio):
        # os.walk yields entries in arbitrary file-system order; sorting the
        # sub-directories in place and the files makes traversal deterministic.
        dirs.sort()
        for filename in sorted(files):
            if not filename.lower().endswith(extensoes):
                continue
            caminho_imagem = os.path.join(root, filename)
            imagem = cv2.imread(caminho_imagem, cv2.IMREAD_GRAYSCALE)
            # cv2.imread returns None on failure instead of raising.
            if imagem is None:
                raise ValueError(f"Could not read image: {caminho_imagem!r}")
            # splitext keeps dots that belong to the name ("a.b.png" -> "a.b").
            file_name = os.path.splitext(filename)[0]
            vetor_imagens[file_name] = imagem.flatten()
    return vetor_imagens

In [4]:
# Vectorize every training image found under the training directory.
directory = 'sample/iris/train'
vetor_imagens = vetorizar_imagens(directory)

In [36]:
def prepara_dados_vetor(vetor_imagens):
    """Stack a dict of image vectors into a single 2-D NumPy array.

    Args:
        vetor_imagens: Dict mapping an image name to its flattened pixel
            vector.

    Returns:
        Array with one row per dict entry (in the dict's insertion order) and
        one column per vector position.
    """
    # Build the frame with the dict keys as row index, then go straight to NumPy.
    return pd.DataFrame.from_dict(vetor_imagens, orient='index').to_numpy()

def treinar_keras_imagens(vetor_imagens_np, num_classes=3, hidden_units=(64, 32)):
    """Build (but do not train) the dense classifier for the image modality.

    Despite the name, no fitting happens here: the function only wires up the
    layers so the network can later be used on its own or as a fusion branch.

    Args:
        vetor_imagens_np: 2-D array of flattened images; only ``shape[1]``
            (the number of values per image) is used, to size the input layer.
        num_classes: Width of the softmax output layer. Defaults to 3, the
            value that used to be hard-coded.
        hidden_units: Sizes of the ReLU hidden layers, in order. Defaults to
            ``(64, 32)``, the values that used to be hard-coded.

    Returns:
        Tuple ``(model, input_tensor, output_tensor)``.
    """
    vetor_imagens_input = tf.keras.Input(shape=(vetor_imagens_np.shape[1],))

    # Chain the hidden layers on top of the input tensor.
    y = vetor_imagens_input
    for units in hidden_units:
        y = tf.keras.layers.Dense(units, activation="relu")(y)

    vetor_imagens_output = tf.keras.layers.Dense(num_classes, activation="softmax")(y)
    vetor_imagens_model = tf.keras.Model(vetor_imagens_input, vetor_imagens_output)
    return vetor_imagens_model, vetor_imagens_input, vetor_imagens_output

In [37]:
# Stack the training image vectors into a 2-D array and build the image-branch
# network, whose input layer is sized from that array's second dimension.
vetor_imagens_np = prepara_dados_vetor(vetor_imagens)
vetor_imagens_model, vetor_imagens_input, vetor_imagens_output = treinar_keras_imagens(vetor_imagens_np=vetor_imagens_np)

In [40]:
# Load the tabular training features and preview the first three rows.
df_iris_train = pd.read_csv('sample/iris/iris_train.csv')
df_iris_train.head(3)

Unnamed: 0,nome_arquivo,areas,perimetros,convexidades,classe,cor_r,cor_g,cor_b,geral
0,iris_setosa_1,197.052632,52.762603,0.306671,setosa,12.999487,21.248263,17.33115,[197.05263158 52.76260295 0.30667115 12.99...
1,iris_setosa_10,34.419453,16.64839,0.243514,setosa,88.672619,101.446085,98.016598,[ 34.41945289 16.64838998 0.24351401 88.67...
2,iris_setosa_11,1036.19697,98.229666,0.426195,setosa,41.932528,106.137651,5.576583,[1.03619697e+03 9.82296660e+01 4.26194642e-01 ...


In [41]:
def load_data(df, columns):
    """Return the normalized feature matrix of ``df`` as a NumPy array.

    Args:
        df: Source DataFrame.
        columns: Column labels to drop before normalizing.

    Returns:
        2-D array holding the remaining columns after they have been passed
        through ``normalize_features``.
    """
    remaining = df.drop(columns=columns)
    # normalize_features fits its scaling on the frame it receives, so the
    # result is relative to this frame's own value ranges.
    return normalize_features(remaining).to_numpy()

In [42]:
# Normalized tabular training features; the file-name, class-label and
# `geral` columns are dropped so only the numeric measurements remain.
features_np = load_data(df_iris_train, ["nome_arquivo", "classe", "geral"])

In [43]:
# Neural model for the tabular (hand-crafted) feature branch.
def treina_feature(features_np, num_classes=3, hidden_units=(64, 32)):
    """Build (but do not train) the dense classifier for the tabular features.

    Despite the name, no fitting happens here: the function only wires up the
    layers so the network can later be used on its own or as a fusion branch.

    Args:
        features_np: 2-D feature array; only ``shape[1]`` (the number of
            features per sample) is used, to size the input layer.
        num_classes: Width of the softmax output layer. Defaults to 3, the
            value that used to be hard-coded.
        hidden_units: Sizes of the ReLU hidden layers, in order. Defaults to
            ``(64, 32)``, the values that used to be hard-coded.

    Returns:
        Tuple ``(model, input_tensor, output_tensor)``.
    """
    feature_input = tf.keras.Input(shape=(features_np.shape[1],))

    # Chain the hidden layers on top of the input tensor.
    y = feature_input
    for units in hidden_units:
        y = tf.keras.layers.Dense(units, activation="relu")(y)

    feature_output = tf.keras.layers.Dense(num_classes, activation="softmax")(y)
    feature_model = tf.keras.Model(feature_input, feature_output)
    return feature_model, feature_input, feature_output

In [45]:
# Build the tabular-feature branch network, sized from the feature matrix.
# NOTE(review): the first line recomputes the same `features_np` that an
# earlier cell already produced — confirm whether both are needed.
features_np = load_data(df_iris_train, ["nome_arquivo", "classe", "geral"])
feature_model, feature_input, feature_output = treina_feature(features_np=features_np)

In [46]:

# Fusion head: concatenate the outputs of both branch models and classify the
# result with a small dense network.
# NOTE(review): this step was labelled "early fusion", but what gets
# concatenated here are the branch models' softmax outputs (their predictions),
# not raw or intermediate features — confirm the intended fusion strategy.
combined_input = tf.keras.layers.concatenate([vetor_imagens_model.output, feature_model.output])
z = tf.keras.layers.Dense(64, activation="relu")(combined_input)
z = tf.keras.layers.Dense(32, activation="relu")(z)
combined_output = tf.keras.layers.Dense(3, activation="softmax")(z)


In [47]:
# Sanity check: show the symbolic tensor produced by the concatenation.
# The "{}" placeholder is now actually filled in instead of printed literally.
print("Combined Input: {}".format(combined_input))

Combined Input: {} <KerasTensor shape=(None, 6), dtype=float32, sparse=False, name=keras_tensor_28>


In [48]:

# Multimodal model: takes [image vector, tabular features] as its two inputs
# and outputs the class probabilities produced by the fusion head.
multimodal_model = tf.keras.Model(inputs=[vetor_imagens_model.input, feature_model.input], outputs=combined_output)

In [49]:
# Compile the model with the Adam optimizer, categorical cross-entropy loss
# and accuracy as the reported metric.
# NOTE(review): no `fit` call is visible in this part of the notebook before
# `predict` is used further down, so those predictions would come from
# untrained weights — confirm that training happens elsewhere.
multimodal_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [59]:
# Load the test CSV and vectorize the test images.
# NOTE(review): `df_test_raw` is not referenced again in the visible cells; the
# same CSV is read again into `df_iris_test` further down — confirm it is needed.
df_test_raw = pd.read_csv("sample/iris/iris_test.csv")
vetor_imagens_raw_test = vetorizar_imagens("sample/iris/test")

In [None]:
# Preview a few entries (image name -> vector shape) instead of dumping every
# pixel array into the notebook output.
{nome: vetor.shape for nome, vetor in list(vetor_imagens_raw_test.items())[:5]}

In [52]:
# Stack the test image vectors into a 2-D array.
vetor_imagens_np_test = prepara_dados_vetor(vetor_imagens_raw_test)
# NOTE(review): this builds a fresh, separate image network; the later
# `multimodal_model.predict` call does not use it — confirm it is needed.
vetor_imagens_model_test, vetor_imagens_input_test, vetor_imagens_output_test = treinar_keras_imagens(vetor_imagens_np=vetor_imagens_np_test)

In [53]:
# Load the tabular test features.
df_iris_test = pd.read_csv('sample/iris/iris_test.csv')

In [62]:
# Column dtypes and non-null counts of the test frame.
df_iris_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   nome_arquivo  84 non-null     object 
 1   areas         84 non-null     float64
 2   perimetros    84 non-null     float64
 3   convexidades  84 non-null     float64
 4   classe        84 non-null     object 
 5   cor_r         84 non-null     float64
 6   cor_g         84 non-null     float64
 7   cor_b         84 non-null     float64
 8   geral         84 non-null     object 
dtypes: float64(6), object(3)
memory usage: 6.0+ KB


In [54]:
# Preview the first three test rows.
df_iris_test.head(3)

Unnamed: 0,nome_arquivo,areas,perimetros,convexidades,classe,cor_r,cor_g,cor_b,geral
0,iris_setosa_55,171.927885,40.850386,0.242437,setosa,23.383403,86.727547,16.235557,[171.92788462 40.85038598 0.24243655 23.38...
1,iris_setosa_56,116.768817,44.329692,0.279369,setosa,65.959971,113.949199,27.027802,[116.7688172 44.32969207 0.27936911 65.95...
2,iris_setosa_57,1147.882353,76.52518,0.162317,setosa,135.790702,163.820702,110.795965,[1.14788235e+03 7.65251797e+01 1.62317109e-01 ...


In [73]:
# Normalized tabular test features.
# NOTE(review): `load_data` fits the min-max scaling on the frame it is given,
# so the test features are scaled with the test set's own ranges rather than
# the training ranges — confirm this is intended.
features_np_test = load_data(df_iris_test, ["nome_arquivo", "classe", "geral"])
# NOTE(review): this builds a fresh, separate feature network; the later
# `multimodal_model.predict` call does not use it — confirm it is needed.
feature_model_test, feature_input_test, feature_output_test = treina_feature(features_np=features_np_test)

In [75]:
# Predict class probabilities for the test set. Row i of the image array and
# row i of the feature array are fed to the model together.
# NOTE(review): confirm both arrays list the samples in the same order.
previsoes = multimodal_model.predict([vetor_imagens_np_test, features_np_test])

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


In [76]:
# Index of the highest-scoring class for each sample.
classes_previstas = np.argmax(previsoes, axis=1)

In [77]:
# Display the predicted class indices.
classes_previstas

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])