In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
pip install roboflow



In [4]:
import pandas as pd
import re
import cv2
from roboflow import Roboflow
import os
import glob
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [5]:
dataset = '/content/drive/MyDrive/Inteligência artificial/CBIS-DDSM'

### Calcificações

In [6]:
# patient_id, abnormality type, calc type, pathology, image file path

calc_case_description_train_set = pd.read_csv(dataset + '/csv/calc_case_description_train_set.csv',
                                              header=0,
                                              usecols=["patient_id", "abnormality type", "calc type", "pathology", "image file path"])

calc_case_description_test_set = pd.read_csv(dataset + '/csv/calc_case_description_test_set.csv',
                                             header=0,
                                             usecols=["patient_id", "abnormality type", "calc type", "pathology", "image file path"])


### Massas

In [7]:
# patient_id, abnormality type, mass shape, pathology, image file path

mass_case_description_train_set = pd.read_csv(dataset + '/csv/mass_case_description_train_set.csv',
                                              header=0,
                                              usecols=["patient_id", "abnormality type", "mass shape", "pathology", "image file path"])

mass_case_description_test_set = pd.read_csv(dataset + '/csv/mass_case_description_test_set.csv',
                                                                                           header=0,
                                              usecols=["patient_id", "abnormality type", "mass shape", "pathology", "image file path"])


In [7]:
from PIL import Image

def redimensionar_imagem(input_path, output_path, max_size):
    # Abrir a imagem
    image = Image.open(input_path)

  #  largura e altura da imagem
    width, height = 150, 150

    resized_image = image.resize((width, height), Image.LANCZOS)

    # # Verificar qual é o lado mais longo da imagem
    # maior_lado = max(width, height)

    # # Se o maior lado já for menor ou igual ao tamanho máximo, não é necessário redimensionar
    # if maior_lado <= max_size:
    #     image.save(output_path)
    # else:
    #     # Calcular a proporção para redimensionar a imagem mantendo a mesma proporção
    #     ratio = max_size / maior_lado

    #     # Redimensionar a imagem
    #     new_width = int(width * ratio)
    #     new_height = int(height * ratio)
    #     resized_image = image.resize((new_width, new_height), Image.LANCZOS)

    os.remove(input_path)
    resized_image.save(output_path)

In [8]:
def alterar_nome_da_pasta(nome):
  teste = "Mass-Test_P_00017_LEFT_CC/1.3.6.1.4.1.9590.100.1.2.289610447411344525237308079592285912683/1.3.6.1.4.1.9590.100.1.2.22131189612893294827907969600765582967/000000.dcm"

  padrao = r'.*?/.*?/(?P<string>.*?)/'
  resultado = re.search(padrao, nome)

  string_desejada = resultado.group('string')
  string_desejada = dataset + '/jpeg/' + string_desejada
  # print(string_desejada)

  img = obter_nomes_imagens(string_desejada)

  res = string_desejada + '/' + img[0]

  # # Redimensionar imagem
  # input_path = res
  # output_path = res
  # max_size = 400  # Tamanho máximo desejado
  # redimensionar_imagem(input_path, output_path, max_size)

  return res

def obter_nomes_imagens(diretorio, extensoes=['*.jpg']):
    caminho_completo = os.path.join(diretorio, '*')
    nomes_imagens = []

    for extensao in extensoes:
        caminho_imagens = os.path.join(diretorio, extensao)
        nomes_imagens.extend(glob.glob(caminho_imagens))

    nomes_imagens = [os.path.basename(imagem) for imagem in nomes_imagens]
    return nomes_imagens


In [None]:
print(mass_case_description_train_set.head())
print(mass_case_description_train_set.describe())
print(mass_case_description_train_set['pathology'].value_counts())
print(mass_case_description_train_set['image file path'])

### Alterando o caminho das pastas

In [9]:
calc_case_description_train_set['image file path'] = calc_case_description_train_set['image file path'].apply(alterar_nome_da_pasta)

calc_case_description_test_set['image file path'] = calc_case_description_test_set['image file path'].apply(alterar_nome_da_pasta)

mass_case_description_train_set['image file path'] = mass_case_description_train_set['image file path'].apply(alterar_nome_da_pasta)

mass_case_description_test_set['image file path'] = mass_case_description_test_set['image file path'].apply(alterar_nome_da_pasta)


In [None]:
print(mass_case_description_train_set['image file path'])

### Cria o ROI

In [None]:

def cortar_imagem(nome_pasta):
    rf = Roboflow(api_key="CEfThkqYjDhVzpzg2wJO")
    project = rf.workspace().project("breast-cancer-roi-n4ssp")
    model = project.version(1).model

    nome_pasta = dataset + '/jpeg/' + nome_pasta
    nomes_imagens = obter_nomes_imagens(nome_pasta)

    prediction = model.predict(nome_pasta + '/' + nomes_imagens[0], confidence=50, overlap=50).json()
    prediction_list = prediction["predictions"]
    prediction_dict = prediction_list[0]

    roi_x = int(prediction_dict['x'] - prediction_dict['width'] / 2)
    roi_y = int(prediction_dict['y'] - prediction_dict['height'] / 2)
    roi_width = int(prediction_dict['width'])
    roi_height = int(prediction_dict['height'])

    image = cv2.imread(nome_pasta + '/' + nomes_imagens[0])

    roi = image[roi_y:roi_y+roi_height, roi_x:roi_x+roi_width]

    cv2.imwrite(nome_pasta + "/roi_" + nomes_imagens[0], roi)

# cortar_imagem(nome_pasta)

In [None]:
print(calc_case_description_train_set.head())
print(mass_case_description_train_set.head())

In [10]:
# CNN

# train calc type
train_calc = calc_case_description_train_set.drop('calc type', axis=1)
# train mass shape
train_mass = mass_case_description_train_set.drop('mass shape', axis=1)
# test calc type
test_calc = calc_case_description_test_set.drop('calc type', axis=1)
# train mass shape
test_mass = mass_case_description_test_set.drop('mass shape', axis=1)

train = pd.concat([train_calc, train_mass ], axis=0)
test = pd.concat([test_calc, test_mass ], axis=0)

# Reinicializar os índices do DataFrame resultante
train.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)

# Excluir os valores NaN da coluna 'image file path'
train = train.dropna(subset=['image file path'])
test = test.dropna(subset=['image file path'])

# convertendo para string
train['image file path'] = train['image file path'].astype(str)
test['image file path'] = test['image file path'].astype(str)

In [None]:
print(train['abnormality type'].value_counts())
print(train.shape[0])
print(train['image file path'][0])
train['abnormality type']

In [11]:
# Salvando o DataFrame em um arquivo CSV
train.to_csv('./new_train.csv', index=False)

In [None]:
# img_width, img_height = 2936, 4216
img_width, img_height = 150, 150

class_names = ['calcification', 'mass']

# Tamanho do lote de treinamento
batch_size = 32
# Caminho para o diretório raiz das imagens
nome_pasta_root = dataset + '/jpeg/'

# ImageDataGenerator para pré-processar as imagens
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Normaliza os valores dos pixels para o intervalo [0, 1]
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_generator = train_datagen.flow_from_dataframe(
    test,
    x_col='image file path',
    y_col='abnormality type',  # Coluna que contém os rótulos das categorias (mass ou calcification)
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',  # Classificação binária
    classes=['mass', 'calcification'],  # Nomes das classes
    directory=nome_pasta_root
)

train_generator = train_datagen.flow_from_dataframe(
    train,
    x_col='image file path',
    y_col='abnormality type',  # Coluna que contém os rótulos das categorias (mass ou calcification)
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',  # Classificação binária
    classes=['mass', 'calcification'],  # Nomes das classes
    directory=nome_pasta_root
)

# Montar as camadas do modelo
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(img_width, img_height, 3)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid') # Camada de saída com ativação sigmóide para classificação binária
])

# Compilar o modelo
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


epochs = 15  # Número de épocas de treinamento

model.fit(
    train_generator,
    validation_data=test_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs
)


Found 704 validated image filenames belonging to 2 classes.
Found 2864 validated image filenames belonging to 2 classes.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15


In [18]:
  # salvar model
  model.save("/content/drive/MyDrive/Inteligência artificial/saved_models/model.h5")

  results = model.evaluate(test_generator)



In [19]:
# Accuracy
from sklearn import metrics
saved_model = keras.models.load_model("/content/drive/MyDrive/Inteligência artificial/saved_models/model.h5")

predict = saved_model.predict(test_generator)
y_pred = np.argmax(predict, axis=1)
y_test = test_generator.classes

print("y_test", y_test)
print("y_pred", y_pred)

print("Accuracy", metrics.accuracy_score(y_test, y_pred))


y_test [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,

In [24]:
# matriz de confusão
from sklearn.metrics import confusion_matrix
pd.DataFrame(confusion_matrix(y_test, y_pred),
             index=['mass', 'calcification'], columns=['mass', 'calcification'])


Unnamed: 0,1,0
1,378,0
0,326,0


In [None]:
print(train.head())

print(train['abnormality type'].value_counts())

print(train['image file path'].value_counts())

print(test.head())

print(type(train['abnormality type'].value_counts()))

print(train['abnormality type'].value_counts())

print(test['abnormality type'].value_counts())

print(type(train['image file path']))