In [None]:
# Mount Google Drive at /content/drive so files stored on Drive can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install roboflow

In [None]:
import pandas as pd
import re
import cv2
from roboflow import Roboflow
import os
import glob
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folder of the CBIS-DDSM dataset on the mounted Drive; other paths are built by appending to it.
dataset = '/content/drive/MyDrive/Inteligência artificial/CBIS-DDSM'

In [None]:
# Load the DICOM metadata table that ships with the dataset.
dicom_data = pd.read_csv(f"{dataset}/csv/dicom_info.csv")

### Calcificações

In [None]:
# Load the calcification case descriptions (train and test splits), keeping only
# the columns: patient_id, abnormality type, calc type, pathology, image file path.
calc_case_description_train_set, calc_case_description_test_set = (
    pd.read_csv(
        f"{dataset}/csv/calc_case_description_{particao}_set.csv",
        header=0,
        usecols=["patient_id", "abnormality type", "calc type", "pathology", "image file path"],
    )
    for particao in ("train", "test")
)


### Massas

In [None]:
# Load the mass case descriptions, keeping only the columns:
# patient_id, abnormality type, mass shape, pathology, image file path.
#
# The train split must come from the *train* CSV; reading the test CSV for both
# frames would make the training and test data identical.
mass_case_description_train_set = pd.read_csv(dataset + '/csv/mass_case_description_train_set.csv',
                                              header=0,
                                              usecols=["patient_id", "abnormality type", "mass shape", "pathology", "image file path"])

mass_case_description_test_set = pd.read_csv(dataset + '/csv/mass_case_description_test_set.csv',
                                             header=0,
                                             usecols=["patient_id", "abnormality type", "mass shape", "pathology", "image file path"])


In [None]:
def alterar_nome_da_pasta(nome):
    """Map a CSV image path to the matching folder under ``<dataset>/jpeg``.

    The third '/'-separated component of ``nome`` is extracted and appended to
    ``dataset + '/jpeg/'``. For example ``"a/b/c/000000.dcm"`` becomes
    ``dataset + '/jpeg/c'``.

    Args:
        nome: Path string with at least three '/'-terminated components.

    Returns:
        The folder path as a string.

    Raises:
        ValueError: If ``nome`` does not contain three '/'-terminated components.
    """
    # Lazily skip the first two components and capture the third one.
    padrao = r'.*?/.*?/(?P<string>.*?)/'
    resultado = re.search(padrao, nome)
    if resultado is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"Unexpected image file path format: {nome!r}")

    return dataset + '/jpeg/' + resultado.group('string')

In [None]:
# Quick inspection of the mass training table: first rows, summary statistics,
# pathology class balance and the raw image paths.
print(
    mass_case_description_train_set.head(),
    mass_case_description_train_set.describe(),
    mass_case_description_train_set['pathology'].value_counts(),
    mass_case_description_train_set['image file path'],
    sep='\n',
)

### Alterando o caminho das pastas

In [None]:
# Rewrite each frame's 'image file path' from its OWN column. Assigning the mass
# paths to the calcification frames would attach the wrong images to those rows
# and, because pandas aligns on the index, fill the surplus rows with NaN.
#
# NOTE: this cell is not idempotent - run it once per kernel session, since
# alterar_nome_da_pasta expects the original CSV path format.
calc_case_description_train_set['image file path'] = calc_case_description_train_set['image file path'].apply(alterar_nome_da_pasta)

calc_case_description_test_set['image file path'] = calc_case_description_test_set['image file path'].apply(alterar_nome_da_pasta)

mass_case_description_train_set['image file path'] = mass_case_description_train_set['image file path'].apply(alterar_nome_da_pasta)

mass_case_description_test_set['image file path'] = mass_case_description_test_set['image file path'].apply(alterar_nome_da_pasta)


In [None]:
# Show the 'image file path' column of the mass training table to inspect its current values.
print(mass_case_description_train_set['image file path'])

### Cria o ROI

In [None]:
def obter_nomes_imagens(diretorio, extensoes=('*.jpg',)):
    """Return the base names of the files in ``diretorio`` matching ``extensoes``.

    Args:
        diretorio: Folder to search (not recursive).
        extensoes: Iterable of glob patterns, e.g. ``['*.jpg', '*.png']``.

    Returns:
        A list of file names (without the directory part). Names are grouped in
        the order the patterns were given and sorted alphabetically within each
        pattern, so the result is deterministic across runs.
    """
    nomes_imagens = []
    for extensao in extensoes:
        # glob gives no ordering guarantee; sort so callers that take the first
        # element always get the same file.
        encontrados = sorted(glob.glob(os.path.join(diretorio, extensao)))
        nomes_imagens.extend(os.path.basename(caminho) for caminho in encontrados)
    return nomes_imagens

def cortar_imagem(nome_pasta, model=None):
    """Detect a region of interest in a folder's image and save the crop next to it.

    The first ``.jpg`` found in ``<dataset>/jpeg/<nome_pasta>`` (ignoring crops
    written by earlier runs) is sent to the detection model. The first returned
    prediction is cropped out of the image and written to the same folder as
    ``roi_<original file name>``.

    Args:
        nome_pasta: Name of the folder under ``<dataset>/jpeg``.
        model: Optional, already-loaded Roboflow model exposing ``predict``.
            Pass it when processing many folders to avoid re-creating the
            client on every call. When omitted, the model is loaded using the
            ``ROBOFLOW_API_KEY`` environment variable.

    Raises:
        RuntimeError: If no model is given and ``ROBOFLOW_API_KEY`` is not set.
        FileNotFoundError: If the folder contains no source ``.jpg`` image.
        ValueError: If the model returns no prediction or the box is empty.
        OSError: If the image cannot be read.
    """
    if model is None:
        # Read the credential from the environment so it is never committed
        # together with the notebook.
        api_key = os.environ.get("ROBOFLOW_API_KEY")
        if not api_key:
            raise RuntimeError(
                "ROBOFLOW_API_KEY is not set; define it before calling cortar_imagem"
            )
        rf = Roboflow(api_key=api_key)
        project = rf.workspace().project("breast-cancer-roi-n4ssp")
        model = project.version(1).model

    pasta = dataset + '/jpeg/' + nome_pasta

    # Skip crops produced by a previous run so re-running never crops a crop.
    nomes_imagens = [
        nome for nome in obter_nomes_imagens(pasta) if not nome.startswith("roi_")
    ]
    if not nomes_imagens:
        raise FileNotFoundError(f"No .jpg image found in {pasta}")
    nome_imagem = nomes_imagens[0]
    caminho_imagem = pasta + '/' + nome_imagem

    prediction = model.predict(caminho_imagem, confidence=50, overlap=50).json()
    prediction_list = prediction["predictions"]
    if not prediction_list:
        raise ValueError(f"No region of interest detected in {caminho_imagem}")
    prediction_dict = prediction_list[0]

    image = cv2.imread(caminho_imagem)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Could not read image {caminho_imagem}")

    # x / y are treated as the centre of the box, so shift by half the size to
    # obtain the top-left corner.
    esquerda = int(prediction_dict['x'] - prediction_dict['width'] / 2)
    topo = int(prediction_dict['y'] - prediction_dict['height'] / 2)
    direita = esquerda + int(prediction_dict['width'])
    base = topo + int(prediction_dict['height'])

    # Clamp to 0: a negative index would wrap around to the opposite edge of the
    # array. Indices past the far edge are already clipped by numpy slicing.
    roi = image[max(topo, 0):max(base, 0), max(esquerda, 0):max(direita, 0)]
    if roi.size == 0:
        raise ValueError(f"Predicted box lies outside the image {caminho_imagem}")

    cv2.imwrite(pasta + "/roi_" + nome_imagem, roi)

# TODO: call this function for every training and test folder
# nome_pasta = '1.3.6.1.4.1.9590.100.1.2.126082211045731020508108042042916052'
# cortar_imagem(nome_pasta)

In [None]:
# Preview the first rows of both training tables.
print(
    calc_case_description_train_set.head(),
    mass_case_description_train_set.head(),
    sep='\n',
)

In [None]:
# CNN - build the training and test tables

# Drop the abnormality-specific descriptor columns so both training frames
# share the same schema and can be stacked.
train_calc = calc_case_description_train_set.drop('calc type', axis=1)
train_mass = mass_case_description_train_set.drop('mass shape', axis=1)

# Test table: outer merge on the shared columns keeps rows from both sets.
test = pd.merge(calc_case_description_test_set, mass_case_description_test_set, how = 'outer')

# Training table: stack calcification AND mass rows. Using only train_calc would
# leave a single class, which makes the binary classifier meaningless.
train = (
    pd.concat([train_calc, train_mass], axis=0, ignore_index=True)
    # Rows without an image path cannot be loaded by the generator.
    .dropna(subset=['image file path'])
    # The generator expects file names as strings.
    .astype({'image file path': str})
)

# Resolution the images are resized to before entering the network.
# The native mammogram size (2936 x 4216) is not usable with this architecture:
# Flatten -> Dense(128) on 2936 * 4216 * 3 inputs needs ~4.75e9 weights (~19 GB
# in float32), and a single batch of 32 images would take ~4.7 GB.
img_width, img_height = 150, 150

# Index i of this list is the label i produced by the generator.
class_names = ['calcification', 'mass']

# Training batch size
batch_size = 32
# Root directory of the images
nome_pasta_root = dataset + '/jpeg/'

# ImageDataGenerator used to pre-process and augment the images
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Scale pixel values to the [0, 1] range
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# NOTE(review): 'image file path' appears to hold folder paths (see
# alterar_nome_da_pasta) rather than .jpg file paths; flow_from_dataframe
# expects file names, so confirm the column is resolved to actual image files
# before training.
train_generator = train_datagen.flow_from_dataframe(
    train,
    x_col='image file path',
    y_col='abnormality type',  # Column holding the class label (mass or calcification)
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    class_mode='binary',  # Binary classification
    classes=class_names,  # Single source of truth for the label order
    directory=nome_pasta_root
)

# Build the model layers
model = keras.Sequential([
    # The generator yields RGB batches by default, so the input has 3 channels.
    keras.layers.Flatten(input_shape=(img_height, img_width, 3)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # Sigmoid output for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


epochs = 2  # Number of training epochs

model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs
)

In [None]:
# Sanity checks on the assembled train/test tables: previews, class balance,
# path frequencies and the Python types of the intermediate objects.
print(
    train.head(),
    train['abnormality type'].value_counts(),
    train['image file path'].value_counts(),
    test.head(),
    type(train['abnormality type'].value_counts()),
    train['abnormality type'].value_counts(),
    test['abnormality type'].value_counts(),
    type(train['image file path']),
    sep='\n\n',
)