# **Preparação dos Dados**

In [None]:
import os

# Ask the user to upload the .tar.gz archive through the Colab file picker
# (the download URL is shown in the prompt printed below).
from google.colab import files

upload_prompt = "üìÅ Fa√ßa o upload do arquivo all-mias.tar.gz (baixe de http://peipa.essex.ac.uk/pix/mias/all-mias.tar.gz)"
print(upload_prompt)
uploaded = files.upload()

# Print every entry currently present in the Colab working directory so the
# user can confirm the archive arrived.
content_dir = "/content"
print("üìÇ Arquivos dispon√≠veis no /content:")
for entry in os.listdir(content_dir):
    print("-", entry)

üìÅ Fa√ßa o upload do arquivo all-mias.tar.gz (baixe de http://peipa.essex.ac.uk/pix/mias/all-mias.tar.gz)


TypeError: 'NoneType' object is not subscriptable

In [None]:
# =====================================================
#  IMPORTA√á√ïES INICIAIS
# =====================================================
import os
import tarfile
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display

# =====================================================
#  STEP 1: EXTRACT AND LOAD THE DATA
# =====================================================
base_path = "./mini-mias"
os.makedirs(base_path, exist_ok=True)

archive_name = "all-mias.tar.gz"

# Give the uploaded archive its canonical name, but only when that name is
# not already taken: renaming unconditionally could overwrite a good archive
# with another .tar.gz that happens to be listed first, and would report a
# "rename" even when the file already had the right name.
if not os.path.exists(archive_name):
    for file in os.listdir("."):
        if file.endswith(".tar.gz"):
            os.rename(file, archive_name)
            print(f"‚úÖ Arquivo renomeado: {file} ‚Üí all-mias.tar.gz")
            break

# Extract the archive contents into base_path
print("üì¶ Extraindo conte√∫do...")
with tarfile.open(archive_name, "r:gz") as tar:
    # The archive comes from the network, so use the "data" extraction filter
    # when this Python provides it: it rejects members that would be written
    # outside base_path (absolute paths, "..", unsafe links).
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=base_path, filter="data")
    else:
        tar.extractall(path=base_path)

# List the extracted .pgm images (sorted so the order is reproducible)
pgm_files = sorted(f for f in os.listdir(base_path) if f.endswith(".pgm"))
print(f"üìÅ Total de imagens encontradas: {len(pgm_files)}")

# Preview the first six images in a 2x3 grid
plt.figure(figsize=(12, 6))
for i, file in enumerate(pgm_files[:6]):
    img = cv2.imread(os.path.join(base_path, file), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising
        print(f"Aviso: falha ao ler a imagem {file}")
        continue
    plt.subplot(2, 3, i + 1)
    plt.imshow(img, cmap="gray")
    plt.title(file)
    plt.axis("off")
# Add the figure title before tight_layout so the layout pass can leave room for it
plt.suptitle("üñºÔ∏è Amostras da Base Mini-MIAS", fontsize=16)
plt.tight_layout()
plt.show()

# Image-size statistics: collect (height, width) of every readable image
shapes = []
for name in pgm_files:
    loaded = cv2.imread(os.path.join(base_path, name), cv2.IMREAD_GRAYSCALE)
    if loaded is None:
        print(f"Aviso: falha ao ler a imagem {name}")
        continue
    shapes.append(loaded.shape)

print("\nüìä Estat√≠sticas das imagens:")
if shapes:
    heights, widths = zip(*shapes)
    print(f"- Dimens√µes √∫nicas: {np.unique(shapes, axis=0)}")
    print(f"- M√©dia altura: {np.mean(heights):.2f}px | M√©dia largura: {np.mean(widths):.2f}px")
else:
    # Without this guard zip(*shapes) yields nothing and the unpacking fails
    heights, widths = (), ()
    print("- Nenhuma imagem foi lida com sucesso.")

# =====================================================
#  STEP 2: READ INFO.TXT
# =====================================================
def _parse_info_line(line):
    """Parse one whitespace-separated ``mdb...`` record from Info.txt.

    Returns ``[filename, tissue, abnormality, severity, x, y, radius]``, or
    ``None`` when the line has fewer than three fields and therefore cannot
    be labelled.

    * 3 fields: no severity column, so the record is marked ``"normal"``.
    * 4+ fields: the fourth field is taken as the severity.
    * exactly 7 fields: fields 5-7 are read as integer x, y, radius; if any
      of them is not an integer all three stay at 0.
    """
    parts = line.split()
    if len(parts) < 3:
        return None

    filename, tissue, abnormality = parts[:3]
    severity = parts[3] if len(parts) > 3 else "normal"

    x = y = radius = 0
    if len(parts) == 7:
        try:
            x, y, radius = (int(value) for value in parts[4:7])
        except ValueError:
            # Non-numeric location fields: keep the zero placeholders
            x = y = radius = 0

    return [filename, tissue, abnormality, severity, x, y, radius]


info_path = os.path.join(base_path, "Info.txt")
print("\nüìÑ Lendo Info.txt...")

data = []
skipped_lines = 0
with open(info_path, "r") as f:
    for line in f:
        line = line.strip()
        # Only lines that start with an image id ("mdb...") are records
        if not line.startswith("mdb"):
            continue

        record = _parse_info_line(line)
        if record is None:
            skipped_lines += 1
            continue
        data.append(record)

if skipped_lines:
    print(f"Aviso: {skipped_lines} linha(s) incompleta(s) ignorada(s) em Info.txt")

df_info = pd.DataFrame(data, columns=["filename", "tissue", "abnormality", "severity", "x", "y", "radius"])
print(df_info.head())
print(f"\nTotal de registros processados: {len(df_info)}")

# Class statistics: print the value counts of each categorical column
for heading, column in (
    ("\nüìä Distribui√ß√£o das classes (abnormality):", "abnormality"),
    ("\nüìä Distribui√ß√£o da severidade (severity):", "severity"),
):
    print(heading)
    print(df_info[column].value_counts())

# Final summary: rows whose abnormality is "NORM" (case-insensitive) versus
# every other row, plus the overall row count.
is_norm_row = df_info["abnormality"].str.upper() == "NORM"
total = len(df_info)
normais = int(is_norm_row.sum())
anormais = total - normais

resumo = pd.DataFrame(
    {
        "Tipo de imagem": ["Normais", "Com anormalidade", "Total"],
        "Quantidade": [normais, anormais, total],
    }
)
print("\nüìã Resumo:")
display(resumo)

# =====================================================
#  STEP 3: PREPARE LABELS
# =====================================================
df_labels = df_info.copy()
class_map = {"normal": 0, "B": 1, "M": 1}  # binary: normal vs abnormal
df_labels["label"] = df_labels["severity"].map(class_map)

# Series.map leaves NaN for any severity missing from class_map. Fail here,
# with the offending values, instead of letting NaN labels reach training.
unmapped = df_labels.loc[df_labels["label"].isna(), "severity"].unique()
if len(unmapped) > 0:
    raise ValueError(
        f"Unexpected severity value(s) in Info.txt: {sorted(map(str, unmapped))}"
    )
# With every row mapped the column can be a true integer column
df_labels["label"] = df_labels["label"].astype(int)

# Make sure every filename carries the .pgm extension used on disk
df_labels["filename"] = df_labels["filename"].apply(lambda f: f if f.endswith(".pgm") else f + ".pgm")

print(df_labels[["filename", "severity", "label"]].head())

# **Resnet18**

In [None]:
# =====================================================
#  ETAPA 4: RESNET18 SEM PR√â-PROCESSAMENTO
# =====================================================
!pip install -q tqdm torchvision torchmetrics

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm
import os, cv2, numpy as np
from sklearn.metrics import classification_report
import torchvision.models as models

# =====================================================
#  DATASET
# =====================================================
class MiniMIASDataset(Dataset):
    """Map-style dataset that loads grayscale images listed in a DataFrame.

    Args:
        df: DataFrame with a ``"filename"`` column (path relative to
            ``base_path``) and a ``"label"`` column (integer class id).
        base_path: Directory that contains the image files.
        image_size: ``(width, height)`` passed to ``cv2.resize``; defaults to
            ``(224, 224)``.

    Each item is ``(image, label)``: ``image`` is a float32 tensor of shape
    ``[1, H, W]`` scaled to ``[-1, 1]``; ``label`` is a ``torch.long`` scalar.
    """

    def __init__(self, df, base_path, image_size=(224, 224)):
        self.df = df
        self.base_path = base_path
        self.image_size = image_size

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, resize and normalise the image at positional index ``idx``.

        Raises:
            FileNotFoundError: the image file does not exist.
            ValueError: the file exists but OpenCV could not decode it.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.base_path, row["filename"])

        # Check explicitly: cv2.imread returns None instead of raising, which
        # would otherwise surface later as an opaque cv2.resize error.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Could not decode image: {img_path}")

        image = cv2.resize(image, self.image_size)

        # Convert to a normalised tensor
        image = np.expand_dims(image, axis=0)  # [1, H, W]
        image = torch.tensor(image, dtype=torch.float32) / 255.0
        image = (image - 0.5) / 0.5  # scale [0, 1] to [-1, 1]

        # int() makes a float-typed label column safe and rejects NaN loudly
        label = torch.tensor(int(row["label"]), dtype=torch.long)
        return image, label

# =====================================================
#  TRAIN/TEST SPLIT
# =====================================================
# Split per image, not per Info.txt row: the label table may hold several
# rows for the same file (one per annotated abnormality), and a row-wise
# split could then put the same image in both train and test.
# The image-level label is the max over its rows (abnormal if any row is).
df_images = df_labels.copy()
df_images["label"] = df_images.groupby("filename")["label"].transform("max")
df_images = df_images.drop_duplicates(subset="filename")

df_train = df_images.sample(frac=0.8, random_state=42)
df_test = df_images.drop(df_train.index)

dataset_train = MiniMIASDataset(df_train, base_path)
dataset_test = MiniMIASDataset(df_test, base_path)

# =====================================================
#  CLASS BALANCING
# =====================================================
# Weight each training sample by the inverse frequency of its class so the
# sampler draws both classes with roughly equal probability.
class_counts = df_train["label"].value_counts().sort_index()
weights_per_class = 1.0 / class_counts
sample_weights = df_train["label"].map(weights_per_class).values

sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

# The sampler already randomises the training order; test order stays fixed
dataloader_train = DataLoader(dataset_train, batch_size=16, sampler=sampler)
dataloader_test = DataLoader(dataset_test, batch_size=16, shuffle=False)

# =====================================================
#  MODEL: RESNET18
# =====================================================
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\nTreinando na device: {device}")

# Load ResNet18 pre-trained on ImageNet
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Adapt the first layer to 1-channel input. A brand-new Conv2d would be
# randomly initialised and discard the pre-trained filters, so seed it with
# the RGB kernels summed over the input-channel axis: [64, 3, 7, 7] becomes
# [64, 1, 7, 7]. For a gray image replicated on three channels this gives
# the same response as the original layer.
pretrained_conv1_weight = model.conv1.weight.detach().clone()
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    model.conv1.weight.copy_(pretrained_conv1_weight.sum(dim=1, keepdim=True))

# Replace the classifier head with a 2-class output layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)

model = model.to(device)

# =====================================================
#  TRAINING
# =====================================================
num_epochs = 30
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(num_epochs):
    print(f"\nEpoch [{epoch+1}/{num_epochs}]")
    model.train()
    running_loss = 0.0

    # One optimisation step per mini-batch, with a tqdm progress bar
    for batch_images, batch_labels in tqdm(dataloader_train):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Average of the per-batch losses over this epoch
    mean_loss = running_loss / len(dataloader_train)
    print(f"Loss m√©dio: {mean_loss:.4f}")

print("\nTreinamento conclu√≠do.")

# =====================================================
#  EVALUATION
# =====================================================
model.eval()
correct, total = 0, 0
y_true, y_pred = [], []

# Inference only: no gradients needed
with torch.no_grad():
    for images, labels in dataloader_test:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)  # index of the highest logit = predicted class
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        y_true.extend(labels.cpu().tolist())
        y_pred.extend(preds.cpu().tolist())

# Guard against an empty test loader (would otherwise divide by zero)
test_accuracy = correct / total if total else float("nan")
print(f"\nTest Accuracy: {test_accuracy:.4f}")

print("\nüìä Relat√≥rio de Classifica√ß√£o:")
if total:
    # Pass labels explicitly: without it, classification_report raises when
    # only one class appears in y_true/y_pred, because the two target_names
    # no longer match the classes it infers. zero_division=0 keeps the row
    # of an absent class at 0 instead of emitting warnings.
    print(
        classification_report(
            y_true,
            y_pred,
            labels=[0, 1],
            target_names=["Normal", "Anormal"],
            zero_division=0,
        )
    )
else:
    print("Conjunto de teste vazio.")
