In [1]:
import os
from PIL import Image
from shutil import copyfile
import pandas as pd
# Đường dẫn đến thư mục chứa ảnh
img_dir = 'C:/Users/ADMIN/Desktop/data/'

# Đọc thông tin từ các file metadata
df_train_meta = pd.read_csv('C:/Users/ADMIN/Desktop/data/train_meta.csv')
df_valid_meta = pd.read_csv('C:/Users/ADMIN/Desktop/data/valid_meta.csv')

# Tạo cột 'image_filename' nếu nó chưa tồn tại
df_train_meta['image_filename'] = df_train_meta['patient_id'].astype(str) + '_' + df_train_meta['image_id'].astype(str) + '.png'
df_valid_meta['image_filename'] = df_valid_meta['patient_id'].astype(str) + '_' + df_valid_meta['image_id'].astype(str) + '.png'

# Đường dẫn đến thư mục đích cho ảnh train và valid
train_dir = 'C:/Users/ADMIN/Desktop/data/train_images'
valid_dir = 'C:/Users/ADMIN/Desktop/data/valid_images'

# Tạo thư mục train và valid nếu chúng chưa tồn tại
os.makedirs(train_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)

# Di chuyển hình ảnh vào thư mục đích
for filename in df_train_meta['image_filename']:
    src_file_path = os.path.join(img_dir, filename)
    dst_file_path = os.path.join(train_dir, filename)
    if os.path.isfile(src_file_path):  # Kiểm tra xem file có tồn tại
        copyfile(src_file_path, dst_file_path)

for filename in df_valid_meta['image_filename']:
    src_file_path = os.path.join(img_dir, filename)
    dst_file_path = os.path.join(valid_dir, filename)
    if os.path.isfile(src_file_path):  # Kiểm tra xem file có tồn tại
        copyfile(src_file_path, dst_file_path)

print(f'Total images in train directory: {len(os.listdir(train_dir))}')
print(f'Total images in valid directory: {len(os.listdir(valid_dir))}')

Total images in train directory: 54706
Total images in valid directory: 463


In [2]:
# Defining the data set
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class MammographyDataset(Dataset):
    def __init__(self, meta_df, img_dir, transform=None):
        
        self.df = meta_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        # Get label from meta data
        label = self.df.iloc[idx]['cancer']
        img_filename = self.df.iloc[idx]['image_filename']  # make sure this column exists in your dataframe

        # Load image
        img_path = os.path.join(self.img_dir, img_filename)
        try:
            with Image.open(img_path) as img:
                if self.transform:
                    img = self.transform(img)
        except FileNotFoundError:
            print(f"File not found: {img_path}")
            return None
            
        # Get metadata features
        feature_names = [
            'age', 
            'laterality_L', 'laterality_R', 
            'view_AT', 'view_CC', 'view_MLO',
            'implant_0', 'implant_1'
                        ]
        
        meta_features = self.df[feature_names]
        meta_features = meta_features.iloc[idx, :].to_numpy()

        
        return img, meta_features, label

# Defining the transformations
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Initialize the datasets
train_dataset = MammographyDataset(
    meta_df=df_train_meta,
    img_dir='C:/Users/ADMIN/Desktop/data/train_images',
    transform=transform,
)

valid_dataset = MammographyDataset(
    meta_df=df_valid_meta,
    img_dir='C:/Users/ADMIN/Desktop/data/valid_images',
    transform=transform,
)

# Initialize the DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False)


In [3]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models import ResNet50_Weights
from torch.utils.data import DataLoader


class MammographyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnet = torchvision.models.resnet50(weights=ResNet50_Weights.DEFAULT)
        self.rnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.rnet.fc = nn.Linear(in_features=2048, out_features=500)
        self.fc1 = nn.Linear(508, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, img, meta_features):
        resnet_out = self.rnet(img)
        resnet_out = torch.sigmoid(resnet_out)

        if meta_features.dim() == 1:
            meta_features = meta_features.unsqueeze(0)  

        x_final = torch.cat((resnet_out, meta_features), dim=1)

        x_final = self.fc1(x_final)
        out = self.sigmoid(x_final)
        return out



model = MammographyModel()
model_path = 'C:/Users/ADMIN/Desktop/saved_models/model_epoch_25.pth'  
model.load_state_dict(torch.load(model_path))
model.eval()



MammographyModel(
  (rnet): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
    

In [8]:
import torch
from sklearn.metrics import confusion_matrix

# Khai báo hàm mất mát
loss_function = torch.nn.BCELoss()

# Lưu dự đoán và nhãn thực tế
all_labels = []
all_preds = []

# Đánh giá mô hình
total_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for images, meta_features, labels in valid_loader:
        # Đảm bảo rằng đầu vào cho mô hình có cùng kiểu dữ liệu
        images = images.float()
        meta_features = meta_features.float()
        labels = labels.float()

        outputs = model(images, meta_features)
        loss = loss_function(outputs, labels.unsqueeze(1))
        total_loss += loss.item()

        predicted = outputs.round()
        total += labels.size(0)
        correct += (predicted == labels.unsqueeze(1)).sum().item()

        all_labels.extend(labels.tolist())
        all_preds.extend(predicted.squeeze().tolist())


average_loss = total_loss / len(valid_loader)
accuracy = 100 * correct / total

# Tính toán confusion matrix
tn, fp, fn, tp = confusion_matrix(all_labels, all_preds).ravel()

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f'Loss on validation set: {average_loss:.2f}')
print(f'Accuracy on validation set: {accuracy:.2f}%')
print(f'Sensitivity on validation set: {sensitivity:.2f}')
print(f'Specificity on validation set: {specificity:.2f}')


Loss on validation set: 0.69
Accuracy on validation set: 58.10%
Sensitivity on validation set: 0.33
Specificity on validation set: 0.84


In [15]:
import numpy as np
def prepare_metadata(age, laterality, view, implant, df_train_meta):
    
    age_normalized = (age - df_train_meta['age'].min()) / (df_train_meta['age'].max() - df_train_meta['age'].min())

    laterality_features = [1.0 if laterality == 'L' else 0.0, 1.0 if laterality == 'R' else 0.0]
    
    view_features = [1.0 if view == 'AT' else 0.0, 1.0 if view == 'CC' else 0.0, 1.0 if view == 'MLO' else 0.0]
    
    implant_features = [1.0 if implant == 0 else 0.0, 1.0 if implant == 1 else 0.0]
    
    meta_features = np.array([age_normalized] + laterality_features + view_features + implant_features, dtype=np.float32)
    
    return torch.tensor(meta_features).unsqueeze(0)  


def predict_cancer(image_path, age, laterality, view, implant, model, df_train_meta):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=1),  
        transforms.ToTensor(),
    ])
    
  
    img = Image.open(image_path).convert('L')  
    img = transform(img).unsqueeze(0)  

 
    meta_features = prepare_metadata(age, laterality, view, implant, df_train_meta)

    model.eval()  
    with torch.no_grad():
        preds = model(img, meta_features)
        cancer_prob = torch.sigmoid(preds).item() 

    print("Xác suất ung thư: {:.2f}".format(cancer_prob))
    if cancer_prob > 0.5:
        print("Dự đoán: Có dấu hiệu của bệnh ung thư vú.")
    else:
        print("Dự đoán: Không có dấu hiệu của bệnh ung thư vú.")

image_path = 'C:/Users/ADMIN/Desktop/data/valid_images/52181_1884342055.png'
age = 45  
laterality = 'L'  
view = 'MLO'  
implant = 0  

predict_cancer(image_path, age, laterality, view, implant, model, df_train_meta)


Xác suất ung thư: 0.73
Dự đoán: Có dấu hiệu của bệnh ung thư vú.
