In [152]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
import os 
import pandas as pd 
import numpy as np
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification
import requests


In [153]:
TRAIN_DIR = './data/Covid19-dataset/train'
TEST_DIR = './data/Covid19-dataset/test'

def load_data(data_path):
    filepaths = []
    labels = []

    folds = os.listdir(data_path)
    for fold in folds:
        foldpath = os.path.join(data_path, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)

    # Concatenate data paths with labels into one dataframe
    Fseries = pd.Series(filepaths, name= 'filepaths')
    Lseries = pd.Series(labels, name='labels')
    df = pd.concat([Fseries, Lseries], axis= 1)
    return df

MAPPING = {'Normal':0,
           'Viral Pneumonia': 1,
           'Covid':2}

train_data = load_data(TRAIN_DIR)
# val_data = train_data.iloc[-int(len(train_data) * 0.3):-1, :]
test_data = load_data(TEST_DIR)
train_data.iloc[:,1] = train_data.iloc[:,1].apply(lambda x: MAPPING[x])
test_data.iloc[:,1] = test_data.iloc[:,1].apply(lambda x: MAPPING[x])
# data = pd.concat([train_data, test_data])

# data['true_label'] = data['labels'].apply(lambda x: MAPPING[x])
# train_data = train_data[train_data.labels != 'Viral Pneumonia']

# test_data = test_data[test_data.labels != 'Viral Pneumonia']
# train_data.labels = np.where(train_data.labels == 'Normal', 0, train_data.labels == 'Normal')
# train_data.labels = np.where(train_data.labels == 'Covi', 0, 1)
# test_data.labels = np.where(test_data.labels == 'Normal', 0, 1)


In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data_frame = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = self.data_frame.iloc[idx, 0]
        image = Image.open(img_name).convert('RGB')
        label = self.data_frame.iloc[idx, 1]
        label = torch.tensor(label, dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)
        
        return image, label
    

# Define the transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=0, std=1)
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=0, std=1)
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

train_dataset = CustomImageDataset(train_data, transform=data_transforms['train'])
val_dataset = CustomImageDataset(test_data, transform=data_transforms['val'])

train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=5, shuffle=False)

dataloaders = {'train': train_loader, 'val': val_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}

In [137]:
model = models.densenet121(pretrained=True)

device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
# Modify the model for 3 classes
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 3)

criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
num_epochs = 5




In [139]:
# Training loop
for epoch in range(num_epochs):
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            # labels = labels.to(device).float().unsqueeze(1)
            labels = labels.to(device).long()
    
            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    
    print()

# # Save the model
torch.save(model.state_dict(), 'covid_classifier.pth')


train Loss: 0.5483 Acc: 0.7769
val Loss: 2.6856 Acc: 0.3030

train Loss: 0.5037 Acc: 0.7968
val Loss: 0.7465 Acc: 0.7879

train Loss: 0.4516 Acc: 0.8406
val Loss: 0.5633 Acc: 0.7424

train Loss: 0.5164 Acc: 0.8127
val Loss: 0.1432 Acc: 0.9848

train Loss: 0.3608 Acc: 0.9004
val Loss: 0.2539 Acc: 0.9242



In [146]:
def load_model(model_path, device):
    model = models.densenet121(pretrained=True)
    num_ftrs = model.classifier.in_features
    model.classifier = nn.Linear(num_ftrs, 3)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    return model

def predict(model, dataloader, device):
    model.eval()
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
         
            all_labels.extend(preds.numpy())
    
    return all_labels

model_path = 'covid_classifier.pth'
model = load_model(model_path, device)
val_dataloader = dataloaders['val']


test_labels = predict(model, val_dataloader, device)



In [147]:
test_data['fc'] = test_labels

In [154]:
test_data[test_data.labels != test_data.fc]

AttributeError: 'DataFrame' object has no attribute 'fc'

In [None]:


# Function to preprocess an image
def preprocess_image(img_path):
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img_t = preprocess(img)
    # img_t = transforms.ToTensor(img)
    # img_t = torch.Tensor(img)
    batch_t = torch.unsqueeze(img_t, 0)
    return batch_t

# Function to classify an image
def classify_image(img_path):
    img_t = preprocess_image(img_path)
    
    with torch.no_grad():
        outputs = model(img_t)
    
    # probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
    print(probabilities)
    predicted_class_idx = torch.argmax(probabilities).item()
    probability = probabilities[predicted_class_idx].item()
    
    return predicted_class_idx, probability

# Function to classify a dataset
def classify_dataset(image_paths):
    results = []
    for img_path in image_paths:
        predicted_class, probability = classify_image(img_path)
        results.append({
            'image_path': img_path,
            'predicted_class': predicted_class,
            'probability': probability
        })
    return pd.DataFrame(results)

# Classify the dataset
classified_results = classify_dataset(data.filepaths)
print(classified_results)


In [None]:
classified_results

Unnamed: 0,image_path,predicted_class,probability
0,./data/Covid19-dataset/train/Viral Pneumonia/0...,1,0.598285
1,./data/Covid19-dataset/train/Viral Pneumonia/0...,1,0.521644
2,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.658299
3,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.622783
4,./data/Covid19-dataset/train/Viral Pneumonia/0...,1,0.512274
...,...,...,...
312,./data/Covid19-dataset/test/Covid/0108.jpeg,0,0.634804
313,./data/Covid19-dataset/test/Covid/COVID-00022.jpg,0,0.586603
314,./data/Covid19-dataset/test/Covid/COVID-00003b...,0,0.688426
315,./data/Covid19-dataset/test/Covid/0120.jpg,0,0.645071


In [None]:
classified_results.predicted_class.value_counts()

predicted_class
0    241
1     76
Name: count, dtype: int64

In [None]:
data

Unnamed: 0,filepaths,labels,true_label
0,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
1,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
2,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
3,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
4,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
...,...,...,...
61,./data/Covid19-dataset/test/Covid/0108.jpeg,Covid,1
62,./data/Covid19-dataset/test/Covid/COVID-00022.jpg,Covid,1
63,./data/Covid19-dataset/test/Covid/COVID-00003b...,Covid,1
64,./data/Covid19-dataset/test/Covid/0120.jpg,Covid,1


In [None]:
data.true_label.value_counts()

true_label
1    227
0     90
Name: count, dtype: int64

In [None]:
fin = classified_results.merge(data, left_on='image_path',right_on='filepaths')

In [None]:
fin[fin.true_label != fin.predicted_class]

Unnamed: 0,image_path,predicted_class,probability,filepaths,labels,true_label
0,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.531899,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
2,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.500725,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
3,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.618483,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
4,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.518757,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
6,./data/Covid19-dataset/train/Viral Pneumonia/0...,0,0.591290,./data/Covid19-dataset/train/Viral Pneumonia/0...,Viral Pneumonia,1
...,...,...,...,...,...,...
309,./data/Covid19-dataset/test/Covid/0118.jpeg,0,0.552529,./data/Covid19-dataset/test/Covid/0118.jpeg,Covid,1
311,./data/Covid19-dataset/test/Covid/auntminnie-d...,0,0.572959,./data/Covid19-dataset/test/Covid/auntminnie-d...,Covid,1
312,./data/Covid19-dataset/test/Covid/0108.jpeg,0,0.554062,./data/Covid19-dataset/test/Covid/0108.jpeg,Covid,1
313,./data/Covid19-dataset/test/Covid/COVID-00022.jpg,0,0.582874,./data/Covid19-dataset/test/Covid/COVID-00022.jpg,Covid,1


In [None]:

# Load the pretrained ViT model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model.classifier = torch.nn.Linear(model.classifier.in_features, 2)
model.eval()

# Function to predict image class
def classify_image(img_path):
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    inputs = feature_extractor(images=img, return_tensors="pt")
    
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class_idx = logits.argmax(-1).item()
    proba = logits.softmax(dim=-1)[0].tolist()
    
    return predicted_class_idx

# Example usage
# img_path = test_df.iloc[0].filepaths
# predicted_class, probabilities = classify_image(img_path)
# print(f'Predicted Class: {predicted_class}')
# print(f'Class Probabilities: {probabilities}')

data['predicted_label'] = data['filepaths'].apply(lambda x: classify_image(x))




In [None]:
data.predicted_label.value_counts()

predicted_label
0    316
1      1
Name: count, dtype: int64

In [None]:
data[data.true_label ==0].predicted_label.value_counts()

predicted_label
2    86
0     4
Name: count, dtype: int64

In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data_frame = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = self.data_frame.iloc[idx, 0]
        image = Image.open(img_name).convert('RGB')
        label = self.data_frame.iloc[idx, 1]
        label = torch.tensor(label, dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)
        
        return image, label
    

# Define the transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

train_dataset = CustomImageDataset(train_data, transform=data_transforms['train'])
val_dataset = CustomImageDataset(val_df, transform=data_transforms['val'])

train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=5, shuffle=False)

dataloaders = {'train': train_loader, 'val': val_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}

# Load a pre-trained model
model = models.resnet50(pretrained=True)

# Modify the final layer for binary classification
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)

# Move the model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define the criterion and optimizer
# criterion = nn.BCEWithLogitsLoss()
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.sigmoid(outputs).round()
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        
        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    
    print()

# # Save the model
torch.save(model.state_dict(), 'covid_classifier.pth')


train Loss: 0.6714 Acc: 0.7072
val Loss: 0.7528 Acc: 0.6667

train Loss: 0.4695 Acc: 0.6685
val Loss: 0.2020 Acc: 1.0000

train Loss: 0.3336 Acc: 0.6685
val Loss: 5.2334 Acc: 1.0000



In [None]:
def load_model(model_path, device):
    model = models.resnet50(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, 1)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    return model

def predict(model, dataloader, device):
    model.eval()
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(inputs)
         
            all_labels.extend(labels.cpu().numpy())
    
    return all_labels

model_path = 'covid_classifier.pth'
model = load_model(model_path, device)

test_set = CustomImageDataset(test_df, transform=data_transforms['val'])
test_loader = DataLoader(test_set, batch_size=5, shuffle=False)

# Get predictions for validation and test datasets
val_labels = predict(model, val_loader, device)
test_labels = predict(model, test_loader, device)



In [None]:
ts1 = test_df.copy()

In [None]:
ts1['fc'] = [x[0] for x in test_labels]

In [None]:
ts1

Unnamed: 0,filepaths,labels,fc
28,./data/Covid19-dataset/test/Normal/0120.jpeg,0,0.0
31,./data/Covid19-dataset/test/Normal/0103.jpeg,0,0.0
47,./data/Covid19-dataset/test/Covid/098.jpeg,1,1.0
50,./data/Covid19-dataset/test/Covid/auntminnie-b...,1,1.0
38,./data/Covid19-dataset/test/Normal/0108.jpeg,0,0.0
33,./data/Covid19-dataset/test/Normal/0114.jpeg,0,0.0
44,./data/Covid19-dataset/test/Covid/0112.jpg,1,1.0
30,./data/Covid19-dataset/test/Normal/0115.jpeg,0,0.0
25,./data/Covid19-dataset/test/Normal/0101.jpeg,0,0.0
21,./data/Covid19-dataset/test/Normal/0111.jpeg,0,0.0
