# **Project: Diagnosis of Pneumonia**

In [None]:
import fastai
from fastai.vision.all import *
import pandas as pd
import seaborn as sns
from pathlib import Path

In [None]:
# Tell the Kaggle API to use /content as its configuration directory
import os

os.environ.update(KAGGLE_CONFIG_DIR='/content')

In [None]:
# Download the data archive of the "pnevmoniya" competition with the Kaggle CLI.
# The leading "!" makes the notebook run this line as a shell command rather
# than as Python.
# NOTE(review): the CLI presumably picks up its credentials from the
# KAGGLE_CONFIG_DIR set earlier in this notebook -- confirm kaggle.json is there.
!kaggle competitions download -c pnevmoniya

In [None]:
# Make /content the notebook's working directory (%cd is an IPython magic),
# so the relative file names used below resolve against it
%cd /content/

# Extract the downloaded competition archive into the working directory.
# NOTE(review): later cells read /content/train, /content/test and
# sample_solution.csv -- presumably all of them come out of this archive; confirm.
!unzip pnevmoniya.zip

In [None]:
# Root folder of the training images
path = Path("/content/train")

# Recipe that turns image files into (image, category) samples:
#   - every image file found under the given folder is an item
#   - the class of an item is the name of the folder that directly contains it
#   - a random 20% of the items (fixed seed, so the split is reproducible)
#     is held out as the validation set
#   - each image is resized to 224 pixels before batching
image_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    item_tfms=Resize(224),
)

# Build the training / validation dataloaders from the files under `path`
dls = image_block.dataloaders(path)

In [None]:
# Build a learner around a pre-trained ResNet-152 backbone and track accuracy,
# recall and precision on the validation set after every epoch.
# `vision_learner` is the current name of this factory; `cnn_learner` is only
# kept as a deprecated alias in recent fastai releases.
model = vision_learner(dls, resnet152, metrics=[accuracy, Recall(), Precision()])

# Fine-tune the pre-trained model on the training data for 5 epochs
# (fine_tune first runs its default frozen warm-up phase, then trains the
# whole network for the requested number of epochs)
model.fine_tune(5)

In [None]:
# Gather the trained model's predictions and losses on the validation set
# (dataset index 1) so they can be inspected
interp = ClassificationInterpretation.from_learner(model, ds_idx=1)

# Show actual vs. predicted classes as a 6x6 inch confusion matrix
interp.plot_confusion_matrix(figsize=(6, 6))

In [None]:
# Show the 8 images with the highest loss in a 2-row grid, i.e. the cases the
# model got most wrong (or was least confident about), for a visual sanity check
interp.plot_top_losses(8, nrows=2, figsize=(16, 8))

In [None]:
# Read the submission template; its 'id' column names the test images to score
sample_solution = pd.read_csv("sample_solution.csv")

# Paths of the test images, in the same order as the rows of the template
test_files = [Path(f"/content/test/{image_id}") for image_id in sample_solution['id']]

# Score the whole test set in batches instead of calling model.predict once
# per image: test_dl applies the same validation-time preprocessing that was
# used during training, and get_preds returns one row of class probabilities
# per image, in input order (there are no targets for a test set).
test_dl = model.dls.test_dl(test_files)
probs, _ = model.get_preds(dl=test_dl)

# Pick the most probable class for every image and translate the class index
# back to its name through the dataloaders' vocabulary
vocab = model.dls.vocab
pred_ids = probs.argmax(dim=1)

# Assign the whole column at once so it cleanly holds the class-name strings,
# whatever dtype the placeholder 'labels' column had in the template
sample_solution['labels'] = [vocab[int(idx)] for idx in pred_ids]

In [None]:
# Numeric code expected in the submission for each class name
# (any label missing from this table would turn into NaN when mapped)
label_to_code = {"PNEUMONIA": 1, "NORMAL": 0}

# Replace the predicted class names with their numeric codes
sample_solution['labels'] = sample_solution['labels'].map(label_to_code)

# Write the finished submission, without the DataFrame index column
sample_solution.to_csv('DP_Solutions.csv', index=False)