<a href="https://colab.research.google.com/github/aaekay/CovidAID/blob/master/CovidAID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Enable the GPU first (Runtime → Change runtime type → Hardware accelerator → GPU)

In [None]:
# Remove any previous checkout first so that re-running this cell starts clean.
!rm -rf ./CovidAID
# --recursive also fetches the repository's git submodules.
!git clone --recursive https://github.com/aaekay/CovidAID.git
# Download the dataset archive straight to ./pneumo.zip. Using -O avoids depending on
# the file name derived from the URL and on the working directory being /content;
# the URL is quoted so the shell never interprets any of its characters.
!wget -O ./pneumo.zip "https://sogppg.bl.files.1drv.com/y4m7xCVzOJm2tZO9UttZgEidLVEYUsYqN4zjak7oGSlj9k75JYCOwQNF-NXmkolYeS0oUJ8STXyq5wKG7SHTBXF_uRXefR4FbA0x8-b-lQVHNVqp5Ts72XLkxvG4R6Z_EonfAX-XYeSRpkSgmOVqXGVCCOB-GQAbnxrpi0TdvZB4--kBMLqtLe7ESNTaruxnOpjt7z466AvplCvYc8UtdO86Q"
# Extract the archive into the directory inside the checkout.
!unzip ./pneumo.zip -d ./CovidAID/chest-xray-pneumonia

In [None]:
# Download the PyTorch 0.3.0.post4 wheel. Per its file name the wheel targets
# CUDA 7.5 (cu75), CPython 3.6 (cp36) and Linux x86_64.
# NOTE(review): a cp36 wheel only installs on a Python 3.6 runtime - confirm the
# runtime's Python version before relying on this cell.
!wget https://download.pytorch.org/whl/cu75/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl

In [None]:
# Install the PyTorch wheel fetched by the previous cell. The absolute path only
# resolves if that download ran with /content as the working directory.
!pip install /content/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl

In [None]:
# Install the Python dependencies listed in the cloned repository's requirements.txt.
!pip install -r /content/CovidAID/requirements.txt 

In [None]:
# Run the repository's data_tools/prepare_covid_data.py script.
# NOTE(review): presumably builds the COVID-19 image lists - confirm against the script.
!python3 /content/CovidAID/data_tools/prepare_covid_data.py

In [None]:
# Run data_tools/prepare_data.py with the --combine_pneumonia flag.
# NOTE(review): the flag presumably merges the pneumonia classes into one - confirm
# against the script's argument parser.
!python3 /content/CovidAID/data_tools/prepare_data.py --combine_pneumonia

In [None]:
# Run the repository's tools/transfer.py script.
# NOTE(review): presumably produces the checkpoint that the training cell loads - confirm.
!python3 /content/CovidAID/tools/transfer.py

In [None]:
# Launch tools/trainer.py in train mode with --freeze, starting from the checkpoint
# ./CovidAID/models/CovidAID_3_class.pth and saving under ./new_models.
# NOTE(review): --bs 16 is presumably the batch size - confirm in trainer.py.
!python3 ./CovidAID/tools/trainer.py --mode train --freeze --checkpoint ./CovidAID/models/CovidAID_3_class.pth --bs 16 --save ./new_models

### Prepare the COVID-19 dataset splits

In [None]:
import os
import numpy as np
import pandas as pd
from collections import Counter 


In [None]:
# Directory holding the COVID chest X-ray dataset; its metadata.csv sits at the top level.
COVID_DATA_PATH='./CovidAID/covid-chestxray-dataset'
METADATA_CSV = os.path.join(COVID_DATA_PATH, 'metadata.csv')
# Text files that receive the train / validation / test image lists.
TRAIN_FILE = './CovidAID/data/covid19/train_list.txt'
VAL_FILE = './CovidAID/data/covid19/val_list.txt'
TEST_FILE = './CovidAID/data/covid19/test_list.txt'


In [None]:
# Load the dataset metadata and keep only the rows used for the COVID-19 split:
# findings labelled 'COVID-19', imaged by X-ray, in one of the accepted views.
ACCEPTED_VIEWS = ['PA', 'AP', 'AP Supine']

covids = dict()  # placeholder; a later cell fills it with per-patient image counts
metadata = pd.read_csv(METADATA_CSV)
is_covid_xray = (metadata['finding'] == 'COVID-19') & (metadata['modality'] == 'X-ray')
df = metadata[is_covid_xray & metadata['view'].isin(ACCEPTED_VIEWS)]

# Show the size and a preview instead of printing every selected row.
print(df.shape)
df.head()

In [None]:
# Count the selected X-ray images per patient and list patients by ascending count.
patient_ids = Counter(df['patientid'].tolist())
covids = dict(sorted(patient_ids.items(), key=lambda pair: pair[1]))
total_data = sum(covids.values())

print("Patient-#X-Rays statistics:")
print(covids)
print("Total Images:", total_data, '\n')

In [None]:
# Fixed patient-level split: ids of the patients held out for testing and for
# validation. Patients in neither set end up in the training list.
test_patients = {
    4, 6, 15, 18, 32, 36, 50, 59, 65, 70,
    76, 78, 80, 82, 86, 115, 116, 138, 152,
}
val_patients = {11, 24, 43, 48, 51, 73, 112}


In [None]:
# Report how many patients and how many images fall into each split.
# The held-out set is built once rather than once per patient, and patient ids are
# normalised with int() for the patient count exactly as for the image counts.
held_out_patients = test_patients | val_patients
known_patients = {int(pid) for pid in covids}

print('#Train patients:', len(known_patients - held_out_patients))
print('#Test patients:', len(test_patients))
print('#Val patients:', len(val_patients))
print()
print('#Train data points:', sum(n for pid, n in covids.items() if int(pid) not in held_out_patients))
print('#Test data points:', sum(n for pid, n in covids.items() if int(pid) in test_patients))
print('#Val data points:', sum(n for pid, n in covids.items() if int(pid) in val_patients))

In [None]:
# Give each split its own empty list of image paths; a later cell fills them.
train_list, test_list, val_list = [], [], []

In [None]:
# Assign every selected image to a split according to its patient id.
# The lists are reset here so that re-running this cell does not append the same
# images a second time.
train_list, test_list, val_list = [], [], []

for i, row in df.iterrows():
    patient_id = int(row['patientid'])
    # Image path relative to the dataset root: <folder>/<filename>.
    filename = os.path.join(row['folder'], row['filename'])

    if patient_id in test_patients:
        test_list.append(filename)
    elif patient_id in val_patients:
        val_list.append(filename)
    else:
        # Patients in neither held-out set are used for training.
        train_list.append(filename)

print(len(train_list), len(test_list), len(val_list))

In [None]:
# Write image list in file
def make_img_list(data_file, img_file_list, data_root=None):
    """Write the image paths that exist on disk to ``data_file``, one per line.

    Args:
        data_file: Path of the text file to create (overwritten if it exists).
        img_file_list: Iterable of image paths relative to ``data_root``.
        data_root: Directory the relative paths are resolved against. Defaults
            to the notebook-level ``COVID_DATA_PATH``.

    Returns:
        None. Paths that do not point to an existing file are left out of
        ``data_file`` and reported on stdout instead.
    """
    if data_root is None:
        data_root = COVID_DATA_PATH
    with open(data_file, 'w') as f:
        for imgfile in img_file_list:
            # Explicit existence check instead of assert inside a bare except:
            # asserts are stripped under ``python -O`` and a bare except would
            # also hide unrelated errors (including KeyboardInterrupt).
            if os.path.isfile(os.path.join(data_root, imgfile)):
                f.write("%s\n" % imgfile)
            else:
                print("Image %s NOT FOUND" % imgfile)

In [None]:
# Write one list file per split, then echo the training list for inspection.
for list_path, image_paths in (
    (TRAIN_FILE, train_list),
    (VAL_FILE, val_list),
    (TEST_FILE, test_list),
):
    make_img_list(list_path, image_paths)
TRAIN_FILE, train_list

### Prepare combined dataset
