# Automatic detection of Pneumonia using deep learning.

 This notebook preprocesses the data, trains the VGG16 model, and saves the trained state so that predictions can be made in the web app.
 Dataset: https://www.kaggle.com/nih-chest-xrays/data

In [None]:
# (Re)load IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Draw matplotlib figures inline in the cell output.
%matplotlib inline

### Importing required libraries and modules

In [None]:
from fastai import *
from fastai.vision import *
from fastai.basic_data import *
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide configuration: batch size (bs) and image size (size).

bs, size = 8, 224

### Data Preprocessing

In [None]:
# LOADING DATA

# Per-image metadata table shipped with the dataset.
all_xray_df = pd.read_csv('../input/Data_Entry_2017.csv')

# File name -> path on disk for every PNG matching ../input/images*/*/*.png.
all_image_paths = dict(
    (os.path.basename(png_path), png_path)
    for png_path in glob(os.path.join('..', 'input', 'images*', '*', '*.png'))
)
print('Scans found:', len(all_image_paths), ', Total Headers', len(all_xray_df))

# Attach each row's image path (missing when the file name was not found on
# disk) and coerce every age value through int().
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df['Patient Age'] = all_xray_df['Patient Age'].map(int)
all_xray_df.head()

In [None]:
# Independent copy holding only the two columns the model needs: image path and labels.
new_data = all_xray_df.loc[:, ['path', 'Finding Labels']].copy()

In [None]:
# Preview the first five rows of the reduced frame.
new_data.head()

In [None]:
# How many distinct 'Finding Labels' strings occur (missing values not counted).
print(new_data['Finding Labels'].nunique(dropna=True))


In [None]:

# Bar chart of the 15 most frequent 'Finding Labels' strings.
label_counts = all_xray_df['Finding Labels'].value_counts().head(15)
# Each bar (and its tick) is placed at its index position + 0.5.
bar_positions = np.arange(len(label_counts)) + 0.5

fig, ax1 = plt.subplots(1, 1, figsize=(12, 8))
ax1.bar(bar_positions, label_counts)
ax1.set_xticks(bar_positions)
ax1.set_xticklabels(label_counts.index, rotation=90)
# Title and axis labels so the figure can be read on its own.
ax1.set_xlabel('Finding Labels')
ax1.set_ylabel('Number of images')
# Assign to `_` so the returned Text object is not echoed as cell output.
_ = ax1.set_title('15 most frequent label combinations')

In [None]:
# 'No Finding' is replaced by an empty string, so an X-ray whose label ends up
# empty is treated as having no anomaly.

new_data['Finding Labels'] = [
    finding.replace('No Finding', '') for finding in new_data['Finding Labels']
]
new_data.sample(n=3)

In [None]:
# IMAGE PREPROCESSING

# Build the fastai DataBunch from new_data: random 80/20 train/validation split,
# multi-label targets parsed from the '|'-delimited label column, augmentation,
# resize, and normalization with imagenet_stats.
# `size` and `bs` come from the configuration cell near the top of the notebook.
# NOTE(review): the 'path' column already starts with '../input/...'; fastai
# appears to prefix the root folder again - confirm the joined paths resolve.
data = (ImageList.from_df(new_data, '../input')
        # Fixed seed so the validation set is the same on every run.
        .split_by_rand_pct(valid_pct=0.2, seed=42)
        .label_from_df(label_delim='|')
        .transform(get_transforms(flip_vert=False, max_zoom=1.1, max_lighting=0.2), size=size)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [None]:
 # Visual sanity check: display a batch of images with their labels.
 data.show_batch(figsize=(12, 10), rows=3)

In [None]:
# Number of distinct classes the DataBunch holds.
len(data.classes)

### Model

In [None]:
# Backbone passed to cnn_learner below: VGG16 with batch normalization.
arch = models.vgg16_bn

In [None]:
# Both metrics are the stock fastai functions with thresh bound to 0.2.
acc_02, f_score = (
    partial(metric_fn, thresh=0.2) for metric_fn in (accuracy_thresh, fbeta)
)

# Learner on top of the chosen backbone; model_dir is an absolute path under /tmp.
learn = cnn_learner(
    data,
    arch,
    metrics=[acc_02, f_score],
    model_dir="/tmp/model/",
)

In [None]:
# First five rows, transposed so each row becomes a column.
new_data.head(5).transpose()

In [None]:
# Run fastai's learning-rate finder on the learner.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find, to help choose the learning rate set below.
learn.recorder.plot()

In [None]:
# Learning rate handed to fit_one_cycle in the training cell.

lr = 1e-2

In [None]:
# Train for two epochs with the one-cycle policy.
# slice(lr) has stop=lr and no start value.

learn.fit_one_cycle(cyc_len=2, max_lr=slice(lr))

In [None]:
# Checkpoint the learner after the first training stage.
learn.save('stage_1_vgg16')

In [None]:
# Point the learner's base path at the Kaggle working directory.
# NOTE(review): presumably so that the later learn.export(...) writes there - confirm.
learn.path = Path('/kaggle/working')

In [None]:
# Save the current learner state under the name 'weights'.
# NOTE(review): the learner was created with the absolute model_dir "/tmp/model/",
# so this file presumably still lands there rather than under learn.path - confirm.
learn.save('weights')

In [None]:
# SAVING THE .PKL FILE

# Export the learner to 'model.pkl' (presumably the file the web app loads - confirm).
learn.export('model.pkl')
# Saves 'weights' again; an earlier cell already did so and no training ran in between.
learn.save('weights')

In [None]:
# Show a clickable link to the exported model file in the cell output.
from IPython.display import FileLink

FileLink('model.pkl')