# Business Task
The goal of this competition is to classify the blood clot origins in ischemic stroke. Using whole slide digital pathology images, I'll build a model that differentiates between the two major acute ischemic stroke (AIS) etiology subtypes: cardiac and large artery atherosclerosis.


I hope you find this NoteBook helpful and some <span style="color : red;" >**UPVOTES** </span> would be appreciated.


# what is ischemic stroke ?

<iframe width="853" height="480" src="https://www.youtube.com/embed/vbH3yg2Vo1I" title="Acute Ischemic Stroke - Signs and Symptoms (Stroke Syndromes) | Causes & Mechanisms | Treatment" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

An ischaemic stroke happens when a blockage cuts off the blood supply to part of your brain, killing brain cells. Damage to brain cells can affect how the body works. It can also change how you think and feel.

It’s the most common type of stroke, and around 85% of strokes in the UK are ischaemic strokes. The other 15% of strokes are due to bleeding in or around the brain, known as haemorrhagic stroke. 

A transient ischaemic attack (TIA or mini-stroke) is the same as a stroke but the symptoms only last for a short amount of time. It is a major warning sign of a stroke and should always be taken seriously.



# Import libraries.


In [3]:
# Core
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import glob
import random
import os
import cv2
#tesor fow & keras
import tensorflow as tf
from tensorflow import keras
from keras.regularizers import l2     
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.models import load_model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense,BatchNormalization,Dropout,Input
from keras.models import Sequential, Model
from keras.layers import Conv2D,GlobalMaxPooling2D
from tensorflow.keras.applications import  Xception,VGG16,InceptionResNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#cnn
from tensorflow.keras import datasets, layers, models

from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout, Lambda
from keras.engine.base_layer import Layer
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight

In [4]:
# Global seed used to make NumPy-driven randomness reproducible.
seed = 42
# Call the seeding function. Assigning to `np.random.seed` (as before) replaced
# the function with an int and left the random generator unseeded.
np.random.seed(seed)

In [5]:
image_path = '../input/mayo-clinic-strip-ai/'

# Helper Function




In [6]:
#check duplicate data 
def check_duplicate(df):
    """Report whether ``df`` contains at least one fully duplicated row.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        A human-readable message: one text when any row duplicates an earlier
        row, another when no duplicates exist (including for an empty frame).
    """
    # `.any()` flags the frame as soon as ONE row is a duplicate. The previous
    # `.all()` only fired when every row was a duplicate (impossible for a
    # non-empty frame, because the first occurrence is never marked) and was
    # vacuously True for an empty frame.
    if df.duplicated().any():
        return  'There are duplicate Data in Data Frame Nedded To be  removed ' 
    else :
        return 'Data Is clean ,No Duplicate Data Found '

# get label Name   
def get_Label(number):
    """Translate a predicted class index into its class name.

    Args:
        number: integer class index (0 or 1).

    Returns:
        The class name for that index.

    Raises:
        KeyError: if ``number`` is not a known class index.
    """
    # The generators in this notebook are built without an explicit `classes`
    # list, so Keras indexes the label strings alphabetically: CE -> 0, LAA -> 1.
    # (The previous Cat/Dog names were left over from another project.)
    # NOTE(review): confirm against the output of `train.class_indices`.
    labels = {0:'CE', 1:'LAA'}
    return labels[number]


#plot predction function
def plot_predection(model_name):
    """Show test images in a 5x7 grid, each captioned with the predicted class.

    Every file found in ``image_path + '/test1/'`` is read, resized to
    128x128, passed through ``model_name.predict`` and drawn with the label
    returned by ``get_Label``. At most 34 images are shown.

    Args:
        model_name: the fitted model object (not a string) exposing ``predict``.
    """
    plt.figure(figsize=(20,15))
    plt.suptitle("Predection  Images", fontsize=20)
    # NOTE(review): the 'test1/' folder and the fixed 128x128 size look
    # inherited from another project, and the pixels are not rescaled by 1/255
    # as in the training generators -- confirm they match the model being used.
    path =image_path+'/'+'test1/'
    count = 0
    for i,files in enumerate(os.listdir(path)) :
        img = plt.imread(path+files)
        img = cv2.resize(img,(128,128))
        # Select the grid cell BEFORE drawing; calling imshow first would put
        # each image into the previously selected cell.
        plt.subplot(5,7,i+1)
        plt.imshow(img,cmap=plt.cm.binary)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        feature = model_name.predict(np.expand_dims(img, axis=0))
        predection  = np.argmax(feature, axis=1)
        print(predection)
        # argmax over a batch of one yields a length-1 array; index it
        # explicitly instead of relying on implicit array-to-int conversion.
        plt.xlabel("Predicted"+get_Label(int(predection[0])))
        count += 1
        if count == 34 :
            break 

def freezing_layers(model_name):
    """Freeze a model in place by marking each of its layers non-trainable.

    Args:
        model_name: object exposing an iterable ``layers`` attribute whose
            items accept a ``trainable`` attribute.
    """
    for current_layer in model_name.layers:
        setattr(current_layer, 'trainable', False)
    
def numerical_plotting(df, col, title, symb):
    """Plot a box plot above a distribution plot for one numeric column.

    Vertical lines mark the column's mean, median and (first) mode; their
    values are shown in the legend followed by ``symb``.

    Args:
        df: DataFrame holding the data.
        col: name of the column to plot.
        title: title placed above the top (box plot) panel.
        symb: string appended to each legend value (e.g. a unit symbol).
    """
    # Two stacked panels sharing the x axis: a thin one (20%) and a tall one (80%).
    fig, ax = plt.subplots(2, 1, sharex=True, figsize=(8,5),gridspec_kw={"height_ratios": (.2, .8)})
    ax[0].set_title(title,fontsize=18)
    sns.boxplot(x=col, data=df, ax=ax[0])
    ax[0].set(yticks=[])
    # No `ax=` is given here, so this and the `plt.*` calls below draw on the
    # current pyplot axes -- presumably the lower panel ax[1]; confirm.
    # NOTE(review): `distplot` is deprecated in recent seaborn releases --
    # verify it is still available in the installed version.
    sns.distplot(df[col],kde=True)
    plt.xticks(rotation=45)
    ax[1].set_xlabel(col, fontsize=16)
    plt.axvline(df[col].mean(), color='darkgreen', linewidth=2.2, label='mean=' + str(np.round(df[col].mean(),1)) + symb)
    plt.axvline(df[col].median(), color='red', linewidth=2.2, label='median='+ str(np.round(df[col].median(),1)) + symb)
    # mode() can return several values; only the first one is drawn.
    plt.axvline(df[col].mode()[0], color='purple', linewidth=2.2, label='mode='+ str(df[col].mode()[0]) + symb)
    plt.legend(bbox_to_anchor=(1, 1.03), ncol=1, fontsize=17, fancybox=True, shadow=True, frameon=True)
    plt.tight_layout()
    plt.show()   

def categorical_plotting(df,col,title):
    """Draw a count plot of ``df[col]`` with bars ordered by frequency.

    Args:
        df: DataFrame holding the data.
        col: name of the categorical column to count.
        title: plot title.
    """
    plt.subplots(figsize=(10,5))
    # Most frequent category first.
    by_frequency = df[col].value_counts().index
    axis = sns.countplot(x=col, data=df, palette='flare', order = by_frequency)
    tick_labels = axis.get_xticklabels()
    axis.set_xticklabels(tick_labels, rotation=45)
    # Write the count on top of each bar.
    axis.bar_label(axis.containers[0])
    plt.title(title)
    plt.show()
    
def average_plotting(df,col,output,number,title):
    """Bar-plot ``output`` per category for the ``number`` most frequent values of ``col``.

    Args:
        df: DataFrame holding the data.
        col: name of the grouping column (x axis).
        output: name of the numeric column aggregated by seaborn (y axis).
        number: how many of the most frequent ``col`` values to keep.
        title: plot title.
    """
    frequency = df[col].value_counts()
    data_list = frequency.index[:number].tolist()
    # Keep only the rows that belong to the selected categories.
    selected_rows = df[df[col].isin(data_list)]
    plt.figure(figsize=(15,5))
    bars = sns.barplot(
        x=col,
        y=output,
        data=selected_rows,
        order=data_list,
        palette='flare',
        ci=False,
        edgecolor="black",
    )
    plt.xticks(rotation=45)
    bars.bar_label(bars.containers[0])
    plt.title(title)
    plt.show()
    
def draw_unique_value (df,title):
    """Bar-plot the number of distinct values found in every column of ``df``.

    Args:
        df: DataFrame to summarise.
        title: plot title.
    """
    plt.figure(figsize=(10,5))
    plt.title(title)
    unique_counts = df.nunique().to_dict()
    # Pass x and y by keyword: recent seaborn releases no longer accept them
    # positionally, while the keyword form works on old and new versions.
    ax = sns.barplot(x=list(unique_counts.keys()), y=list(unique_counts.values()),palette='flare')
    ax.bar_label(ax.containers[0])
    # Render the figure like the other plotting helpers do (the previous
    # argument-less `plt.plot()` drew nothing).
    plt.show()

# Data Loading and Preparation

In [7]:
train_df = pd.read_csv(image_path + 'train.csv')
test_df  = pd.read_csv(image_path + 'test.csv')
other_df = pd.read_csv(image_path + 'other.csv')

# Data Analysis & Visualization
In this part we will analyze and visualize each part of the data to get one step closer to our goal; then, based on the trained models, we will identify the factors that most affect our business goal.

In [8]:
train_df.head(5)

In [9]:
# store path of images
# Attach the file name and full path of every training slide to train_df.
# The values are derived from each row's own `image_id` (files are named
# "<image_id>.tif"). Filling the columns from os.listdir(), as before, relied
# on the directory listing order matching the CSV row order; listdir order is
# arbitrary, so images could end up paired with another row's label.
train_path =image_path+'train/'
train_images       = (train_df['image_id'] + '.tif').tolist()
train_images_paths = [train_path + img for img in train_images]
train_df['path'] = train_images_paths  
train_df['image'] = train_images  
train_df.head(5)

In [10]:
# store path of test images
# Attach the file name and full path of every test slide to test_df, derived
# from each row's own `image_id` so that rows and files cannot be misaligned
# (os.listdir() order is arbitrary and need not match the CSV row order).
# NOTE(review): assumes test.csv has an `image_id` column like train.csv and
# that files are named "<image_id>.tif" -- confirm.
test_path =image_path+'test/'
test_images       = (test_df['image_id'] + '.tif').tolist()
test_images_paths = [test_path + img for img in test_images]
test_df['path'] = test_images_paths 
test_df['image'] = test_images  

test_df.head(5)

In [11]:
train_df.info()

In [107]:
train_df.shape

In [108]:
# check null value
train_df.isnull().sum().sort_values(ascending=False)

In [109]:
#check duplicate data 
check_duplicate(train_df)

In [110]:
cols =train_df.columns
sns.heatmap(train_df[cols].isnull(), cmap='viridis')

In [111]:
train_df['image_id'].value_counts()

In [16]:
train_df.info()

In [17]:
#get  numerical column
numCol  = [col for col in train_df.columns if  train_df[col].dtype != "O"]
numCol

In [18]:
#get categoriacl column
CatCol  = [col for col in train_df.columns if  train_df[col].dtype == "O"]
CatCol

In [19]:
 draw_unique_value (train_df,'Unique Value Of Data Frame')

<font size="4" >**Center_id** </font> 
* **Identifies the medical center where the slide was obtained** .

In [20]:
numerical_plotting(train_df,'center_id','Centar id Distribution',' ')

In [21]:
train_df['center_id'].value_counts()

In [22]:
train_df.describe()[['center_id']].T

In [23]:
#what is the most top center_id in data set ?
train_df['center_id'].sort_values(ascending=False).head(5)

**Notes** ⏳

* **The maximum center id is 11 and the minimum is 1.**
* **There are 11 centers that collected our data.**

<font size="4" >**Patient_id** </font> 

In [24]:
train_df['patient_id'].value_counts()

In [25]:
categorical_plotting(train_df,'patient_id','total count of patient_id per Image')

In [26]:
average_plotting(train_df,'patient_id','center_id',15,'Average Cental_id per Patient')

In [27]:
#what is total number of images per patient id  ?
train_df.groupby('patient_id').count()[['image_num']].sort_values(by='image_num',ascending=False).head(5).T

In [28]:
train_df[train_df['patient_id'] == '91b9d3']

In [29]:
##what is total  of images per patient id  ?
train_df.groupby('patient_id').sum()[['image_num']].sort_values(by='image_num',ascending=False).head(5).T

<font size="4" >**image_num** </font> 

In [30]:
train_df['image_num'].value_counts()

In [31]:
categorical_plotting(train_df,'image_num','total count of image  per Paient')

 
**Notes** ⏳

* **most of  patient has one image**

In [32]:
#what is max  image  per patient   ?
train_df.groupby(['patient_id']).sum()[['image_num']].sort_values(by ='image_num',ascending=False).head(5).T

<font size="4" >**label** </font>

In [33]:
train_df['label'].value_counts()

In [34]:
categorical_plotting(train_df,'label','total count of image  per label')

* **From the plot above we can see that the data is not balanced.**
* **72.5% of the data belongs to class CE.**


# Visualize Training Images

In [10]:
from PIL import Image
Image.MAX_IMAGE_PIXELS = None

In [36]:
# Preview the first nine files of the training folder in a 3x3 grid, each
# downsized to 512x512 for display.
plt.figure(figsize=(20,15))
plt.suptitle("Training Images", fontsize=20)
path =image_path+'train/'
counter =0
for i,img in enumerate(os.listdir(path)[:9])  :
    plt.subplot(3,3,i+1)
    # Hide the axes decorations; only the picture matters here.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    full_image= Image.open(path+img)
    full_image_1 =full_image.resize((512,512))
    plt.imshow(full_image_1, cmap=plt.cm.binary)

# Building the CNN Model

**I will use data augmentation, a set of techniques that artificially increase the amount of data by generating new data points from existing data. This includes making small changes to the data or using deep learning models to generate new data points.** 



In [11]:
batch_size = 8

# Augmenting generator -- used for the TRAINING subset only.
image_gen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.5,
    height_shift_range=0.2,
    width_shift_range=0.2,
    fill_mode='nearest',
    horizontal_flip=True,
    rotation_range = 20,
    validation_split=0.2,
)

# Plain generator for the VALIDATION subset: same rescaling and the same
# validation_split (so both generators partition train_df identically), but no
# random augmentation, which would make validation metrics noisy and not
# comparable between epochs.
val_image_gen = ImageDataGenerator(
    rescale = 1./255,
    validation_split=0.2,
)

train = image_gen.flow_from_dataframe(
    train_df,
    train_path,
    x_col='image',
    y_col='label',
    target_size=(96,96),
    class_mode='categorical',
    shuffle=True,
    batch_size=batch_size,
    subset = "training",
    color_mode='rgb',
)
validate = val_image_gen.flow_from_dataframe(
    train_df,
    train_path,
    x_col='image',
    y_col='label',
    target_size=(96,96),
    class_mode='categorical',
    # Keep a fixed order so that predict(validate) lines up with
    # validate.classes / validate.filenames.
    shuffle=False,
    batch_size=batch_size,
    subset = "validation",
    color_mode='rgb',
)

# Weight each class inversely to its frequency to compensate for the
# imbalanced labels.
class_weights = compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(train.classes),
    y = train.classes,
)
class_weights = dict(zip(np.unique(train.classes), class_weights))
class_weights


In [19]:
train.class_indices

In [46]:
# get shape of train data 
for train_img , train_label in train :
    print('image shape ',train_img.shape)
    print('label  shape ',train_label.shape)
    break 

In [13]:
# get shape of validation  data 
for v_img , v_label in validate :
    print('image shape ',v_img.shape)
    print('label  shape ',v_label.shape)
    break 

In [12]:
# Convolutional feature extractor: six Conv2D + 2x2 max-pooling stages that
# shrink the 96x96 RGB input to a 1x1 map (96 -> 48 -> 24 -> 12 -> 6 -> 3 -> 1).
# The model is constructed once (it used to be created twice in a row, the
# first instance being thrown away) and `padding` is spelled consistently.
cnn_model = models.Sequential()
cnn_model.add(layers.Conv2D(64,(3,3),padding ='same',activation = 'relu',input_shape=(96,96,3)))
cnn_model.add(layers.MaxPooling2D(2,2))
cnn_model.add(layers.Conv2D(64,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2))
cnn_model.add(layers.Conv2D(128,(3,3),padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(layers.Conv2D(128,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(layers.Conv2D(256,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(256,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.summary()


In [13]:

cnn_model.add(layers.Flatten())
cnn_model.add(layers.Dense(1024, activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(Dropout(0.7))
cnn_model.add(layers.Dense(512, activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(Dropout(0.3))
cnn_model.add(layers.Dense(2, activation ='softmax'))
cnn_model.summary()


In [14]:
cnn_model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'],)

  **1-Defining Callbacks**

*   A callback is an object that can perform actions at various stages of training (e.g. at the start or end of an epoch, before or after a single batch, etc)


**2-Reduce Learning Rate on Plateau**
*   Is used to reduce the learning rate when a metric has stopped improving.



In [15]:
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
# Both callbacks watch the VALIDATION loss. Monitoring the training loss (as
# before) keeps training while the model overfits, because the training loss
# keeps falling even when validation performance has stopped improving.
# Every fit() call that receives this list also passes validation_data, so
# `val_loss` is available.
early = EarlyStopping(monitor="val_loss", mode="min",min_delta = 0,
                          patience = 10,
                          verbose = 1,
                          restore_best_weights = True)
# Multiply the learning rate by 0.3 after 2 epochs without improvement.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience = 2, verbose=1,factor=0.3, min_lr=0.000001)
callbacks_list = [ early, learning_rate_reduction]

In [None]:
# Training model

# len() of a Keras data iterator is the number of BATCHES per epoch, not the
# number of samples (the variable names are kept for compatibility).
n_training_samples = len(train)
n_validation_samples = len(validate)
history = cnn_model.fit(
    train,
    epochs=60,
    validation_data=validate,
    # Evaluate on every validation batch. Dividing the batch count by
    # batch_size again (as before) validated on only ~1/8 of the data.
    validation_steps=n_validation_samples,
    # `shuffle` is not passed: fit() ignores it for generator input; the
    # order is controlled by the generator's own `shuffle` flag.
    callbacks=callbacks_list,
    class_weight=class_weights
    )

In [None]:
score, acc = cnn_model.evaluate(validate,batch_size=batch_size)                       
print('Test score:', score)
print('Test accuracy:', acc)

In [None]:
cnn_model.save('/content/drive/MyDrive/model_result/catvsdog/cnn_latest_95%.h5')

In [None]:
cnn_model=load_model('/content/drive/MyDrive/model_result/catvsdog/cnn_latest.h5')

In [None]:
cnn_prediction = cnn_model.predict(validate)
cnn_prediction

In [None]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

# Prepare data for prediction.






In [None]:
#get image path
# List the test files and wrap them in a dataframe for the generator.
# NOTE(review): the 'test1' folder name and the 128x128 target size look
# inherited from another project -- confirm they match this dataset and the
# input shape of the model that will consume `testing`.
test_images = os.listdir(image_path+'/'+'test1')

#creat data frame
df_test =pd.DataFrame({
    'image': test_images,
})

df_test.head()

#prepare generator (rescaling only -- no augmentation at prediction time)
val_data_gen  = ImageDataGenerator( rescale = 1./255, )  
        
testing = val_data_gen.flow_from_dataframe(
      df_test,
      image_path+'/'+'test1/',
      x_col='image',
      y_col= None,
      target_size=(128,128),
      class_mode=None,
      # Keep the dataframe order: with shuffle=True the rows returned by
      # predict() could not be mapped back to df_test / testing.filenames.
      shuffle=False, 
      batch_size=batch_size
      )


# Prediction Using CNN.

In [None]:
predection = cnn_model.predict(testing)
predection

In [None]:
np.argmax(predection,axis=1)[0:100]

In [None]:
plot_predection(cnn_model)

In [11]:
new_image_path = 'kaggle/working/'

In [12]:
# import shutil
# shutil.rmtree(new_image_path+'train')
# shutil.rmtree(new_image_path+'test')


In [13]:
# Create the output folders for the converted images. exist_ok=True makes the
# cell safe to re-run (without it the second run raises FileExistsError).
# os.makedirs returns None, so these two names carry no useful value; they are
# kept only so that existing references keep working.
create_train_path_new =os.makedirs(new_image_path+'train', exist_ok=True)
create_test_path_new =os.makedirs(new_image_path+'test', exist_ok=True)

In [14]:
train_path_new = new_image_path+'train'
test_path_new  = new_image_path+'test'

In [15]:
import os
os.listdir('kaggle/working')

In [18]:
import os
import cv2 
import PIL
import tifffile
import matplotlib.pyplot as plt



In [26]:
# path = '../input/mayo-clinic-strip-ai/train/026c97_0.tif'

# Convert every training slide listed in train_df to a 1024x1024 JPEG and
# record the new file name / path on the same dataframe row.
train_images_paths_new = []
train_images_new       = []
train_path =image_path+'train/'

# Iterate over train_df (not os.listdir): the i-th collected path then belongs
# to the i-th row, whatever order the directory listing would have returned.
for i,image_id in enumerate(train_df['image_id']) :
    print(i)  
    image_full_path = train_path+image_id+'.tif'
    image_new_name  = image_id+'.jpg'
    imge_new_pathe  = train_path_new+'/'+image_new_name
    image = tifffile.imread(image_full_path)
    resized_img = cv2.resize(image,(1024,1024))
    # cv2.imwrite interprets 3-channel arrays as BGR, whereas tifffile returns
    # the channels as stored in the file -- assumed RGB here (TODO confirm).
    # Swap them so the JPEG colours are not inverted when read back.
    if resized_img.ndim == 3 and resized_img.shape[2] == 3:
        resized_img = cv2.cvtColor(resized_img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(imge_new_pathe, resized_img)
    train_images_paths_new.append(imge_new_pathe)
    train_images_new.append(image_new_name)
    # Whole-slide images are large: release them before loading the next one.
    del image
    del resized_img
      
train_df['new_path']  = train_images_paths_new
train_df['new_image_name'] = train_images_new


In [19]:
# path = '../input/mayo-clinic-strip-ai/train/026c97_0.tif'

# Convert every test slide listed in test_df to a 1024x1024 JPEG and record
# the new file name / path on the same dataframe row.
# NOTE(review): assumes test.csv has an `image_id` column and that files are
# named "<image_id>.tif", as for the training set -- confirm.
test_images_paths_new = []
test_images_new       = []
test_path =image_path+'test/'

# Iterate over test_df (not os.listdir) so that rows and files stay aligned.
for i,image_id in enumerate(test_df['image_id']) :
    print(i)  
    image_full_path = test_path+image_id+'.tif'
    image_new_name  = image_id+'.jpg'
    imge_new_pathe  = test_path_new+'/'+image_new_name
    image = tifffile.imread(image_full_path)
    resized_img = cv2.resize(image,(1024,1024))
    # cv2.imwrite interprets 3-channel arrays as BGR, whereas tifffile returns
    # the channels as stored in the file -- assumed RGB here (TODO confirm).
    if resized_img.ndim == 3 and resized_img.shape[2] == 3:
        resized_img = cv2.cvtColor(resized_img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(imge_new_pathe, resized_img)
    test_images_paths_new.append(imge_new_pathe)
    test_images_new.append(image_new_name)
    # Release the large arrays before loading the next slide.
    del image
    del resized_img
      
test_df['new_path']  = test_images_paths_new
test_df['new_image_name'] = test_images_new


In [20]:
import shutil
os.chdir(r'/kaggle/working')
shutil.make_archive('images_1024', 'zip', 'kaggle/working')

In [21]:
    from IPython.display import FileLink
    FileLink(r'./images_1024.zip')


In [129]:
def change_name (x):
    """Return the JPEG file name that corresponds to image file name ``x``.

    Everything from the first '.' onwards is replaced by '.jpg', mirroring how
    the conversion cells name the files they write (``name.split('.')[0]``
    plus '.jpg').

    Args:
        x: original file name, e.g. ``'026c97_0.tif'``.

    Returns:
        The matching JPEG file name, e.g. ``'026c97_0.jpg'``.
    """
    # Splitting on the literal 'jpg' (as before) left '.tif' names untouched,
    # giving 'name.tif.jpg', and turned 'name.jpg' into 'name..jpg'.
    name = x.split('.')[0]
    nename= name+'.jpg'
    return nename

In [143]:
train_df['new_image_name_1']=train_df['image'].apply(change_name)

In [144]:
train_df.head(5)

In [46]:
train_df.info()

In [49]:
train_path_new

In [142]:
batch_size = 8

# Augmenting generator -- used for the TRAINING subset only.
image_gen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.5,
    height_shift_range=0.2,
    width_shift_range=0.2,
    fill_mode='nearest',
    horizontal_flip=True,
    rotation_range = 20,
    validation_split=0.2,
)

# Plain generator for the VALIDATION subset: same rescaling and the same
# validation_split (so both generators partition train_df identically), but no
# random augmentation, which would make validation metrics noisy and not
# comparable between epochs.
val_image_gen = ImageDataGenerator(
    rescale = 1./255,
    validation_split=0.2,
)

train = image_gen.flow_from_dataframe(
    train_df,
    train_path_new+'/',
    x_col='new_image_name_1',
    y_col='label',
    target_size=(256,256),
    class_mode='categorical',
    shuffle=True,
    batch_size=batch_size,
    subset = "training",
    color_mode='rgb',
)
validate = val_image_gen.flow_from_dataframe(
    train_df,
    train_path_new+'/',
    x_col='new_image_name_1',
    y_col='label',
    target_size=(256,256),
    class_mode='categorical',
    # Keep a fixed order so that predict(validate) lines up with
    # validate.classes / validate.filenames.
    shuffle=False,
    batch_size=batch_size,
    subset = "validation",
    color_mode='rgb',
)


# Weight each class inversely to its frequency to compensate for the
# imbalanced labels.
class_weights = compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(train.classes),
    y = train.classes,
)
class_weights = dict(zip(np.unique(train.classes), class_weights))
class_weights


In [91]:
cnn_model = Sequential()
cnn_model.add(layers.Conv2D(64,(3,3),padding ='Same',activation = 'relu',input_shape=(256,256,3)))
cnn_model.add(layers.MaxPooling2D(2,2))
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(64,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2))
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(128,(3,3),padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(128,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(256,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(512,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(layers.Conv2D(512,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
# cnn_model.add(Dropout(0.3))
cnn_model.add(layers.Conv2D(512,(3,3) ,padding ='same',activation='relu'))
cnn_model.add(layers.MaxPooling2D(2,2)) 
cnn_model.add(BatchNormalization())
cnn_model.add(Dropout(0.3))
cnn_model.summary()


In [92]:

# cnn_model.add(layers.Flatten())
# cnn_model.add(layers.Dense(1024, activation='relu'))
# cnn_model.add(BatchNormalization())
# cnn_model.add(Dropout(0.7))
# cnn_model.add(layers.Dense(512, activation='relu'))
# cnn_model.add(BatchNormalization())
# cnn_model.add(Dropout(0.3))
# cnn_model.add(layers.Dense(2, activation ='softmax'))
# cnn_model.summary()


cnn_model.add(layers.Flatten())
cnn_model.add(layers.Dense(1024, activation='relu'))
cnn_model.add(BatchNormalization())
# cnn_model.add(Dropout(0.7))
cnn_model.add(layers.Dense(1024, activation='relu'))
cnn_model.add(BatchNormalization())
# cnn_model.add(Dropout(0.2))
cnn_model.add(layers.Dense(2, activation ='softmax'))
cnn_model.summary()

In [93]:
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
# Both callbacks watch the VALIDATION loss. Monitoring the training loss (as
# before) keeps training while the model overfits, because the training loss
# keeps falling even when validation performance has stopped improving.
# Every fit() call that receives this list also passes validation_data, so
# `val_loss` is available.
early = EarlyStopping(monitor="val_loss", mode="min",min_delta = 0,
                          patience = 10,
                          verbose = 1,
                          restore_best_weights = True)
# Multiply the learning rate by 0.3 after 2 epochs without improvement.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience = 2, verbose=1,factor=0.3, min_lr=0.000001)
callbacks_list = [ early, learning_rate_reduction]

In [94]:
cnn_model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'],)


In [97]:
# Training model
# len() of a Keras data iterator is the number of BATCHES per epoch, not the
# number of samples (the variable names are kept for compatibility).
n_training_samples = len(train)
n_validation_samples = len(validate)
history = cnn_model.fit(
    train,
    epochs=60,
    validation_data=validate,
    # Evaluate on every validation batch. Dividing the batch count by
    # batch_size again (as before) validated on only ~1/8 of the data.
    validation_steps=n_validation_samples,
    # `shuffle` is not passed: fit() ignores it for generator input; the
    # order is controlled by the generator's own `shuffle` flag.
    callbacks=callbacks_list,
    class_weight=class_weights
    )

In [15]:
import os
import cv2 
import PIL
import tifffile
import matplotlib.pyplot as plt

In [26]:
path = '../input/mayo-clinic-strip-ai/train/026c97_0.tif'
img = tifffile.imread(path)
resized_img = cv2.resize(img,(512,512))
cv2.imwrite(f"kaggle/working/new/test.jpg", resized_img)
print (resized_img.shape)

In [27]:
plt.imshow(resized_img)


# Transfer learning 
I will use the models below to try to get better accuracy:


1.   VGG16







Transfer learning is a machine learning technique where a model trained on one task is re-purposed on a second related task.

Transfer learning is an optimization that allows rapid progress or improved performance when modeling the second task.

Transfer learning is related to problems such as multi-task learning and concept drift and is not exclusively an area of study for deep learning.

Nevertheless, transfer learning is popular in deep learning given the enormous resources required to train deep learning models or the large and challenging datasets on which deep learning models are trained.

Transfer learning only works in deep learning if the model features learned from the first task are general.

# VGG16 Model

In [None]:
from IPython.display import Image
Image(filename="/content/drive/MyDrive/datasets/vgg16-1-e1542731207177.png")

VGG16 is a convolutional neural network model proposed by K. Simonyan and A. Zisserman from the University of Oxford in the paper “Very Deep Convolutional Networks for Large-Scale Image Recognition”. The model achieves 92.7% top-5 test accuracy in ImageNet, which is a dataset of over 14 million images belonging to 1000 classes. It was one of the famous model submitted to ILSVRC-2014. It makes the improvement over AlexNet by replacing large kernel-sized filters (11 and 5 in the first and second convolutional layer, respectively) with multiple 3×3 kernel-sized filters one after another. VGG16 was trained for weeks and was using NVIDIA Titan Black GPU’s.

# Handle the VGG model

1. Choose the VGG model layers so that they are adapted to our data.
2. Load the VGG weights.
3. Remove the dense layers from the model.




In [None]:
vgg_base_model = VGG16(input_shape=(128,128,3),weights='imagenet', include_top=False)
vgg_base_model.summary()

In [None]:
#freezing layers
freezing_layers(vgg_base_model)    

In [None]:
vgg_model = Sequential()
vgg_model.add(vgg_base_model)
vgg_model.add(layers.Flatten())
vgg_model.add(layers.Dense(1024, activation='relu'))
vgg_model.add(BatchNormalization())
vgg_model.add(layers.Dense(512, activation='relu'))
vgg_model.add(BatchNormalization())
vgg_model.add(layers.Dense(2, activation ='softmax'))
vgg_model.summary()





In [None]:
vgg_model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'],)

In [None]:
# Training model
# len() of a Keras data iterator is the number of BATCHES per epoch, not the
# number of samples (the variable names are kept for compatibility).
n_training_samples = len(train)
n_validation_samples = len(validate)
history = vgg_model.fit(
    train,
    epochs=60,
    validation_data=validate,
    # Evaluate on every validation batch. Dividing the batch count by
    # batch_size again (as before) validated on only ~1/8 of the data.
    validation_steps=n_validation_samples,
    # `shuffle` is not passed: fit() ignores it for generator input.
    callbacks=callbacks_list,
    # Same imbalance compensation as used for the CNN trained earlier.
    class_weight=class_weights
    )

In [None]:
score, acc = vgg_model.evaluate(validate,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

In [None]:
vgg_model.save('/content/drive/MyDrive/model_result/catvsdog/vgg.h5')

In [None]:
predict = vgg_model.predict(validate)

In [None]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

In [None]:
# Macro-averaged F1 / recall / precision of the VGG model.
# NOTE(review): `test` and `test_labels` are not defined anywhere in the
# visible notebook, so this cell raises NameError as written. It presumably
# needs a labelled, non-shuffled generator and its one-hot labels -- confirm
# and define them before running.
predict = vgg_model.predict(test)
# Convert class probabilities / one-hot rows to class indices.
y_pred = np.argmax(predict, axis=1)
y_test = np.argmax(test_labels, axis=1)
print('f1 Score : ',f1_score(y_test, y_pred, average="macro"))
print('recall  :',   recall_score(y_test,y_pred,average="macro"))
print('precision ',precision_score(y_test,y_pred,average="macro"))


In [None]:
from sklearn.metrics import classification_report,confusion_matrix
import pandas as pd
import seaborn as sns
# Confusion matrix with rows = true class, columns = predicted class.
# `labels=[0, 1]` fixes the matrix to 2x2 even if one class is absent from
# the predictions. Index 0 / 1 are named after this task's classes (Keras
# indexes label strings alphabetically: CE -> 0, LAA -> 1); the former
# "Normal/Pneumonia" captions came from another project.
# NOTE(review): confirm the mapping with `train.class_indices`.
cm = pd.DataFrame(data=confusion_matrix(y_test, y_pred, labels=[0, 1]),index=["Actual CE", "Actual LAA"],
columns=["Predicted CE", "Predicted LAA"])
sns.heatmap(cm,annot=True,fmt="d")

In [None]:
print(classification_report(y_test, y_pred))

# Prediction using VGG

In [None]:
plot_predection(vgg_model)

# InceptionResNetV2 Model

In [None]:
inseption_base_model = InceptionResNetV2(include_top=False, weights="imagenet", input_shape=(150,150,3), pooling="avg")

In [None]:
#remove any dense layer from model
freezing_layers(inseption_base_model)  

In [None]:
inseption = Sequential()
inseption.add(inseption_base_model)
inseption.add(layers.Flatten())
inseption.add(layers.Dense(2048 ,activation='relu'))
inseption.add(BatchNormalization())
inseption.add(Dropout(0.5))
inseption.add(layers.Dense(2, activation ='sigmoid'))
inseption.summary()

In [None]:
inseption.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [None]:
# Training model
# NOTE(review): `test` is not defined anywhere in the visible notebook, so
# this cell raises NameError as written; the other models validate on
# `validate` -- confirm which generator is intended.
# len() of a Keras data iterator is the number of batches, so dividing it by
# batch_size below presumably validates on only a fraction of the data --
# verify.
n_training_samples = len(train)
n_validation_samples = len(test)
history = inseption.fit(
    train,
    epochs=50,
    validation_data=test,
    validation_steps=n_validation_samples//batch_size,
    # steps_per_epoch =n_training_samples//batch_size,
    shuffle = True,
    callbacks=callbacks_list
    )

In [None]:
predict = inseption.predict(test)

In [None]:
score, acc = inseption.evaluate(test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

In [None]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

In [None]:
predict = inseption.predict(test)
y_pred = np.argmax(predict, axis=1)
y_test = np.argmax(test_labels, axis=1)
print('f1 Score : ',f1_score(y_test, y_pred, average="macro"))
print('recall  :',   recall_score(y_test,y_pred,average="macro"))
print('precision ',precision_score(y_test,y_pred,average="macro"))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix
import pandas as pd
import seaborn as sns
# Confusion matrix with rows = true class, columns = predicted class.
# `labels=[0, 1]` fixes the matrix to 2x2 even if one class is absent from
# the predictions. Index 0 / 1 are named after this task's classes (Keras
# indexes label strings alphabetically: CE -> 0, LAA -> 1); the former
# "Normal/Pneumonia" captions came from another project.
# NOTE(review): confirm the mapping with `train.class_indices`.
cm = pd.DataFrame(data=confusion_matrix(y_test, y_pred, labels=[0, 1]),index=["Actual CE", "Actual LAA"],
columns=["Predicted CE", "Predicted LAA"])
sns.heatmap(cm,annot=True,fmt="d")

In [None]:
print(classification_report(y_test, y_pred))

# Prediction using Inception



In [None]:
plot_predection(inseption) 

# Xception Model

In [None]:
xception_base_model = Xception(include_top=False,weights='imagenet',input_shape = (128,128,3))

In [None]:
#remove any dense layer from model
freezing_layers(xception_base_model)

In [None]:
xception = Sequential()
xception.add(xception_base_model)
xception.add(layers.Flatten())
xception.add(layers.Dense(1024, activation='relu'))
xception.add(BatchNormalization())
xception.add(layers.Dense(512, activation='relu'))
xception.add(BatchNormalization())
xception.add(layers.Dense(2, activation ='softmax'))
xception.summary()


In [None]:
xception.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'],)

In [None]:
# Training model
# len() of a Keras data iterator is the number of BATCHES per epoch, not the
# number of samples (the variable names are kept for compatibility).
n_training_samples = len(train)
n_validation_samples = len(validate)
history = xception.fit(
    train,
    epochs=60,
    validation_data=validate,
    # Evaluate on every validation batch. Dividing the batch count by
    # batch_size again (as before) validated on only ~1/8 of the data.
    validation_steps=n_validation_samples,
    # `shuffle` is not passed: fit() ignores it for generator input.
    callbacks=callbacks_list,
    # Same imbalance compensation as used for the CNN trained earlier.
    class_weight=class_weights
    )

In [None]:
score, acc = xception.evaluate(validate,  batch_size=batch_size)
                          
print('Test score:', score)
print('Test accuracy:', acc)

In [None]:
xception.save('/content/drive/MyDrive/model_result/catvsdog/xception.h5')

In [None]:
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

In [None]:
x_predection = xception.predict(testing)
x_predection[0:35]

In [None]:
np.argmax(x_predection,axis=1)[0:35]

In [None]:
# Macro-averaged F1 / recall / precision of the Xception model.
# NOTE(review): `test_labels` is not defined anywhere in the visible notebook,
# and `testing` is built with class_mode=None (no labels) and shuffle=True, so
# these metrics cannot be computed as written -- confirm the intended
# labelled evaluation set.
predict = xception.predict(testing)
# Convert class probabilities / one-hot rows to class indices.
y_pred = np.argmax(predict, axis=1)
y_test = np.argmax(test_labels, axis=1)
print('f1 Score : ',f1_score(y_test, y_pred, average="macro"))
print('recall  :',   recall_score(y_test,y_pred,average="macro"))
print('precision ',precision_score(y_test,y_pred,average="macro"))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix
import pandas as pd
import seaborn as sns
# Confusion matrix with rows = true class, columns = predicted class.
# `labels=[0, 1]` fixes the matrix to 2x2 even if one class is absent from
# the predictions. Index 0 / 1 are named after this task's classes (Keras
# indexes label strings alphabetically: CE -> 0, LAA -> 1); the former
# "Normal/Pneumonia" captions came from another project.
# NOTE(review): confirm the mapping with `train.class_indices`.
cm = pd.DataFrame(data=confusion_matrix(y_test, y_pred, labels=[0, 1]),index=["Actual CE", "Actual LAA"],
columns=["Predicted CE", "Predicted LAA"])
sns.heatmap(cm,annot=True,fmt="d")

In [None]:
print(classification_report(y_test, y_pred))

# Prediction using Xception

In [None]:
#plot_predection(xception)

In [None]:
df_test =pd.DataFrame({
    'image': test_images,
})
def plot_predection(model_name):
    """Show test images in a 5x7 grid, each captioned with the predicted class.

    Every file found in ``image_path + '/test1/'`` is read, resized to
    128x128, passed through ``model_name.predict`` and drawn with the label
    returned by ``get_Label``. At most 34 images are shown.

    Args:
        model_name: the fitted model object (not a string) exposing ``predict``.
    """
    plt.figure(figsize=(20,15))
    plt.suptitle("Predection  Images", fontsize=20)
    # NOTE(review): the 'test1/' folder and the fixed 128x128 size look
    # inherited from another project, and the pixels are not rescaled by 1/255
    # as in the training generators -- confirm they match the model being used.
    path =image_path+'/'+'test1/'
    count = 0
    for i,files in enumerate(os.listdir(path)) :
        img = plt.imread(path+files)
        img = cv2.resize(img,(128,128))
        # Select the grid cell BEFORE drawing; calling imshow first would put
        # each image into the previously selected cell.
        plt.subplot(5,7,i+1)
        plt.imshow(img,cmap=plt.cm.binary)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        feature = model_name.predict(np.expand_dims(img, axis=0))
        predection  = np.argmax(feature, axis=1)
        print(predection)
        # argmax over a batch of one yields a length-1 array; index it
        # explicitly instead of relying on implicit array-to-int conversion.
        plt.xlabel("Predicted"+get_Label(int(predection[0])))
        count += 1
        if count == 34 :
            break 


In [None]:
plot_predection(xception)

In [None]:
# Show up to 34 test images in a 5x7 grid, each captioned with the class
# predicted by the Xception model.
count=0
plt.figure(figsize=(20,15))
# Bound the loop by the number of available files so a short test folder does
# not raise IndexError; 34 is the number of images the original loop showed.
for i in range(min(34, len(test_images))):
        fn = os.path.join(image_path+'/'+'test1/', test_images[i])
        img = plt.imread(fn)
        img = cv2.resize(img,(128,128))
        # Select the grid cell BEFORE drawing; calling imshow first would put
        # each image into the previously selected cell.
        plt.subplot(5,7,i+1)
        plt.imshow(img,cmap=plt.cm.binary)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        feature = xception.predict(np.expand_dims(img, axis=0))
        predection  = np.argmax(feature, axis=1)
        print(predection)
        # argmax over a batch of one yields a length-1 array; index it
        # explicitly before converting to int.
        plt.xlabel("Predicted"+get_Label(int(predection[0])))
        count += 1

 
        
