Run only the Imports and main cells.

# Imports

In [None]:
import os
import time
from subprocess import getoutput
from sklearn.metrics import roc_auc_score, confusion_matrix, cohen_kappa_score, f1_score
from sklearn.utils import class_weight
import tensorflow as tf
import pandas as pd
import numpy as np
import keras
from keras import optimizers
from keras.models import Model, load_model
from keras.layers import Dense, Flatten
from keras.callbacks import (ModelCheckpoint, TensorBoard)
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import preprocess_input, DenseNet169
import warnings
warnings.filterwarnings('ignore')  # Ignore python warnings

from google.colab import drive
# Mount Google Drive at /content/drive; every dataset and model path used
# below lives under /content/drive/MyDrive/MURA-v1.2.
drive.mount('/content/drive', force_remount=True)

# Clone the project repository into the "ready_dense168" directory.
# NOTE(review): getoutput() returns the command's output as a string, which is
# discarded here, so a failed clone goes unnoticed - confirm this is intended.
getoutput("git clone -l -s https://github.com/jarvint12/ai_project.git ready_dense168")
#https://github.com/bigrewal/Musculoskeletal-Radiographs-abnormality-detection.git
#os.chdir('cloned-repo')
#from mura_model.src.model.dense169 import get_dense169

Mounted at /content/drive




# Dense169 model

In [None]:
def get_dense169(input_shape, learning_rate):
    """Build and compile a DenseNet169-based binary classifier.

    The network is an ImageNet-initialised DenseNet169 without its top,
    followed by a Flatten layer and a single sigmoid unit. Every layer of
    the base network is left trainable, so the whole model is fine-tuned.

    :param input_shape: side length in pixels of the square RGB input images
    :param learning_rate: learning rate passed to the Adam optimizer
    :return: the compiled keras model (binary cross-entropy loss, accuracy metric)
    """
    # create the base pre-trained model
    dense_169_model = DenseNet169(include_top=False, weights='imagenet',
                                  input_shape=(input_shape, input_shape, 3))
    model = keras.Sequential([dense_169_model, Flatten(), Dense(1, activation='sigmoid')])

    # Fine-tune the whole network, not only the new classification head.
    for layer in dense_169_model.layers:
        layer.trainable = True

    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    adam = optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Averaged probabilities

In [None]:

def average_probabilities(probs, image_paths, output_file):
    """Average the predictions of all the images in a study and save them.

    :param probs: predictions of the neural network, one entry per image
    :param image_paths: file paths of all the predicted images, in the same
        order as ``probs``
    :param output_file: CSV file to create; it gets one ``study,label`` row
        per study and no header row
    :return: dict mapping each study directory (with a trailing "/") to its
        0/1 label
    """
    averaged_probs = __average(probs, image_paths)

    # Create the CSV file
    df = pd.DataFrame({'study': list(averaged_probs.keys()),
                       'label': list(averaged_probs.values())})
    df.to_csv(output_file, index=False, header=False)
    print(output_file + " Created!")
    return averaged_probs


def __average(probs, image_paths):
    """Group predictions by study directory and reduce each group to a label.

    A study is identified by the directory part of the image path. Images are
    grouped by an exact match on that directory: a substring test would also
    put every "abnormal/..." image into the "normal/" study.

    :param probs: per-image predictions, indexable in the order of ``image_paths``
    :param image_paths: "/"-separated image paths
    :return: dict of study directory -> 0/1 label, in order of first appearance
    """
    grouped = {}
    for index, path in enumerate(image_paths):
        study_name = '/'.join(path.split('/')[0:-1]) + "/"
        grouped.setdefault(study_name, []).append(probs[index])

    averaged_probabilities = {}
    for study_name, study_probs in grouped.items():
        # Based on the current directory structure, Keras has assigned the value 0 (Zero) to abnormal
        # and 1 (One) to Normal so in order to make sure that the class indices are in the right order
        # we have to subtract the predictions from 1.
        averaged_probabilities[study_name] = 1 - int(np.round(np.mean(study_probs)))

    return averaged_probabilities

# Metrics

In [None]:
def print_all_metrics(y_true, y_pred):
    """Print ROC AUC, sensitivity, specificity, Cohen's kappa and F1 score.

    :param y_true: ground-truth labels
    :param y_pred: predicted labels, in the same order as ``y_true``
    """
    auc = roc_auc_score(y_true, y_pred)
    print("roc_auc_score: ", auc)

    # Unpacking into four values requires a 2x2 confusion matrix.
    matrix = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = matrix.ravel()

    sensitivity = get_sensitivity(tp, fn)
    print("Sensitivity: ", sensitivity)
    specificity = get_specificity(tn, fp)
    print("Specificity: ", specificity)
    kappa = cohen_kappa_score(y_true, y_pred)
    print("Cohen-Cappa-Score: ", kappa)
    f1 = f1_score(y_true, y_pred)
    print("F1 Score: ", f1)


def get_sensitivity(tp, fn):
    """Return the sensitivity (true-positive rate), tp / (tp + fn).

    :param tp: number of true positives
    :param fn: number of false negatives
    """
    actual_positives = tp + fn
    return tp / actual_positives


def get_specificity(tn, fp):
    """Return the specificity (true-negative rate), tn / (tn + fp).

    :param tn: number of true negatives
    :param fp: number of false positives
    """
    actual_negatives = tn + fp
    return tn / actual_negatives

# main-train

**Create the Architectures**

In [None]:
def get_best_model(body_part, model_location):
  """Return the path of the best saved model for a body part.

  The last matching file wins. Directories and file names are visited in
  sorted order so that "last" is well defined; os.walk itself gives no
  ordering guarantee. This supposes the file names sort from worst to best
  model, which holds for checkpoints named with a zero-padded epoch number
  that are only saved when the monitored metric improves.

  :param body_part: text that must occur in the model file name
  :param model_location: directory that is searched recursively
  :return: path of the selected model file
  :raises FileNotFoundError: if no file name contains ``body_part``
  """
  best_model = None
  for root, dirs, files in os.walk(model_location, topdown=True):
    dirs.sort()  # in-place sort makes the traversal order deterministic
    for file in sorted(files):
      if body_part in file:
        best_model = os.path.join(root, file)
  if best_model is None:
    raise FileNotFoundError("No model found for "+body_part+" from directory "+model_location+'.')
  return best_model


def main_train(body_part):
  """Fine-tune DenseNet169 on the images of one body part.

  Training images are read from temp_<body_part>/train_data and evaluation
  images from temp_<body_part>/valid_data under the MURA-v1.2 Drive folder.
  The valid_data folder is split by the data generator (validation_split=0.7):
  its 'training' subset is used as validation data during fitting and its
  'validation' subset is returned as a held-out test generator.

  A checkpoint is written to output_separate/ whenever val_accuracy improves.

  :param body_part: body part name used in the directory and checkpoint names
  :return: tuple of (path of the model chosen by get_best_model, test generator)
  """
  input_shape = 320
  batch_size = 8
  epochs = 10
  learning_rate = 0.0001

  data_dir = '/content/drive/MyDrive/MURA-v1.2/temp_'+body_part
  model_location = "/content/drive/MyDrive/MURA-v1.2/output_separate/"

  dense169_mura_single = get_dense169(input_shape, learning_rate)

  # Augmentation is applied to the training images only.
  train_datagen = ImageDataGenerator(
      rotation_range=30,
      horizontal_flip=True,
      preprocessing_function=preprocess_input,
      validation_split=0)

  train_generator = train_datagen.flow_from_directory(
      data_dir+'/train_data',
      target_size=(input_shape, input_shape),
      batch_size=batch_size,
      class_mode='binary',
      subset='training')
  training_data_size = len(train_generator.filenames)
  print("Number of Training examples: ", training_data_size)

  valid_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      validation_split=0.7)

  # shuffle=False keeps predictions aligned with generator.filenames.
  valid_generator = valid_datagen.flow_from_directory(
      data_dir+'/valid_data',
      target_size=(input_shape, input_shape),
      batch_size=batch_size,
      class_mode='binary',
      shuffle=False,
      subset="training")
  validation_data_size = len(valid_generator.filenames)
  print("Number of Validation examples: ", validation_data_size)

  test_generator = valid_datagen.flow_from_directory(
      data_dir+'/valid_data',
      target_size=(input_shape, input_shape),
      batch_size=batch_size,
      class_mode='binary',
      shuffle=False,
      subset="validation")

  # compute_class_weight takes keyword-only arguments in current scikit-learn.
  classes = np.unique(train_generator.classes)
  weights = class_weight.compute_class_weight(
      class_weight='balanced', classes=classes, y=train_generator.classes)
  print("Weights: ",weights)
  class_weights = {int(label): float(weight) for label, weight in zip(classes, weights)}

  # The checkpoint name carries the body part, the epoch number and the
  # validation accuracy (the metric is called val_accuracy, not val_acc).
  filepath = model_location+"dense169-"+body_part+"-{epoch:02d}-{val_accuracy:.2f}.hdf5"

  checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
  tensorboard = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=False)
  callbacks_list = [checkpoint, tensorboard]

  # summary() prints by itself; wrapping it in print() would also print "None".
  dense169_mura_single.summary()

  dense169_mura_single.fit(train_generator,
                           validation_data=valid_generator,
                           steps_per_epoch=training_data_size // batch_size,
                           class_weight=class_weights,
                           callbacks=callbacks_list,
                           validation_steps=validation_data_size // batch_size,
                           epochs=epochs)
  return get_best_model(body_part, model_location), test_generator

# Submission mura

In [None]:
def submission_mura(body_part, model, test_generator):
  """Predict the test images and write the per-study predictions to a CSV.

  :param body_part: body part name, used in the name of the output CSV
  :param model: path of the saved model to load
  :param test_generator: un-shuffled directory iterator over the test images
  :return: dict of study -> label as returned by average_probabilities
  """
  model_mura = load_model(model)
  file_names = test_generator.filenames
  N = len(file_names)

  print("Images to be prediced: ", N)

  print("Predicting ...")
  # One step consumes one batch, not one image, so the number of steps is the
  # number of batches (len of the generator). predict() accepts generators,
  # like the fit() call used for training; predict_generator is deprecated.
  probs = model_mura.predict(test_generator, steps=len(test_generator))
  print("Done!")

  print(probs[:10])
  print(file_names[:10])

  print(len(probs))
  print(file_names[0])

  predictions = average_probabilities(
      probs, file_names,
      "/content/drive/MyDrive/MURA-v1.2/output_separate/predictions_"+body_part+".csv")

  return predictions



# Evaluate model

**Load trained model**

In [None]:
# ABNORMAL_XR_ELBOW_patient11186_study1_positive_image1.png

def extract_studies(predictions, filenames, body_part, y_true):
  """Print all metrics for the images whose file name contains body_part.

  :param predictions: predicted labels, indexable in the order of ``filenames``
  :param filenames: image file names
  :param body_part: text a file name must contain to be included
  :param y_true: ground-truth labels, indexable in the order of ``filenames``
  """
  selected = [position for position, name in enumerate(filenames) if body_part in name]
  pred = [predictions[position] for position in selected]
  true = [y_true[position] for position in selected]

  print("===== "+body_part+" ======")
  print_all_metrics(true, pred)

In [None]:
def model_evaluation(model, test_generator, predictions, body_part):
  """Load a saved model, predict the test images and print the metrics.

  :param model: path of the saved model to load
  :param test_generator: un-shuffled directory iterator over the test images
  :param predictions: unused; kept so existing callers keep working. The
      predictions are recomputed from ``test_generator`` inside the function.
  :param body_part: body part name, used to build the file paths and to
      select the images that are scored
  :return: confusion matrix of the true labels against the rounded predictions
  """
  print("In model evaluation.")
  print( "Loading Model" )
  model_mura = load_model(model)
  print("Model Loaded!")

  print(test_generator.class_indices)
  print("Predicting "+str(len(test_generator.filenames))+" files.")
  start = time.time()

  # One step consumes one batch, so the step count is the number of batches.
  # Using the number of files would not match the generator's batch size and
  # can leave predictions and y_true with different lengths.
  predictions = model_mura.predict(test_generator, steps=len(test_generator))

  end = time.time()
  print("It took: ", end - start)

  predictions = predictions.flatten()
  y_true = test_generator.classes
  print("pred, shape",predictions.shape)
  # Turn the sigmoid outputs into hard 0/1 labels.
  predictions = np.round(predictions)

  prefix="/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data/"
  file_names = [prefix + file for file in test_generator.filenames]
  print(file_names[0])
  print("lens",len(file_names), len(predictions), len(y_true))
  extract_studies(predictions, file_names, body_part, y_true)

  # Previously computed and thrown away; returned so callers can inspect it.
  return confusion_matrix(y_true, predictions)

# main

Define the helper functions that create the temporary directories, move the image files, and delete previously saved models when necessary.

In [None]:
def move_files_temp(body_part):
  """Move the images of one body part into its temp_<body_part> tree.

  Walks the MURA-v1.2 folder and, for every directory whose path names a
  split (valid_data or train_data) and a class (abnormal or normal), renames
  the files whose name contains ``body_part`` into the matching
  temp_<body_part>/<split>/<class> directory. Directories that already
  belong to temp_<body_part> are skipped.

  :param body_part: text a file name must contain to be moved
  """
  dataset_root = '/content/drive/MyDrive/MURA-v1.2'
  temp_root = "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part

  for root, dirs, files in os.walk(dataset_root, topdown=True):
    if "temp_"+body_part in root:
      continue

    if "valid_data" in root:
      split = "valid_data"
    elif "train_data" in root:
      split = "train_data"
    else:
      continue

    # "abnormal" contains "normal", so it has to be tested first.
    if "abnormal" in root:
      label = "abnormal"
    elif "normal" in root:
      label = "normal"
    else:
      continue

    for file in files:
      if body_part in file:
        os.rename(root+'/'+file, temp_root+"/"+split+"/"+label+"/"+file)

  print("All "+body_part+" files moved to temporal location.")

def move_files_back(body_part):
  """Move the images of one body part from temp_<body_part> back to the dataset.

  Walks temp_<body_part> and renames every file whose name contains
  ``body_part`` into MURA-v1.2/<split>/<class>, where the split (valid_data
  or train_data) and the class (abnormal or normal) are taken from the path
  of the directory the file is found in.

  :param body_part: text a file name must contain to be moved
  """
  dataset_root = "/content/drive/MyDrive/MURA-v1.2"
  temp_root = "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part

  for root, dirs, files in os.walk(temp_root, topdown=True):
    if "valid_data" in root:
      split = "valid_data"
    elif "train_data" in root:
      split = "train_data"
    else:
      continue

    # "abnormal" contains "normal", so it has to be tested first.
    if "abnormal" in root:
      label = "abnormal"
    elif "normal" in root:
      label = "normal"
    else:
      continue

    for file in files:
      if body_part in file:
        os.rename(root+'/'+file, dataset_root+"/"+split+"/"+label+"/"+file)

  print("All "+body_part+" files moved back to their original location.")


def create_directories(body_part):
  """Create the temp_<body_part> tree with its split and class directories.

  Creates train_data and valid_data, each with an abnormal and a normal
  sub-directory. Directories that already exist are left alone, so a tree
  that was only partly created by an interrupted run is completed instead of
  raising FileExistsError.

  :param body_part: body part name used in the temp directory name
  """
  temp_root = "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part
  for split in ("train_data", "valid_data"):
    for label in ("abnormal", "normal"):
      # makedirs also creates the missing parent directories.
      os.makedirs(temp_root+"/"+split+"/"+label, exist_ok=True)
  print("Temporal directories created for program.")

def remove_directories(body_part):
  """Remove the temp_<body_part> directory tree.

  The directories are removed with os.rmdir from the innermost to the
  outermost, so the call fails if any of them still contains files.

  :param body_part: body part name used in the temp directory name
  """
  temp_root = "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part
  # Children first: os.rmdir only removes empty directories.
  removal_order = (
      "/train_data/abnormal",
      "/train_data/normal",
      "/valid_data/abnormal",
      "/valid_data/normal",
      "/train_data",
      "/valid_data",
      "",
  )
  for relative_path in removal_order:
    os.rmdir(temp_root+relative_path)
  print("All temporal directories removed.")

def delete_old_models(location, body_part):
  """Delete every file under location whose name contains body_part.

  :param location: directory that is searched recursively
  :param body_part: text a file name must contain to be deleted
  """
  for root, _subdirs, files in os.walk(location, topdown=True):
    matching = [name for name in files if body_part in name]
    for name in matching:
      target = root+'/'+name
      os.remove(target)
      print("Deleted old file, "+target+'.')

Main program

In [None]:


def main():
  """Train, predict and evaluate one model per body part.

  For each body part: delete its old checkpoints, move its images into a
  temporary directory tree, train, write the per-study predictions, evaluate,
  then move the images back and remove the temporary tree.
  """
  dataset_root = "/content/drive/MyDrive/MURA-v1.2"
  body_parts=["SHOULDER", "WRIST"]
  for body_part in body_parts:
    delete_old_models("/content/drive/MyDrive/MURA-v1.2/output_separate/", body_part)
    print("Handling body part "+body_part+'.')
    if not os.path.isdir(dataset_root+"/temp_"+body_part):
      create_directories(body_part)
    try:
      move_files_temp(body_part)
      time.sleep(5)  # pause after the moves before the data generators list the directories
      os.makedirs(dataset_root+"/output_separate", exist_ok=True)
      model, test_generator=main_train(body_part)
      print("The best model: "+model)
      predictions=submission_mura(body_part, model, test_generator)
      model_evaluation(model, test_generator, predictions, body_part)
    finally:
      # Restore the dataset even when a step above fails; otherwise the images
      # stay in temp_<body_part> and are missing from the dataset on the next run.
      move_files_back(body_part)
      time.sleep(2)
      remove_directories(body_part)


if __name__ == "__main__":
  main()
# Next: use a directory structure with 14 folders (e.g. wrist_normal, wrist_abnormal)
# and call flow_from_directory with class_mode="categorical".
# Then the model works the same way as it normally does.

Deleted old file, /content/drive/MyDrive/MURA-v1.2/output_separate//dense169-SHOULDER-01-0.59.hdf5.
Deleted old file, /content/drive/MyDrive/MURA-v1.2/output_separate//dense169-SHOULDER-03-0.60.hdf5.
Deleted old file, /content/drive/MyDrive/MURA-v1.2/output_separate//dense169-SHOULDER-04-0.61.hdf5.
Deleted old file, /content/drive/MyDrive/MURA-v1.2/output_separate//dense169-SHOULDER-05-0.65.hdf5.
Handling body part SHOULDER.
All SHOULDER files moved to temporal location.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 8379 images belonging to 2 classes.
Number of Training examples:  8379
Found 170 images belonging to 2 classes.
Number of Validation examples:  170
Found 393 images belonging to 2 classes.
Weights:  [1.00515835 0.99489432]
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param 

In [None]:
# body_part="HAND"
# for root, dirs, files in os.walk("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part, topdown=True):
#   for file in files:
#     if body_part in file:
#       if "valid" in root:
#         if "abnormal" in root:
#           os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/valid_data/abnormal/"+file)
#         else:
#           os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/valid_data/normal/"+file)
#       else:
#         if "abnormal" in root:
#           os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/train_data/abnormal/"+file)
#         else:
#           os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/train_data/normal/"+file)
# print("All "+body_part+" files moved back to their original location.")


# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/train_data/abnormal")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/train_data/normal")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data/abnormal")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data/normal")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/train_data")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data")
# os.rmdir("/content/drive/MyDrive/MURA-v1.2/temp_"+body_part)
# print("All temporal directories removed.")

#         # if "valid_data" in root:
#         #   pass
#         # #  if "abnormal" in root:
#         # #    os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data/abnormal/"+file)
#         #  # elif "normal" in root:
#         #  #   os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/valid_data/normal/"+file)
#         # elif "train_data" in root:
#         #   #print("ON")
#         #   if "normal" in root:
#         #     print("JOO")
#         #     os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/temp_"+"FOREARM"+"/train_data/normal/"+file)
#              # elif "normal" in root:
#           #   os.rename(root+'/'+file, "/content/drive/MyDrive/MURA-v1.2/temp_"+body_part+"/train_data/normal/"+file)