# In [0]:
import argparse
from inspect import signature

import matplotlib.pyplot as plt
import numpy as np
import scikitplot as skplt
import seaborn as sn
from matplotlib.pyplot import cm
from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.utils.multiclass import unique_labels

class Diagnostics:
    """
    Collection of diagnostic plots for a trained model.

    Every plotting method draws with matplotlib, optionally shows the figure,
    and closes it afterwards. Most methods also save the figure to the current
    working directory under a fixed file name.

    Input:
       - actual: array-like, the true labels/targets (mandatory)
       - predicted: array-like, the model predictions (mandatory)
       - acc: indexable, acc[0] is the per-epoch training accuracy and acc[1]
              the per-epoch validation accuracy
       - loss: indexable, loss[0] / loss[1] are the per-epoch training /
               validation loss
       - pixel_values: stored on the instance; not used by any method here
       - feature_list: table of features used by residual_dist_by_feature; it
                       must expose `.columns` and column indexing (e.g. a
                       pandas DataFrame)
       - cross_val: sequence of per-fold scores used by plot_cross_validation
       - auc: indexable, auc[0] / auc[1] are the per-epoch training /
              validation AUC
    """

    def __init__(self, actual, predicted, acc=0, loss=0, pixel_values=0,
                 feature_list=0, cross_val=0, auc=0):
        # Mandatory lists for diagnostics
        self.actual = actual
        self.predicted = predicted

        # User-added lists for diagnostics
        self.acc = acc
        self.loss = loss
        self.auc = auc
        self.pixel_values = pixel_values
        self.feature_list = feature_list
        self.cross_val = cross_val

        # Plot axes formatting
        self.format_plot_axes()

    @staticmethod
    def format_plot_axes():
        """
        Applies the shared axis/tick styling to matplotlib's global rcParams.

        The settings only affect axes created after this call.
        """
        plt.rcParams['axes.linewidth'] = 3
        for axis in ('xtick', 'ytick'):
            plt.rcParams[axis + '.major.width'] = 2
            plt.rcParams[axis + '.minor.width'] = 2
            plt.rc(axis + '.major', size=8, pad=8)
            plt.rc(axis + '.minor', size=6, pad=5)

    @staticmethod
    def _normalize_rows(matrix):
        """Returns `matrix` as floats with each row divided by its row sum (all-zero rows stay zero)."""
        counts = np.asarray(matrix, dtype=float)
        row_totals = counts.sum(axis=1, keepdims=True)
        # A class that never occurs in `actual` has an all-zero row; leave it at 0
        # instead of producing NaN from 0/0.
        return np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)

    @staticmethod
    def _binary_rates(actual, predicted):
        """Returns (false_positive_rate, true_positive_rate) treating the label 1 as positive."""
        actual_pos = np.asarray(actual) == 1
        predicted_pos = np.asarray(predicted) == 1

        true_positive = np.count_nonzero(actual_pos & predicted_pos)
        false_negative = np.count_nonzero(actual_pos & ~predicted_pos)
        false_positive = np.count_nonzero(~actual_pos & predicted_pos)
        true_negative = np.count_nonzero(~actual_pos & ~predicted_pos)

        positives = true_positive + false_negative
        negatives = false_positive + true_negative
        # A rate is reported as 0.0 when its class is absent from `actual`.
        true_positive_rate = true_positive / positives if positives else 0.0
        false_positive_rate = false_positive / negatives if negatives else 0.0
        return float(false_positive_rate), float(true_positive_rate)

    @staticmethod
    def _fractional_error(actual, predicted):
        """Returns 2*(predicted-actual)/(|actual|+|predicted|) element-wise, 0 where both are 0."""
        actual = np.asarray(actual, dtype=float)
        predicted = np.asarray(predicted, dtype=float)
        scale = np.abs(actual) + np.abs(predicted)
        # Both values being zero means a perfect prediction, so the error is 0.
        return np.divide(2 * (predicted - actual), scale, out=np.zeros_like(scale), where=scale != 0)

    @staticmethod
    def _to_channels_first(data):
        """Returns 4D image data as [batch_size, channels, height, width]."""
        images = np.asarray(data)
        # Same heuristic as the layout check documented in plot_sample_img: the
        # input is taken as channels-first when axis 1 is smaller than axis 3.
        if images.shape[1] < images.shape[3]:
            return images
        return np.moveaxis(images, 3, 1)

    # Plots confusion matrix. If norm is set, values are between 0-1. Shows figure if show is set
    def plot_cm(self, figsize=(6, 4), norm=True, show=True):
        """
        Creates a confusion matrix for the predicted and actual labels for your model
        and saves it as "Confusion_Matrix_Norm.jpeg" or "Confusion_Matrix.jpeg"

        Input:
           - figsize: tuple, the figure size of the desired plot
           - norm: boolean, whether or not you want your confusion matrix normalized (between 0-1)
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        # Use the labels of BOTH arrays so the tick labels always line up with the
        # rows/columns of the matrix, even if a class is never predicted.
        labels = unique_labels(self.actual, self.predicted).tolist()
        matrix = confusion_matrix(self.actual, self.predicted, labels=labels)

        if norm:
            heatmap_value = self._normalize_rows(matrix)
            file_name = "Confusion_Matrix_Norm.jpeg"
            title = "Normalized Confusion Matrix"
        else:
            heatmap_value = matrix.astype('float')
            file_name = "Confusion_Matrix.jpeg"
            title = "Confusion Matrix"

        plt.figure(figsize=figsize)
        sn.heatmap(heatmap_value, annot=True, xticklabels=labels, yticklabels=labels,
                   cmap="Blues", annot_kws={"size": 14})
        plt.title(title, fontsize=14)
        plt.yticks(fontsize=14)
        plt.xticks(fontsize=14)
        plt.ylabel("True Label", fontsize=14)
        plt.xlabel("Predicted Label", fontsize=14)

        plt.savefig(file_name)
        if show:
            plt.show()
        plt.close()

    # Plots metrics by epoch. Plots "loss", "accuracy" and/or "auc" (default is all three). Shows figure if show is set
    def plot_metrics_per_epoch(self, figsize=(6, 4), name_plot=(0, 1, 2), show=True):
        """
        Plots accuracy, loss, and auc curves per epoch, one panel per metric, and
        saves the figure as "Epoch_vs_<metric names joined by '_'>.jpeg"

        Input:
          - figsize: tuple, the size of the whole figure
          - name_plot: tuple, whether you want '0' (loss), '1' (accuracy), and/or '2' (auc) plots
          - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        available = {0: ("Loss", self.loss), 1: ("Accuracy", self.acc), 2: ("AUC", self.auc)}

        selected = []
        for code in name_plot:
            if code not in available:
                print("Improper value inputted, ignoring value")
                continue
            selected.append(available[code])
        if not selected:
            return

        # Styling must be applied before the axes are created to take effect.
        self.format_plot_axes()
        # squeeze=False keeps `axes` two-dimensional even for a single panel.
        fig, axes = plt.subplots(nrows=1, ncols=len(selected), figsize=figsize, squeeze=False)

        for ax, (metric_name, series) in zip(axes[0], selected):
            ax.plot(series[0], '-', color='seagreen', label='Training')
            ax.plot(series[1], '--', color='blue', label='Validation')
            ax.set_title("Epoch vs " + metric_name, fontsize=26)
            ax.set_xlabel("Epoch", fontsize=20)
            ax.set_ylabel(metric_name, fontsize=20)
            ax.tick_params(axis='both', labelsize=16)
            ax.legend(loc='best')

        file_name = "Epoch_vs_" + "_".join(name for name, _ in selected) + ".jpeg"
        fig.savefig(file_name, bbox_inches='tight', transparent=True)

        if show:
            plt.show()
        plt.close(fig)

    def plot_cross_validation(self, figsize=(6, 4), show=True):
        """
        Plots the per-fold values stored in `cross_val` and saves the figure as
        "K_fold_Cross_Validation.jpeg"

        Input:
           - figsize: tuple, the figure size of the desired plot
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        file_name = "K_fold_Cross_Validation.jpeg"
        plt.figure(figsize=figsize)
        plt.title("K-fold Cross Validation", fontsize=14)
        plt.yticks(fontsize=14)
        plt.xticks(fontsize=14)
        # plt.plot(values) puts the index (the fold) on x and the value on y.
        plt.xlabel("Folds", fontsize=14)
        plt.ylabel("Accuracy", fontsize=14)
        plt.plot(self.cross_val)
        plt.savefig(file_name)
        if show:
            plt.show()
        plt.close()

    def ROC_plot_sk(self, figsize=(6, 4), show=True):
        """
        Plots the ROC curve between the predicted and actual labels using scikit-plot
        Note: "actual" labels must be a 1D array
        NOTE(review): scikit-plot's plot_roc is documented to take per-class
        probabilities as its second argument -- confirm `predicted` has that form

        Input:
           - figsize: tuple, the figure size of the desired plot
           - show: boolean, whether you want to plt.show() your figure (the figure is not saved)
        """
        skplt.metrics.plot_roc(self.actual, self.predicted, figsize=figsize)
        if show:
            plt.show()
        plt.close()

    def ROC_plot(self, figsize=(6, 4), show=True):
        """
        Plots a ROC curve from hard binary predictions (label 1 is the positive class)
        and saves it as "ROC.jpeg"

        Hard labels give a single operating point, so the curve is drawn through
        (0, 0), (FPR, TPR) and (1, 1).

        Input:
           - figsize: tuple, the figure size of the desired plot
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        false_positive_rate, true_positive_rate = self._binary_rates(self.actual, self.predicted)

        file_name = "ROC.jpeg"
        plt.figure(figsize=figsize)
        plt.title("ROC", fontsize=14)
        plt.yticks(fontsize=14)
        plt.xticks(fontsize=14)
        plt.ylabel("TPR", fontsize=14)
        plt.xlabel("FPR", fontsize=14)
        plt.plot([0.0, false_positive_rate, 1.0], [0.0, true_positive_rate, 1.0])
        plt.savefig(file_name)
        if show:
            plt.show()
        plt.close()

    def residual_dist_by_feature(self, figsize=(6, 8), target='Target', hex_bin=False, show=True):
        """
        Plots the fractional error of the predictions against every column of
        `feature_list` (one panel per column) and saves the whole figure as
        "<target>_errors_by_feature.pdf"

        Input:
           - figsize: tuple, the (width, height) of ONE panel; the figure height is
                      multiplied by the number of features
           - target: string, name of the target, used in the file name
           - hex_bin: boolean, draw a log-scaled hexbin instead of a scatter plot
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        file_name = '{}_errors_by_feature.pdf'.format(target)
        # Calculate residuals as fractional error.
        error = self._fractional_error(self.actual, self.predicted)

        columns = list(self.feature_list.columns)
        num_features = len(columns)
        figure_width, figure_height = figsize
        fig = plt.figure(figsize=(figure_width, figure_height * num_features))

        for position, column in enumerate(columns, start=1):
            ax = fig.add_subplot(num_features, 1, position)
            # Plot the errors vs. feature.
            if hex_bin:
                ax.hexbin(self.feature_list[column], error, bins='log')
            else:
                ax.plot(self.feature_list[column], error, '.', alpha=0.2)
            ax.set_xlabel(column, fontsize=14)
            ax.set_ylabel('Fractional Error', fontsize=14)
            ax.tick_params(axis='both', labelsize=14)
            ax.set_title('Fractional Error as a function of {}'.format(column), fontsize=14)

        # Save once, after all panels exist, so the file contains every feature.
        fig.savefig(file_name, bbox_inches='tight')
        if show:
            plt.show()
        plt.close(fig)

    def one_to_one_plot(self, target_name='Target', axis_scale='linear', show=True):
        """
        Plots predicted vs. true values with a dashed y = x reference line and
        saves the figure as "<target_name>_One_to_One.pdf"

        Input:
           - target_name: string, name of the target, used in labels and the file name
           - axis_scale: string, matplotlib scale applied to both axes (e.g. 'linear', 'log')
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        file_name = '{}_One_to_One.pdf'.format(target_name)
        plt.figure()
        plt.plot(self.actual, self.predicted, '.')
        plt.yticks(fontsize=14)
        plt.xticks(fontsize=14)
        plt.xlabel('True {}'.format(target_name), fontsize=14)
        plt.ylabel('Predicted {}'.format(target_name), fontsize=14)
        plt.title('One to one plot showing predicted vs. true {}'.format(target_name), fontsize=14)
        plt.xscale(axis_scale)
        plt.yscale(axis_scale)

        # Two end points are enough for a straight line; spanning both arrays keeps
        # the reference visible under every plotted point and also works when all
        # values are identical.
        lowest = min(np.min(self.actual), np.min(self.predicted))
        highest = max(np.max(self.actual), np.max(self.predicted))
        plt.plot([lowest, highest], [lowest, highest], 'r--')

        plt.savefig(file_name)
        if show:
            plt.show()
        plt.close()

    def target_distributions(self, target='Target', x_scale='linear', y_scale='linear', show=True):
        """
        Plots overlaid 50-bin histograms of the true and predicted values and saves
        the figure as "<target>_distributions.pdf"

        Input:
           - target: string, name of the target, used in labels and the file name
           - x_scale: string, matplotlib scale of the x axis
           - y_scale: string, matplotlib scale of the y axis
           - show: boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        file_name = '{}_distributions.pdf'.format(target)
        # Assign colors for each group and the names
        colors = ['#E69F00', '#56B4E9']
        names = ['True {}'.format(target), 'Predicted {}'.format(target)]

        plt.figure()
        plt.hist([self.actual, self.predicted], bins=50, color=colors, label=names)
        plt.yscale(y_scale)
        plt.xscale(x_scale)
        plt.yticks(fontsize=14)
        plt.xticks(fontsize=14)
        # Plot formatting
        plt.legend()
        plt.xlabel(target, fontsize=14)
        plt.title('{} distributions for True and Predicted'.format(target), fontsize=14)
        plt.savefig(file_name)
        if show:
            plt.show()
        plt.close()

    def plot_sample_img(self, data, labels, figsize, filename="Image_Sample.png", show=True):
        """
        Plots data where each row consists of the same image in different bands

        Input:
          data - an array of shape [batch_size, channels, height, width] OR [batch_size, height, width, channels]
                 (treated as channels-first when shape[1] < shape[3])
          labels - a 1D array of labels that match to the corresponding image
          figsize - the figure size of the main plot
          filename - saved filename
          show - boolean, whether you want to plt.show() your figure or just save it to your computer
        """
        images = self._to_channels_first(data)
        num_imgs = len(images)
        num_bands = images.shape[1]

        plt.figure(figsize=figsize)
        position = 1
        for index, bands in enumerate(images):
            label_text = "Label: " + str(labels[index])
            for band in bands:
                plt.subplot(num_imgs, num_bands, position)
                plt.imshow(band, cmap='gray')
                plt.title(label_text, fontsize=14)
                position += 1

        plt.savefig(filename)
        if show:
            plt.show()
        plt.close()

    def output_average_precision(self):
        """
        Prints the average precision score of `predicted` against `actual`
        and returns it
        """
        average_precision = average_precision_score(self.actual, self.predicted)
        print('Average precision-recall score: {0:0.2f}'.format(
            average_precision))
        return average_precision

    def precision_recall_plot(self, show=True):
        """
        Plots the 2-class precision-recall curve with the average precision in the title

        Input:
           - show: boolean, whether you want to plt.show() your figure (the figure is not saved)
        """
        # precision_recall_curve returns (precision, recall, thresholds).
        precision, recall, _ = precision_recall_curve(self.actual, self.predicted)
        average_precision = average_precision_score(self.actual, self.predicted)

        # Only pass `step` when the installed matplotlib's fill_between accepts it.
        step_kwargs = ({'step': 'post'}
                       if 'step' in signature(plt.fill_between).parameters
                       else {})
        plt.figure()
        plt.step(recall, precision, color='b', alpha=0.2,
                 where='post')
        plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
            average_precision))
        if show:
            plt.show()
        plt.close()

    def run_diagnostics(self, show=True):
        """
        Placeholder: the original definition had no body, so the set of
        diagnostics to run is undefined. Raises NotImplementedError.
        """
        raise NotImplementedError("run_diagnostics has not been implemented yet")
                                          



           
   
    
    
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  