# Implementation of Quadratic Weighted Kappa

In [1]:
import itertools

import numpy as np, pandas as pd, matplotlib.pyplot as plt
import scipy.misc as smp
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, confusion_matrix
from keras import models, optimizers, Sequential, regularizers, layers
from keras.models import load_model, model_from_json
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import to_categorical
from keras.applications import VGG16, VGG19
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Flatten, Dropout, Reshape, Dense, GlobalAveragePooling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D

# Seed NumPy's global random number generator so NumPy-driven randomness repeats across runs.
# NOTE(review): this presumably does not seed the Keras/TensorFlow backend by itself — confirm
# whether weight initialisation and dropout need their own seed.
np.random.seed(123)

Using TensorFlow backend.


In [8]:
ls

 [0m[01;34mdata[0m/                            setup_and_baselines.ipynb
 diabetic_retinopathy.h5          vgg16_1.h5
 EDA.ipynb                        vgg19_2.h5
'PyTorch implementation.ipynb'    vgg19_first_pass.ipynb
 quadratic_weighted_kappa.ipynb


In [9]:
# Root folder of the training images; handed to flow_from_directory in the next cell.
directory = 'data/train_images/'

In [10]:
# Directory iterator over the training images: pixel values are scaled by 1/255 and every
# image is resized to 224x224.
data_all = ImageDataGenerator(rescale=1. / 255).flow_from_directory(
    directory=directory,
    target_size=(224, 224),
    batch_size=3662,  # hard-coded to the dataset size so a single batch yields every image
    seed=123,
)

Found 3662 images belonging to 5 classes.


In [11]:
# Pull a single batch from the iterator. The batch size above is 3662, the same as the number
# of images found, so this one batch holds the whole dataset in memory.
# NOTE(review): labels are presumably one-hot rows (flow_from_directory's default class_mode) —
# confirm before passing them to anything that expects integer class indices.
images, labels = next(data_all)

In [12]:
# Hold out 20% of all images as the final test set, stratified on the labels.
X_model, X_test, y_model, y_test = train_test_split(
    images,
    labels,
    test_size=0.20,
    random_state=123,
    stratify=labels,
)

In [13]:
# Split the remaining modelling data again: 20% of it becomes the validation set,
# stratified on its labels.
X_train, X_val, y_train, y_val = train_test_split(
    X_model,
    y_model,
    test_size=0.20,
    random_state=123,
    stratify=y_model,
)

In [None]:
def quadratic_weighted_kappa(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Calculate the quadratic weighted kappa between two sets of ratings.

    Quadratic weighted kappa measures inter-rater agreement between two
    raters that provide discrete numeric ratings.  Values range from -1
    (complete disagreement) to 1 (complete agreement); 0 is expected when
    all agreement is due to chance.  Disagreements are penalised by the
    squared distance between the two ratings.

    The function is self-contained: the confusion matrix and the per-rater
    histograms are built here with NumPy, so no helper functions are needed.

    Parameters
    ----------
    rater_a, rater_b : sequence of int
        Ratings from each rater, one entry per scored item.  Both must have
        the same length.  Values are cast to int.  One-hot encoded labels
        must be converted to class indices first (e.g. ``np.argmax(y, axis=1)``).
    min_rating : int, optional
        Smallest possible rating.  Defaults to the smallest rating observed
        in either input.
    max_rating : int, optional
        Largest possible rating.  Defaults to the largest rating observed
        in either input.

    Returns
    -------
    float
        The quadratic weighted kappa.  When the expected disagreement is
        zero (both raters gave one and the same rating to every item) the
        statistic is mathematically undefined; 1.0 is returned because the
        raters agree on every item.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    ValueError
        If the inputs are empty, if ``max_rating < min_rating``, or if any
        rating lies outside ``[min_rating, max_rating]``.
    """
    rater_a = np.array(rater_a, dtype=int)
    rater_b = np.array(rater_b, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if rater_a.size == 0:
        raise ValueError("quadratic_weighted_kappa needs at least one pair of ratings")

    lowest_seen = int(min(rater_a.min(), rater_b.min()))
    highest_seen = int(max(rater_a.max(), rater_b.max()))
    min_rating = lowest_seen if min_rating is None else int(min_rating)
    max_rating = highest_seen if max_rating is None else int(max_rating)
    if max_rating < min_rating:
        raise ValueError("max_rating must not be smaller than min_rating")
    if lowest_seen < min_rating or highest_seen > max_rating:
        raise ValueError("ratings must lie within [min_rating, max_rating]")

    num_ratings = max_rating - min_rating + 1
    num_scored_items = float(len(rater_a))

    # conf_mat[i, j] counts items rated (min_rating + i) by rater A and
    # (min_rating + j) by rater B.  np.add.at accumulates repeated index pairs.
    conf_mat = np.zeros((num_ratings, num_ratings), dtype=float)
    np.add.at(conf_mat, (rater_a - min_rating, rater_b - min_rating), 1.0)

    # Per-rater rating histograms are the marginals of the confusion matrix.
    hist_rater_a = conf_mat.sum(axis=1)
    hist_rater_b = conf_mat.sum(axis=0)

    # Counts expected in each cell if the two raters were independent.
    expected = np.outer(hist_rater_a, hist_rater_b) / num_scored_items

    # Squared-distance weights.  The usual 1 / (num_ratings - 1)**2 scaling
    # cancels in the ratio below, so it is omitted; this also avoids a zero
    # division when only one rating value is possible.
    grid = np.arange(num_ratings)
    weights = (grid[:, None] - grid[None, :]) ** 2.0

    numerator = (weights * conf_mat).sum() / num_scored_items
    denominator = (weights * expected).sum() / num_scored_items

    if denominator == 0.0:
        # Both raters used a single, identical rating for every item.
        return 1.0

    return float(1.0 - numerator / denominator)

In [None]:
# Small baseline CNN for 224x224 RGB inputs: two convolution + max-pooling stages, then
# dropout and a dense head that ends in a 5-way softmax.
cnn = models.Sequential([
    layers.Conv2D(224, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(112, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(56, activation='relu'),
    layers.Dense(5, activation='softmax'),
])

# Train with plain SGD on categorical cross-entropy and report accuracy under the name 'acc'.
cnn.compile(
    optimizer="sgd",
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Training callbacks for the simple CNN.
# ModelCheckpoint must be imported from keras.callbacks together with EarlyStopping;
# without that import this cell fails with a NameError.
# NOTE(review): an earlier comment here mentioned vgg16_1.h5 / vgg19_2.h5; it appears to have
# been carried over from the VGG19 notebook and does not describe this cell, which writes
# "simple_qwk_test.h5".

# Save the whole model (not just the weights) whenever validation accuracy improves.
checkpoint = ModelCheckpoint(
    "simple_qwk_test.h5",
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# Stop training once validation accuracy has not improved for 10 epochs.
early = EarlyStopping(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

