In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.style as style
import numpy as np
import scipy.stats as spstat
import seaborn as sns
import math

def create_classic():
    """Switch matplotlib to the 'classic' style sheet with notebook figure defaults.

    Reloads the style library, activates 'classic', then overrides three
    figure rcParams: facecolor RGBA (1, 1, 1, 0), figsize 6.0 x 4.0 and dpi 100.
    """
    style.reload_library()
    style.use('classic')

    # Overrides are applied after the style sheet so they take precedence over it.
    figure_overrides = (
        ('figure.facecolor', (1, 1, 1, 0)),
        ('figure.figsize', [6.0, 4.0]),
        ('figure.dpi', 100),
    )
    for rc_key, rc_value in figure_overrides:
        mpl.rcParams[rc_key] = rc_value
    
def create_ggplot():
    """Switch matplotlib to the 'ggplot' style sheet with notebook figure defaults.

    Reloads the style library, activates 'ggplot', then overrides three
    figure rcParams: facecolor RGBA (1, 1, 1, 0), figsize 6.0 x 4.0 and dpi 100.
    """
    style.reload_library()
    # Previously this applied 'classic' (copy-paste from create_classic), so the
    # function never actually selected the ggplot style it is named after.
    style.use('ggplot')
    mpl.rcParams['figure.facecolor'] = (1, 1, 1, 0)
    mpl.rcParams['figure.figsize'] = [6.0, 4.0]
    mpl.rcParams['figure.dpi'] = 100

In [5]:
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve, roc_auc_score
import io
import numpy as np
import numpy.ma as ma

def write_labels_scores(labels, scores, file):
    """Pickle paired labels and scores as a two-column DataFrame.

    Args:
        labels: iterable of labels.
        scores: iterable of scores, paired positionally with ``labels``
            (pairing stops at the shorter of the two, as with ``zip``).
        file: destination passed to ``DataFrame.to_pickle``.

    The stored frame has the columns 'label' and 'score'.
    """
    rows = [pair for pair in zip(labels, scores)]
    pd.DataFrame(rows, columns=['label', 'score']).to_pickle(file)

def read_labels_scores(file):
    """Load a pickled DataFrame and return its 'label' and 'score' columns.

    Args:
        file: source passed to ``pandas.read_pickle``. Unpickling can execute
            arbitrary code, so only load files from a trusted origin.

    Returns:
        A ``(labels, scores)`` tuple holding the ``.values`` of each column.
    """
    frame = pd.read_pickle(file)
    return tuple(frame[column].values for column in ("label", "score"))

def write_tpr_fpr(tpr, fpr, file):
    """Pickle paired tpr/fpr values as a two-column DataFrame.

    Args:
        tpr: iterable of true-positive-rate values.
        fpr: iterable of false-positive-rate values, paired positionally with
            ``tpr`` (pairing stops at the shorter of the two, as with ``zip``).
        file: destination passed to ``DataFrame.to_pickle``.

    The stored frame has the columns 'tpr' and 'fpr'.
    """
    rows = [pair for pair in zip(tpr, fpr)]
    pd.DataFrame(rows, columns=['tpr', 'fpr']).to_pickle(file)

def read_tpr_fpr(file):
    """Load a pickled DataFrame and return its 'tpr' and 'fpr' columns.

    Args:
        file: source passed to ``pandas.read_pickle``. Unpickling can execute
            arbitrary code, so only load files from a trusted origin.

    Returns:
        A ``(tpr, fpr)`` tuple holding the ``.values`` of each column.

    Raises:
        KeyError: if the stored frame lacks a 'tpr' or 'fpr' column.
    """
    # The leftover debug print of the column index was removed so that loading
    # is silent, matching read_labels_scores.
    df = pd.read_pickle(file)
    tpr = df["tpr"].values
    fpr = df["fpr"].values
    return tpr, fpr

In [2]:
def plot_precision_recall_curve(labels, scores, **kwargs):
    """Plot and show the precision-recall curve for the given labels and scores.

    Args:
        labels: ground-truth labels, forwarded to ``precision_recall_curve``.
        scores: predicted scores, forwarded to ``precision_recall_curve``.
        **kwargs: accepted for signature parity with the other plot helpers;
            currently unused.

    Draws a filled step curve (recall on x, precision on y) with both axes
    fixed to [0, 1] and calls ``plt.show()``. Returns None.
    """
    # The ROC curve was previously computed here as well but never used; that
    # dead computation has been dropped.
    precision, recall, _ = precision_recall_curve(labels, scores)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.0])
    plt.xlim([0.0, 1.0])
    plt.show()
    return

def plot_roc_curve(labels, scores, **kwargs):
    """Draw the ROC curve for the given labels and scores on the current figure.

    The figure is not shown here (no ``plt.show()``), so several curves can be
    layered by calling this repeatedly before showing.

    Args:
        labels: ground-truth labels, forwarded to ``roc_curve``.
        scores: predicted scores, forwarded to ``roc_curve``.
        **kwargs: optional settings:
            auc: if truthy, 'AUC:<value>' is appended to the legend label.
            label: legend label prefix; when given the curve uses the default
                colour cycle and linewidth 3, otherwise a blue curve is drawn.
            logx: if truthy, the curve is additionally drawn with
                ``plt.semilogx``.
            fpr_threshold: draws a vertical line at this fpr and reports the
                tpr of a nearby ROC point in the legend.
            xlabel, ylabel: axis labels (default 'fpr' / 'tpr').
            xlim, ylim: axis limits (default [0.0, 1.0]).
            title: figure title (default empty).

    Returns:
        None.
    """
    fpr, tpr, _ = roc_curve(labels, scores)

    label_auc = ''
    if kwargs.get('auc'):
        score = roc_auc_score(labels, scores)
        label_auc = 'AUC:%0.2f' % score

    if 'label' in kwargs:
        label = kwargs['label']
        plt.step(fpr, tpr, alpha=0.5, where='post', label=label + label_auc, linewidth=3)
    else:
        plt.step(fpr, tpr, color='b', alpha=0.5, where='post', label=label_auc)

    # Honour the flag's value (like 'auc'), so logx=False no longer enables it.
    if kwargs.get('logx'):
        plt.semilogx(fpr, tpr)

    fpr_threshold = kwargs.get('fpr_threshold')
    if fpr_threshold is not None:
        # Pick the ROC point in the middle of the +/-10% band around the threshold.
        delta = fpr_threshold * 0.1
        in_band = np.flatnonzero(
            np.logical_and(fpr < fpr_threshold + delta, fpr > fpr_threshold - delta))
        if in_band.size:
            j = int(np.round(np.mean(in_band)))
        else:
            # An empty band (threshold 0, or a sparse curve) used to raise on
            # int(nan); fall back to the point whose fpr is closest.
            j = int(np.argmin(np.abs(fpr - fpr_threshold)))
        plt.axvline(fpr_threshold, color='r',
                    label='fpr:%0.2f, throttle rate:%0.2f' % (fpr_threshold, tpr[j]))

    plt.xlabel(kwargs.get('xlabel', 'fpr'))
    plt.ylabel(kwargs.get('ylabel', 'tpr'))
    plt.xlim(kwargs.get('xlim', [0.0, 1.0]))
    plt.ylim(kwargs.get('ylim', [0.0, 1.0]))
    plt.title(kwargs.get('title', ""))
    plt.legend(loc="upper left")
    return

def plot_precision_reach_curve(labels, scores, **kwargs):
    """Plot precision against reach when targeting the top-scored samples.

    Samples are ranked by descending score. For each cutoff k = 1 .. n-1 the
    precision among the top k samples is plotted against the reach, i.e. the
    selected fraction k/n scaled to a user population.

    Args:
        labels: array-like of binary labels (1 = positive, 0 = negative).
        scores: array-like of scores, one per label.
        **kwargs: optional settings:
            users: population size used to scale reach (default 7000000).

    Shows the figure, then prints the (reach, precision) pair at the last
    cutoff and at index n // 7. Returns None.
    """
    users = kwargs.get('users', 7000000)  # default: assume we have 7M users
    labels = np.asarray(labels)  # allow plain lists; fancy indexing needs an array
    n = len(labels)

    ordered_labels = labels[np.argsort(scores)[::-1]]

    # Running counts replace the former per-cutoff mask arrays, which needed
    # O(n^2) time and memory. The final cutoff (all samples selected) is
    # dropped so the curve keeps the same n-1 points as before.
    tp = np.cumsum(ordered_labels == 1)[:-1]
    fp = np.cumsum(ordered_labels == 0)[:-1]
    selected = tp + fp
    precision = tp / selected
    reach = selected / n * users

    plt.step(reach, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(reach, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Reach')
    plt.ylabel('Precision')
    plt.ylim([0.0, 0.5])
    plt.xlim([0.0, users])
    plt.show()

    # With fewer than two samples there is no cutoff to report.
    if reach.size:
        print(reach[-1], precision[-1])
        print(reach[n * 1 // 7], precision[n * 1 // 7])
    return

In [1]:
%run evaluation.ipynb
def plot_tpr_fpr(tpr, fpr, **kwargs):
    """Draw a ROC curve from precomputed tpr/fpr values on the current figure.

    Also shades the area under the curve and draws the dashed diagonal from
    (0, 0) to (1, 1). The figure is not shown here (no ``plt.show()``).

    Args:
        tpr: true-positive-rate values (note: tpr comes first).
        fpr: false-positive-rate values, aligned with ``tpr``.
        **kwargs: optional settings:
            auc: if truthy, 'AUC:<value>' (from ``get_auc_from_fpr_tpr``) is
                appended to the legend label.
            label: legend label prefix (default empty).
            logx: if truthy, the curve is drawn with ``plt.semilogx`` instead
                of a blue step curve.
            fpr_threshold: draws a vertical line at this fpr and reports the
                tpr of a nearby curve point in the legend.
            xlabel, ylabel: axis labels (default 'fpr' / 'tpr').
            xlim, ylim: axis limits (default [0.0, 1.0]).
            title: figure title (default empty).

    Returns:
        None.
    """
    label_auc = ''
    if kwargs.get('auc'):
        score = get_auc_from_fpr_tpr(fpr, tpr, verbose=False)
        label_auc = 'AUC:%0.2f' % score

    label = kwargs.get('label', '')

    # Honour the flag's value (like 'auc'), so logx=False no longer selects the
    # log-scale plot.
    if kwargs.get('logx'):
        plt.semilogx(fpr, tpr, label=label + label_auc, linewidth=3)
    else:
        plt.step(fpr, tpr, color='b', alpha=0.5, where='post', label=label + label_auc, linewidth=3)

    plt.fill_between(fpr, tpr, step='post', alpha=0.2, color='b')
    plt.plot([0.0, 1.0], [0.0, 1.0], linestyle='--', c='gray')

    fpr_threshold = kwargs.get('fpr_threshold')
    if fpr_threshold is not None:
        # Compare on an array so plain lists work for the lookup as well.
        fpr_values = np.asarray(fpr)
        # Pick the curve point in the middle of the +/-10% band around the threshold.
        delta = fpr_threshold * 0.1
        in_band = np.flatnonzero(
            np.logical_and(fpr_values < fpr_threshold + delta,
                           fpr_values > fpr_threshold - delta))
        if in_band.size:
            j = int(np.round(np.mean(in_band)))
        else:
            # An empty band (threshold 0, or a sparse curve) used to raise on
            # int(nan); fall back to the point whose fpr is closest.
            j = int(np.argmin(np.abs(fpr_values - fpr_threshold)))
        plt.axvline(fpr_threshold, color='r',
                    label='fpr:%0.2f, throttle rate:%0.2f' % (fpr_threshold, tpr[j]))

    plt.xlabel(kwargs.get('xlabel', 'fpr'))
    plt.ylabel(kwargs.get('ylabel', 'tpr'))
    plt.xlim(kwargs.get('xlim', [0.0, 1.0]))
    plt.ylim(kwargs.get('ylim', [0.0, 1.0]))
    plt.title(kwargs.get('title', ""))
    plt.legend(loc="upper left")
    return