# Calculate F-Measures based on ground truths
This notebook evaluates the detected teleconnections against ground truths taken from prior work. It computes precision, recall, F1 scores, and the ROC AUC.

In [1]:
import matplotlib.pyplot as plt
%matplotlib widget
import numpy as np
import os
import math
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import pandas as pd


In [2]:
# Utility Function
def inBoundingBox(bl, tr, p):
    """Tell whether point ``p`` lies inside the box spanned by ``bl`` and ``tr``.

    All three arguments are indexable pairs. Index 0 is tested against the
    closed interval ``[bl[0], tr[0]]``. Index 1 is tested against
    ``[bl[1], tr[1]]``, except that when ``tr[1] < bl[1]`` the interval is
    treated as wrapping around: the point matches if it is at or beyond
    ``bl[1]`` or at or before ``tr[1]``. Callers in this notebook pass
    ``[lat, lng]`` pairs, so the wrap applies to the longitude.

    Returns a truthy value when the point is inside (bounds inclusive).
    """
    lat, lng = p[0], p[1]
    south, west = bl[0], bl[1]
    north, east = tr[0], tr[1]

    if east < west:
        # Wrapping interval: accept either side of the seam.
        lngInside = lng >= west or lng <= east
    else:
        lngInside = west <= lng <= east

    return south <= lat <= north and lngInside

In [None]:
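# Results recorded from earlier runs of the resolution sweep below
# (presumably maxAUC, maxDay for each method -- TODO confirm):
#   Causality: 0.7323946969384352, 29
#   Lagged:    0.7362063171681619, 16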

In [6]:
# Sweep the temporal resolution (7-30 days) of the lagged-correlation results and
# score each resolution against the El Nino / La Nina ground-truth cells.
# For every resolution a precision/recall/F1 figure and a ROC figure are saved;
# the AUC and the best F1 are collected in AUCByDay / maxf1ByDay.
# Change dataName (inside the loop) to the corresponding datafile in Data folder.
propertyName = "pvalue"
# The threshold sweep treats larger scores as stronger detections, so the
# selected property is negated before ranking when useReverse is True.
useReverse = True

# np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever is available.
integrateTrapezoid = getattr(np, "trapezoid", None) or np.trapz

# ---- Setup that does not depend on `days` (built once, not per iteration) ----
# Each grid entry is [lat0, lat1, lng0, lng1] taken from consecutive mesh nodes,
# ordered with the longitude index varying slowest.
nx, ny = (288+1, 70*2+1)
x = np.linspace(-180+1.25*0.5, 180+1.25*0.50, nx)
y = np.linspace(-70+0.5, 70+0.5, ny)
xv, yv = np.meshgrid(x, y, sparse=False, indexing='ij')
gridRegions = [
    [yv[xi, yi], yv[xi+1, yi+1], xv[xi, yi], xv[xi+1, yi+1]]
    for xi in range(nx-1)
    for yi in range(ny-1)
]

# Evaluation boxes as [[lat, lng] of one corner, [lat, lng] of the opposite corner].
# A box whose second longitude is smaller than its first (e.g. "Australia")
# is treated by inBoundingBox as wrapping around in longitude.
boxes = {
    "Australia": [[-50, 90], [25, -110]],
    "Asia": [[0, 45], [50, 100]],
    "Africa": [[-40, -20], [20, 55]],
    "S. America": [[-20, -90], [24, -35]],
    "S. America 2": [[-60, -90], [-21, -40]],
    "N. America": [[25, -130], [60, -55]],
}

# Cells inside this box are excluded from every evaluation box.
ENSORegion = [[-5, -170], [5, -120]]

dfelnino = pd.read_csv("../Data/elninoPoints_v2.csv")
dflanina = pd.read_csv("../Data/laninaPoints_v2.csv")
# A cell is a ground-truth positive when either file flags it. The cast keeps
# the later `~` a logical NOT even if the "inside" columns load as 0/1 integers.
# NOTE(review): assumes "inside" has no missing values -- confirm.
insideRegions = (dfelnino["inside"].to_numpy() + dflanina["inside"].to_numpy()).astype(bool)
assert len(insideRegions) == len(gridRegions), \
    "ground-truth files must contain exactly one row per grid cell"

# Each cell is located by its first latitude / first longitude entry.
# NOTE(review): this used to be np.mean(region[0:1]) / np.mean(region[2:3]),
# i.e. the mean of a single value. If the cell centre was intended, the
# slices should be [0:2] / [2:4]. The existing behaviour is kept -- confirm.
cellPoints = [(region[0], region[2]) for region in gridRegions]
outsideENSO = np.array(
    [not inBoundingBox(ENSORegion[0], ENSORegion[1], point) for point in cellPoints],
    dtype=bool,
)

inAllBoxes = np.zeros(len(gridRegions), dtype=bool)
inBoxes = {}
inRegionCount = 0
outRegionCount = 0
for key, box in boxes.items():
    thisInBoxes = np.array(
        [bool(inBoundingBox(box[0], box[1], point)) for point in cellPoints],
        dtype=bool,
    ) & outsideENSO
    inBoxes[key] = thisInBoxes
    inAllBoxes |= thisInBoxes
    # Counts accumulate per box, so a cell lying in two boxes is counted twice.
    inRegionCount += int(np.count_nonzero(insideRegions[thisInBoxes]))
    outRegionCount += int(np.count_nonzero(~insideRegions[thisInBoxes]))

# ---- Per-resolution evaluation ----
maxAUC = 0
maxDay = 0
maxf1ByDay = []
AUCByDay = []
for days in range(7, 31):
    dataName = "LaggedCorrelation_d%d_l180_nx288_ny140_R0-100_v8" % days

    dfOrig = pd.read_csv("../Data/%s.csv" % dataName)
    if useReverse:
        dfOrig["sortedProperty"] = -dfOrig[propertyName]
    else:
        dfOrig["sortedProperty"] = dfOrig[propertyName]
    # Float copy: the NaN mask below then works for any numeric column and
    # never writes back into dfOrig.
    pvalues = dfOrig["sortedProperty"].to_numpy(dtype=float, copy=True)
    assert len(pvalues) == len(gridRegions), \
        "%s.csv must contain exactly one row per grid cell" % dataName
    pvalues[~inAllBoxes] = np.nan

    positivePValues = np.array(pvalues)

    # To evaluate every box separately, iterate over list(inBoxes.keys())+["All"].
    for key in ["All"]:
        insideBox = inAllBoxes if key == "All" else inBoxes[key]

        selectedPValue = positivePValues[insideBox]
        selectedInsideRegions = insideRegions[insideBox]
        # Ascending score order. NaN scores, if the data has any inside the
        # boxes, sort last and therefore rank as the strongest detections.
        selectedIndices = np.argsort(selectedPValue)
        cellCount = len(selectedIndices)
        percentiles = np.linspace(100, 0, cellCount)

        # Threshold sweep: at step i the cells ranked i, i+1, ... (the highest
        # scores) are predicted positive and the i cells ranked below are
        # predicted negative. Cumulative sums give all confusion matrices at once.
        steps = np.arange(cellCount)
        rankedTruth = selectedInsideRegions[selectedIndices].astype(np.int64)
        FN = np.cumsum(rankedTruth) - rankedTruth   # positives ranked below step i
        TP = rankedTruth.sum() - FN
        TN = steps - FN
        FP = (cellCount - steps) - TP

        # 0/0 (e.g. F1 when TP == 0) is left as NaN instead of raising a warning.
        with np.errstate(divide="ignore", invalid="ignore"):
            TPRs = TP / (TP + FN)
            FPRs = FP / (FP + TN)
            precisions = TP / (TP + FP)
            recalls = TP / (TP + FN)
            FMeasures = 2.0*(precisions*recalls)/(precisions+recalls)
            F2Measures = (1+4)*(precisions*recalls)/(4*precisions+recalls)

        # Precision / recall / F1 against the percentile of the threshold.
        fig, ax = plt.subplots(figsize=(5, 2.5))
        ax.plot(percentiles, recalls, label="Recall")
        ax.plot(percentiles, precisions, label="Precision")
        ax.plot(percentiles, FMeasures, label="F1")
        ax.set_title(key)
        ax.set_xlabel("Percentile")
        ax.legend(loc='lower right', fontsize='small', fancybox=False,
                  labelspacing=0.3, frameon=False, borderaxespad=0.1)
        fig.tight_layout()
        fig.savefig("../Figures/%s_v2_test_%s.pdf" % (dataName, key))
        plt.close(fig)

        # ROC curve. FPRs decrease along the sweep, hence the negated integral.
        AUC = -integrateTrapezoid(TPRs, FPRs)
        fig, ax = plt.subplots(figsize=(2.5, 2.5))
        ax.plot(FPRs, TPRs, color="#991111")
        ax.plot([0, 1], [0, 1], "--", color="#AAAAAA")
        ax.set_xlabel("False Positives Ratio")
        ax.set_ylabel("True Positives Ratio")
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_title(key+" (AUC=%.2f)" % AUC)
        fig.tight_layout()
        fig.savefig("../Figures/%s_AUC_v2_test_%s.pdf" % (dataName, key))
        plt.close(fig)

        if AUC > maxAUC:
            maxAUC = AUC
            maxDay = days
        AUCByDay.append(AUC)
        # nanmax: one degenerate threshold (F1 = 0/0) must not turn the
        # per-resolution maximum into NaN.
        maxf1ByDay.append(np.nanmax(FMeasures))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [5]:
# Summary figure: AUC and best F1 for each temporal resolution of the sweep.
# AUCByDay / maxf1ByDay hold one value per resolution in range(7, 31);
# dataName is whatever the sweep cell assigned last.
resolutionDays = np.arange(7, 31)

fig, ax = plt.subplots(figsize=(4.5, 3.5))
ax.plot(resolutionDays, AUCByDay, label="AUC")
ax.plot(resolutionDays, maxf1ByDay, label="Max F-1")
ax.set_xlabel("Resolution (days)")
ax.set_ylabel("Classification metric")
ax.set_ylim(0.6, 0.75)
ax.legend()
ax.set_title(dataName)
fig.tight_layout()
fig.savefig("../Figures/AUCAndF1_%s.pdf" % (dataName))
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [3]:
# Score a combination of the Causality and LaggedCorrelation count maps
# (Causality + 0.5 * LaggedCorrelation) against the El Nino / La Nina
# ground-truth cells, and plot precision/recall/F1/F2 and the ROC curve.
# Change dataName / dataName2 to the corresponding datafiles in Data folder.
dataName = "Causality_dAll_l180_nx288_ny140_R0-100_p0.00100_v8"
dataName2 = "LaggedCorrelation_dAll_l180_nx288_ny140_R0-100_p0.00100_v8"

propertyName = "counts"

# np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever is available.
integrateTrapezoid = getattr(np, "trapezoid", None) or np.trapz

# Each grid entry is [lat0, lat1, lng0, lng1] taken from consecutive mesh nodes,
# ordered with the longitude index varying slowest.
nx, ny = (288+1, 70*2+1)
x = np.linspace(-180+1.25*0.5, 180+1.25*0.50, nx)
y = np.linspace(-70+0.5, 70+0.5, ny)
xv, yv = np.meshgrid(x, y, sparse=False, indexing='ij')
gridRegions = [
    [yv[xi, yi], yv[xi+1, yi+1], xv[xi, yi], xv[xi+1, yi+1]]
    for xi in range(nx-1)
    for yi in range(ny-1)
]

# Evaluation boxes as [[lat, lng] of one corner, [lat, lng] of the opposite corner].
# A box whose second longitude is smaller than its first (e.g. "Australia")
# is treated by inBoundingBox as wrapping around in longitude.
boxes = {
    "Australia": [[-50, 90], [25, -110]],
    "Asia": [[0, 45], [50, 100]],
    "Africa": [[-40, -20], [20, 55]],
    "S. America": [[-20, -90], [24, -35]],
    "S. America 2": [[-60, -90], [-21, -40]],
    "N. America": [[25, -130], [60, -55]],
}

# Cells inside this box are excluded from every evaluation box.
ENSORegion = [[-5, -170], [5, -120]]

dfelnino = pd.read_csv("../Data/elninoPoints_v2.csv")
dflanina = pd.read_csv("../Data/laninaPoints_v2.csv")

dfOrig = pd.read_csv("../Data/%s.csv" % dataName)
# Fixed: this previously re-read dataName, so the "combination" below was just
# 1.5x the Causality counts and dataName2 was never used.
dfOrig2 = pd.read_csv("../Data/%s.csv" % dataName2)

# Build the combined score as a new float array, so neither DataFrame is
# modified and integer count columns are handled.
pvalues2 = dfOrig2[propertyName].to_numpy(dtype=float)
pvalues = dfOrig[propertyName].to_numpy(dtype=float) + pvalues2*0.5
assert len(pvalues) == len(gridRegions), \
    "data files must contain exactly one row per grid cell"

# A cell is a ground-truth positive when either file flags it. The cast keeps
# the later `~` a logical NOT even if the "inside" columns load as 0/1 integers.
# NOTE(review): assumes "inside" has no missing values -- confirm.
insideRegions = (dfelnino["inside"].to_numpy() + dflanina["inside"].to_numpy()).astype(bool)
assert len(insideRegions) == len(gridRegions), \
    "ground-truth files must contain exactly one row per grid cell"

# Each cell is located by its first latitude / first longitude entry.
# NOTE(review): this used to be np.mean(region[0:1]) / np.mean(region[2:3]),
# i.e. the mean of a single value. If the cell centre was intended, the
# slices should be [0:2] / [2:4]. The existing behaviour is kept -- confirm.
cellPoints = [(region[0], region[2]) for region in gridRegions]
outsideENSO = np.array(
    [not inBoundingBox(ENSORegion[0], ENSORegion[1], point) for point in cellPoints],
    dtype=bool,
)

inAllBoxes = np.zeros(len(gridRegions), dtype=bool)
inBoxes = {}
inRegionCount = 0
outRegionCount = 0
for key, box in boxes.items():
    thisInBoxes = np.array(
        [bool(inBoundingBox(box[0], box[1], point)) for point in cellPoints],
        dtype=bool,
    ) & outsideENSO
    inBoxes[key] = thisInBoxes
    inAllBoxes |= thisInBoxes
    # Counts accumulate per box, so a cell lying in two boxes is counted twice.
    inRegionCount += int(np.count_nonzero(insideRegions[thisInBoxes]))
    outRegionCount += int(np.count_nonzero(~insideRegions[thisInBoxes]))

pvalues[~inAllBoxes] = np.nan

positivePValues = np.array(pvalues)

# To evaluate every box separately, iterate over list(inBoxes.keys())+["All"].
for key in ["All"]:
    insideBox = inAllBoxes if key == "All" else inBoxes[key]

    selectedPValue = positivePValues[insideBox]
    selectedInsideRegions = insideRegions[insideBox]
    # Ascending score order. NaN scores, if the data has any inside the
    # boxes, sort last and therefore rank as the strongest detections.
    selectedIndices = np.argsort(selectedPValue)
    cellCount = len(selectedIndices)
    percentiles = np.linspace(100, 0, cellCount)

    # Threshold sweep: at step i the cells ranked i, i+1, ... (the highest
    # scores) are predicted positive and the i cells ranked below are
    # predicted negative. Cumulative sums give all confusion matrices at once.
    steps = np.arange(cellCount)
    rankedTruth = selectedInsideRegions[selectedIndices].astype(np.int64)
    FN = np.cumsum(rankedTruth) - rankedTruth   # positives ranked below step i
    TP = rankedTruth.sum() - FN
    TN = steps - FN
    FP = (cellCount - steps) - TP

    # 0/0 (e.g. F1 when TP == 0) is left as NaN instead of raising a warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        TPRs = TP / (TP + FN)
        FPRs = FP / (FP + TN)
        precisions = TP / (TP + FP)
        recalls = TP / (TP + FN)
        FMeasures = 2.0*(precisions*recalls)/(precisions+recalls)
        F2Measures = (1+4)*(precisions*recalls)/(4*precisions+recalls)

    # Precision / recall / F1 / F2 against the percentile of the threshold.
    # The figure file names are still derived from dataName only.
    fig, ax = plt.subplots(figsize=(5, 2.5))
    ax.plot(percentiles, recalls, label="Recall")
    ax.plot(percentiles, precisions, label="Precision")
    ax.plot(percentiles, FMeasures, label="F1")
    ax.plot(percentiles, F2Measures, label="F2")
    ax.set_title(key)
    ax.set_xlabel("Percentile")
    ax.legend(loc='lower right', fontsize='small', fancybox=False,
              labelspacing=0.3, frameon=False, borderaxespad=0.1)
    fig.tight_layout()
    fig.savefig("../Figures/%s_v2_test_%s.pdf" % (dataName, key))
    plt.show()

    # ROC curve. FPRs decrease along the sweep, hence the negated integral.
    AUC = -integrateTrapezoid(TPRs, FPRs)
    fig, ax = plt.subplots(figsize=(2.5, 2.5))
    ax.plot(FPRs, TPRs, color="#991111")
    ax.plot([0, 1], [0, 1], "--", color="#AAAAAA")
    ax.set_xlabel("False Positives Ratio")
    ax.set_ylabel("True Positives Ratio")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(key+" (AUC=%.2f)" % AUC)
    fig.tight_layout()
    fig.savefig("../Figures/%s_AUC_v2_test_%s.pdf" % (dataName, key))
    plt.show()
    

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …