# Draw regions found by the metrics in the map
This notebook produces the map figures: it clusters the statistically significant grid cells of each metric and draws them on a world map.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import geopandas as gpd
from scipy.sparse import coo_matrix,csr_matrix,save_npz,load_npz
%matplotlib widget
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
import os
import math
import numpy as np
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from matplotlib.patches import Rectangle
import matplotlib.colors as mpc
import matplotlib.ticker as ticker
from collections import Counter
import matplotlib.patches as mpatches
import matplotlib as mpl

In [None]:
%load_ext Cython

In [None]:
%%cython

cimport numpy as np
import numpy as np
cimport cython
ctypedef np.float64_t DTYPE_t
from math import radians, cos, sin, asin, sqrt

def haversine(lng1, lat1, lng2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lng1, lat1 : longitude and latitude of the first point, in degrees.
    lng2, lat2 : longitude and latitude of the second point, in degrees.

    Returns
    -------
    The distance in kilometers, computed on a sphere of radius 6371 km.
    """
    # convert decimal degrees to radians
    lng1, lat1, lng2, lat2 = map(radians, [lng1, lat1, lng2, lat2])

    # haversine formula
    dlng = abs(lng2 - lng1)
    if dlng > np.pi:  # wrap around longitude: take the short way round
        dlng = 2*np.pi - dlng
    dlat = abs(lat2 - lat1)
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlng/2)**2
    # Rounding can leave `a` marginally outside [0, 1] (typically for
    # antipodal points), which would make sqrt/asin raise a domain error.
    # Plain comparisons are used so that a NaN input still yields NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r

def manhatton(lng1, lat1, lng2, lat2):
    """
    Manhattan-style distance between two points given in decimal degrees.

    The absolute longitude difference (wrapped so it never exceeds half a
    turn) and the absolute latitude difference are added up in radians and
    scaled by an earth radius of 6371 km. No latitude-dependent correction
    is applied to the longitude term, i.e. every grid step is measured at
    equator scale.

    Returns the distance in kilometers.
    """
    earth_radius_km = 6371  # use 3956 for miles

    # degrees -> radians, in the order the arguments were given
    lam1, phi1, lam2, phi2 = (radians(deg) for deg in (lng1, lat1, lng2, lat2))

    lng_gap = abs(lam2 - lam1)
    lat_gap = abs(phi2 - phi1)
    if lng_gap > np.pi:
        # crossing the antimeridian is shorter than going the long way round
        lng_gap = 2*np.pi - lng_gap

    return (lng_gap + lat_gap) * earth_radius_km

def geoKernel_cython(float[:,::1] data, float threshold):
    """
    Build the upper-triangular matrix of pairwise `manhatton` distances.

    Parameters
    ----------
    data : C-contiguous float32 array of shape (n, 2); column 0 is passed to
        `manhatton` as longitude and column 1 as latitude (degrees).
    threshold : largest distance to keep, in the unit returned by
        `manhatton` (kilometers).

    Returns
    -------
    An (n, n) float64 array. Entry [i, j] with j >= i holds the distance when
    0.1 < distance <= threshold and 0 otherwise (so the diagonal is 0).
    The strictly lower triangle is always 0.
    """
    cdef Py_ssize_t n = data.shape[0]
    cdef Py_ssize_t i, j

    # zeros, not empty: only entries with j >= i are written below, and the
    # caller symmetrises with X + X.T, so the lower triangle must not contain
    # uninitialised memory.
    X = np.zeros((n, n), dtype=float)
    for i in range(n):
        for j in range(i, n):
            # Manhattan-style distance (not haversine)
            dist = manhatton(data[i, 0], data[i, 1], data[j, 0], data[j, 1])
            if dist <= threshold and dist > 0.1:
                X[i, j] = dist
    return X

In [None]:
# Load the cached geo-distance matrix, or recompute it and store it.
# Set reCalculateGeoKernel to True to recompute it from the grid coordinates
# (dense n x n computation) and overwrite ../Data/geokernel.npz.
reCalculateGeoKernel = False
if reCalculateGeoKernel:
    dfCorrelation = pd.read_csv('../Data/LaggedCorrelation_d7_l90_v3.csv', sep=',')
    pointsCorrelation = gpd.GeoDataFrame(dfCorrelation, geometry=gpd.points_from_xy(dfCorrelation.lng, dfCorrelation.lat))

    # geoKernel_cython takes a C-contiguous float32 (n, 2) array of [lng, lat]
    data = pointsCorrelation[['lng','lat']].to_numpy()
    data = np.ascontiguousarray(data, dtype='float32')

    thresh = 200  # maximum distance (km) between two points for the pair to be kept
    X = geoKernel_cython(data, thresh)

    # geoKernel_cython only fills entries with j >= i; drop whatever the
    # lower triangle holds before mirroring so it can never leak into Xs.
    X = np.triu(X)
    X[X<0.0000001] = 0
    # Mirror the upper triangle. The sum doubles the diagonal, so restore it
    # in place rather than subtracting a dense np.diag(...) matrix.
    Xs = X + X.T
    np.fill_diagonal(Xs, X.diagonal())
    fullGeo = coo_matrix(Xs)
    save_npz('../Data/geokernel.npz', fullGeo)
else:
    fullGeo = load_npz("../Data/geokernel.npz")

In [None]:
# Efficient sparse geo-weighted distance: Hadamard product of the geo-distance matrix with the input data. Note that this directly calculates a distance matrix instead of a kernel.
def geoWeightedDist(data, geoDist, smoothness):
    """
    Weight a sparse geographic distance matrix by the values at its endpoints.

    For every stored pair (i, j) of `geoDist` the result holds
    exp(d_ij**2 * (data[i] * data[j])**smoothness / std(d)**2),
    divided by the largest such value so that the maximum entry is 1.
    `std(d)` is the standard deviation of the stored distances.

    Parameters
    ----------
    data : array with one value per point, indexable by the row/column
        indices of `geoDist` (shape (n,) or (n, 1)).
    geoDist : scipy.sparse COO matrix (its `row`, `col` and `data`
        attributes are used) of shape (n, n).
    smoothness : exponent applied to the product of the two endpoint values.

    Returns
    -------
    scipy.sparse.coo_matrix of shape (len(data), len(data)) with the same
    sparsity pattern as `geoDist`.
    """
    # values at both endpoints of every pair stored in the geo matrix
    value_i = data[geoDist.row]
    value_j = data[geoDist.col]
    # Shrinking factor based on value: the smaller the values, the closer the distance
    Xshrink = np.multiply(value_i, value_j).reshape(len(value_i),)
    Xd = np.exp(geoDist.data**2 * np.power(Xshrink, smoothness) / geoDist.data.std()**2)
    # ndarray.max() instead of the builtin max(): the builtin walks the array
    # element by element in Python, which is very slow for large matrices.
    C = coo_matrix((Xd / Xd.max(), (geoDist.row, geoDist.col)), shape=(len(data), len(data)))
    return C

In [None]:

def drawNewMap(filename = 'Data/Correlation_d7_l180_v3.csv',percentile = 0.30,pvalueThreshold = None,smoothness = 0.5, quantile = 0.5, extraEpsilonFactor = 0.5, min_samples =5, min_points = 20,exportName="",useCircular=False,maxPositivePValue = None):
    """
    Draw the significant grid cells of one metric on a world map, followed by
    a separate colour-bar figure.

    The function reads the module-level sparse matrix `fullGeo` (one row and
    column per record of the CSV) and the shape files
    ../Data/ElNidoShapes.json and ../Data/LaNinaShapes.json.

    Parameters
    ----------
    filename : CSV file with at least the columns lat, lng, pvalue, value.
        Each record is drawn as a cell of 1 degree latitude by 1.25 degrees
        longitude centred on (lat, lng).
    percentile : when < 1.0, the p-value threshold is this quantile of the
        pvalue column; otherwise the threshold is 1.0. Ignored when
        `pvalueThreshold` is given.
    pvalueThreshold : explicit p-value threshold; overrides `percentile`.
    smoothness : forwarded to `geoWeightedDist`.
    quantile : when < 1.0, points are clustered with DBSCAN using
        eps = (this quantile of the weighted distances) * extraEpsilonFactor.
        When >= 1.0 no clustering is done and every record is drawn.
    extraEpsilonFactor : multiplier applied to the DBSCAN eps.
    min_samples : DBSCAN `min_samples`.
    min_points : clusters with fewer members than this are discarded.
    exportName : when non-empty, the map is saved to Figures/<exportName>.pdf
        and the colour bar to Figures/cm_<exportName>.pdf.
    useCircular : use a Mollweide projection of the whole globe instead of a
        PlateCarree projection with a restricted extent.
    maxPositivePValue : lower end of the colour scale, as log10(p-value).
        Defaults to the smallest finite log10 p-value among the drawn cells.
        Cells below it (or with a non-finite log10 p-value) are dark red.

    Returns
    -------
    None. Two figures are shown (and optionally saved).
    """
    # Local imports: only this function needs them.
    import copy
    import json
    from sklearn.cluster import DBSCAN

    dfCorrelation = pd.read_csv(filename, sep=',')

    # (lat_min, lat_max, lng_min, lng_max) of the grid cell around each record
    dfCorrelation["region"] = [
        (lat - 0.5, lat + 0.5, lng - 0.625, lng + 0.625)
        for lat, lng in dfCorrelation[["lat", "lng"]].to_numpy()
    ]
    points = dfCorrelation.reset_index(drop=True)

    # --- significance threshold -------------------------------------------
    if pvalueThreshold is not None:
        thresholdCorrelation = pvalueThreshold
    elif percentile < 1.0:
        thresholdCorrelation = np.quantile(points['pvalue'], percentile)
    else:
        thresholdCorrelation = 1.0

    points['log-pvalue'] = np.log10(points['pvalue'])
    # p-values above the threshold are replaced by 1 (not significant)
    points['thresh-pvalue'] = points['pvalue']
    points.loc[points['thresh-pvalue'] > thresholdCorrelation, 'thresh-pvalue'] = 1

    print("Chosen p-value: %.2g"%thresholdCorrelation)
    # largest 'value' among the records that did not pass the threshold
    print("Chosen correlation: %.2g"%np.max(points.loc[points['thresh-pvalue'] > thresholdCorrelation, 'value']))

    # --- spatial clustering -------------------------------------------------
    if quantile < 1.0:
        plotGraph = geoWeightedDist(points[['thresh-pvalue']].to_numpy(), fullGeo, smoothness)
        eps = np.quantile(plotGraph.data, quantile) * extraEpsilonFactor
        clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit(plotGraph)

        # Labels are handled as strings; "-1" is DBSCAN's noise label and is
        # reused for clusters that are too small.
        allLabels = [str(label) for label in clustering.labels_]
        smallerGroups = {label for label, count in Counter(allLabels).items() if count < min_points}

        toPlotPoints = points.copy()
        toPlotPoints['labels'] = ["-1" if label in smallerGroups else label for label in allLabels]
        # NOTE(review): records whose p-value equals the threshold exactly are
        # kept as significant above (">") but excluded here ("<") - confirm
        # whether "<=" is intended.
        keep = (toPlotPoints["labels"] != "-1") & (toPlotPoints["thresh-pvalue"] < thresholdCorrelation)
        toPlotData = toPlotPoints[keep]
    else:
        toPlotData = points.copy()

    # --- colour scale ---------------------------------------------------------
    underColor = (0.5, 0.0, 0.0, 1.0)
    # Work on a copy so that set_under never alters a shared/registered
    # colormap. plt.get_cmap replaces cm.get_cmap, which newer Matplotlib
    # versions no longer provide.
    heatmap = copy.copy(plt.get_cmap("autumn"))
    heatmap.set_under(color=underColor)

    if maxPositivePValue is None:
        logPValues = toPlotData["log-pvalue"]
        maxPositivePValue = np.min(logPValues[np.isfinite(logPValues)])
    print("Colour scale lower bound (log10 p-value): %s" % maxPositivePValue)

    norm = mpl.colors.Normalize(vmin=maxPositivePValue, vmax=np.log10(thresholdCorrelation))

    # --- map figure -----------------------------------------------------------
    if useCircular:
        projection = ccrs.Mollweide(central_longitude=150)
    else:
        projection = ccrs.PlateCarree(central_longitude=150)

    fig = plt.figure(figsize=(10,8), tight_layout=True)
    ax = plt.axes(projection=projection)
    ax.set_global()
    if not useCircular:
        ax.set_extent([-30, 150, -70, 70])
    ax.add_feature(cfeature.LAND, facecolor='gray', alpha=1.0)
    ax.add_feature(cfeature.OCEAN, facecolor=(0.7,0.7,0.7), alpha=1.0)

    def plotRectangle(region, name, color):
        """Add one (lat_min, lat_max, lng_min, lng_max) box to the map, with an optional label above it."""
        ax.add_patch(mpatches.Rectangle(xy=[region[2], region[0]],
                                        width=(region[3] - region[2]),
                                        height=(region[1] - region[0]),
                                        fc=color,
                                        ec=(0.1,0.1,0.1,1.0),
                                        lw=0.1,
                                        transform=ccrs.Geodetic()))
        if name:
            ax.text((region[3] + region[2])*0.5, region[1], name,
                    horizontalalignment='center', verticalalignment='bottom',
                    transform=ccrs.Geodetic(), zorder=6)

    for region, pvalue in zip(toPlotData["region"], toPlotData["thresh-pvalue"]):
        regionPvalueLog = np.log10(pvalue)
        if np.isfinite(regionPvalueLog) and regionPvalueLog >= maxPositivePValue:
            color = heatmap(norm(regionPvalueLog))
        else:
            # below the colour scale (or p-value of 0): same colour as the
            # "under" end of the colour bar
            color = underColor
        plotRectangle(region, "", color)

    plotRectangle([-5, 5, -170, -120], "ENSO", "#CCCCCC")

    ax.add_feature(cfeature.COASTLINE, edgecolor='black', alpha=0.5)
    ax.add_feature(cfeature.BORDERS, edgecolor='black', alpha=0.5)

    # Overlay the outlines stored in the two shape files; the first element
    # of every entry is used as the polygon's vertex list.
    shapes = []
    for shapeFile in ("../Data/ElNidoShapes.json", "../Data/LaNinaShapes.json"):
        with open(shapeFile, "r") as fd:
            shapes += [shape[0] for shape in json.load(fd)]

    for shape in shapes:
        ax.add_patch(mpatches.Polygon(np.array(shape),
                                      closed=False,
                                      facecolor='white',
                                      alpha=0.2,
                                      transform=ccrs.Geodetic()))

    if exportName:
        os.makedirs("Figures", exist_ok=True)
        fig.savefig("Figures/"+exportName+".pdf")
    plt.show()

    # --- colour-bar figure ------------------------------------------------------
    fig, ax = plt.subplots(figsize=(6, 1))
    fig.subplots_adjust(bottom=0.5)

    def fmt(x, pos):
        """Format a log10 tick value as a power of ten."""
        return '$10^{%d}$'%(x)

    # Coarser tick spacing for wider colour scales
    if -maxPositivePValue < 10:
        tickStep = 1
    elif -maxPositivePValue < 50:
        tickStep = 5
    elif -maxPositivePValue < 100:
        tickStep = 10
    elif -maxPositivePValue < 200:
        tickStep = 20
    else:
        tickStep = 50
    bounds = np.arange(np.ceil(maxPositivePValue), round(np.log10(thresholdCorrelation)) + 1, tickStep)
    print("Colour bar ticks: %s" % bounds)

    mpl.colorbar.ColorbarBase(ax, cmap=heatmap,
                              norm=norm,
                              extend='min',
                              extendfrac=0.05,
                              spacing='uniform',
                              ticks=bounds,
                              orientation='horizontal',
                              format=ticker.FuncFormatter(fmt))

    if exportName:
        os.makedirs("Figures", exist_ok=True)
        # save this figure explicitly instead of pyplot's "current" figure
        fig.savefig("Figures/cm_%s.pdf"%exportName)
    fig.show()


In [None]:
import warnings
# NOTE: this silences every warning for the rest of the session, not only the
# ones raised while plotting.
warnings.filterwarnings("ignore")

# One map per entry: (propertyName, maxLag, maxPositivePValue).
# maxPositivePValue is the lower end of the colour scale in log10(p-value);
# None lets drawNewMap derive it from the data.
plotConfigs = [
# ("LaggedCorrelation",0,None),
# ("LaggedCorrelation",30,None),
# ("LaggedCorrelation",90,None),
# ("LaggedCorrelation",180,None),
# ("Causality",30,-20),
# ("Causality",90,-20),
("Causality",180,-20)
]

windowDays = 7
for propertyName,maxLag,maxPositivePValue in plotConfigs:
    # input file: ../Data/<propertyName>_d<windowDays>_l<maxLag>_v3.csv
    drawNewMap('../Data/%s_d%d_l%d_v3.csv'%(propertyName,windowDays,maxLag),
          smoothness = 0.5,
#           percentile = 1.0,# 0.2
          pvalueThreshold=0.01,
          quantile = 0.5,
          extraEpsilonFactor = 0.5,
          min_samples = 5,
          min_points = 30,
#           exportName="region_%s_d%d_l%d_v3_complete"%(propertyName,windowDays,maxLag),
          maxPositivePValue=maxPositivePValue,
         )