In [1]:
#%matplotlib notebook
%matplotlib qt

# This notebook is a testbed for importing PEAC Center USAPI
# rainfall data using pandas and doing some quick analysis.

import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as cols
import matplotlib.cm as cm

from mpl_toolkits.basemap import Basemap, shiftgrid

import numpy as np
import numpy.ma as ma

from datetime import date
import datetime

import calendar

from netCDF4 import Dataset

from scipy import signal, linalg, stats

from pycurrents.codas import to_day, to_date
from pycurrents.plot.mpltools import dday_to_mpl

from pycurrents.system import Bunch
from pycurrents.num import eof
from pycurrents.num import rangeslice

import pickle

from scipy.special import comb

In [2]:
# Open the station workbook once; `station_rain` is the handle that the
# following cell passes to pd.read_excel.
workbook_path = 'Station_Monthly_data.xlsx'
station_rain = pd.ExcelFile(workbook_path)

# List the worksheets available in the workbook.
available_sheets = station_rain.sheet_names
print(available_sheets)

['Sheet1', 'Sheet2', 'Sheet3']


In [3]:
# Load columns A..T of the first worksheet into the station table.
# `sheet_name` / `usecols` are the current spellings of the keywords that
# used to be called `sheetname` / `parse_cols`; the old names are rejected
# by recent pandas releases.
raw_data = pd.read_excel(station_rain, sheet_name='Sheet1', skiprows=0, usecols="A:T")

# Quick look at the identifying columns and the MI value of every station.
print(raw_data.loc[:,['Country','Station Name','MI']])
#print(raw_data.loc[:,'Lon'])

           Country      Station Name         MI
0              FSM    Kapingamarangi  -9.687959
1              FSM     Kosrae Int AP  -8.468269
2              FSM          Lukunoch   4.829525
3              FSM         Metalanim   4.592416
4              FSM       Mokil Atoll   9.907702
5              FSM           Nukuoro  -8.320764
6              FSM                Oa  -0.550055
7              FSM       Paies-Kitti   8.494121
8              FSM           Palikir   1.166412
9              FSM          Pingelap   0.376868
10             FSM           Polowat  10.706327
11             FSM          Tufunsak -11.040146
12             FSM             Tofol  -9.252600
13             FSM            Ulithi  33.301242
14             FSM              Utwa  -6.410120
15             FSM            Woleai  14.536082
16       Marshalls     Ailinglapalap  14.749826
17       Marshalls              Arno   9.362520
18       Marshalls          Eniwetok  42.882188
19       Marshalls            Jaluit   5

In [4]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples

# The twelve monthly columns form the feature vector clustered for each station.
data = raw_data.loc[:,['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']]

# K-means starts from random centroids.  Fixing the seed keeps the cluster
# numbering (and therefore the colours/colorbar of the maps drawn later)
# identical from one run of the notebook to the next.
KMEANS_SEED = 0


def _fit_kmeans_and_score(frame, features, n_clusters, cluster_col, sil_col, seed=KMEANS_SEED):
    """Fit K-means, store labels and per-sample silhouette scores, print a preview.

    Parameters
    ----------
    frame : DataFrame that receives the two new columns (modified in place).
    features : DataFrame of features handed to KMeans and silhouette_samples.
    n_clusters : number of clusters to fit.
    cluster_col, sil_col : names of the columns written into `frame`.
    seed : value passed to KMeans as `random_state`.

    Returns
    -------
    (model, silhouette_values) : the fitted KMeans estimator and the array
    returned by silhouette_samples for its labels.
    """
    model = KMeans(n_clusters=n_clusters, random_state=seed).fit(features)
    silhouette_values = silhouette_samples(features, model.labels_)
    frame[cluster_col] = model.labels_
    frame[sil_col] = silhouette_values
    # Rows labelled 35..42 are the ones previewed for each clustering.
    print(frame.loc[35:42,['Country','Station Name','MI',cluster_col, sil_col]])
    return model, silhouette_values


# Same order as before: 3 clusters, then 2, then 4.  The variable and column
# names are unchanged because later cells read them.
s_kmeans, silh_samp_vals = _fit_kmeans_and_score(raw_data, data, 3, 'cluster', 'Sil Score')
s_kmeans_2, silh_samp_vals_2 = _fit_kmeans_and_score(raw_data, data, 2, 'cluster 2', 'Sil Score 2')
s_kmeans_4, silh_samp_vals_4 = _fit_kmeans_and_score(raw_data, data, 4, 'cluster 4', 'Sil Score 4')

       Country Station Name         MI  cluster  Sil Score
35  Major PEAC         Guam  44.157543        0   0.380047
36  Major PEAC        Koror  15.735224        2   0.425674
37  Major PEAC          Yap  33.442830        2   0.106408
38  Major PEAC        Chuuk   7.356914        2   0.603715
39  Major PEAC      Pohnpei  -1.807341        1   0.430499
40  Major PEAC       Majuro  13.623488        2   0.546848
41  Major PEAC    Kwajalein  38.079769        0   0.421440
       Country Station Name         MI  cluster 2  Sil Score 2
35  Major PEAC         Guam  44.157543          0     0.528993
36  Major PEAC        Koror  15.735224          1     0.383563
37  Major PEAC          Yap  33.442830          0     0.304675
38  Major PEAC        Chuuk   7.356914          1     0.274054
39  Major PEAC      Pohnpei  -1.807341          1     0.544753
40  Major PEAC       Majuro  13.623488          1     0.128883
41  Major PEAC    Kwajalein  38.079769          0     0.576223
       Country Station N

In [21]:
import math

def DB_index(centroids,points,list_index,k):
    """Return the Davies-Bouldin index of a clustering.

    Taken from ICS 365 Kmeans developed by G. Picot.

    Parameters
    ----------
    centroids : array-like, shape (k, n_features)
        One centroid per cluster; row j belongs to label j.
    points : array-like, shape (n_samples, n_features)
        The clustered samples.
    list_index : sequence of int, length n_samples
        Cluster label of every sample, with values in range(k).
        Every label in range(k) is expected to own at least one sample.
    k : int
        Number of clusters.

    Returns
    -------
    float
        Mean over clusters i of max_{j != i} (s_i + s_j) / ||c_i - c_j||,
        where s_i is the root-mean-square distance of the members of
        cluster i to its centroid c_i.
    """
    centroids = np.asarray(centroids, dtype=float)
    points = np.asarray(points, dtype=float)
    labels = np.asarray(list_index)

    # Within-cluster scatter: RMS distance of the members to their centroid.
    list_diameter = []
    for j in range(k):
        members = points[labels == j]
        distances = np.linalg.norm(members - centroids[j, :], axis=1)
        list_diameter.append(np.sqrt(np.sum(distances ** 2) / len(members)))

    # Pairwise similarity for every ordered pair i != j.  The earlier version
    # iterated j over range(0, i-1) and so never filled the (i, i-1) entry,
    # which could make the row maximum too small.
    Matrix_DB = np.zeros([k, k])
    for i in range(k):
        for j in range(k):
            if i == j:
                continue  # the diagonal stays 0, as before
            separation = np.linalg.norm(centroids[i, :] - centroids[j, :])
            Matrix_DB[i, j] = (list_diameter[i] + list_diameter[j]) / separation

    # Worst-case similarity of each cluster, averaged over all clusters.
    DB = np.sum(np.amax(Matrix_DB, axis=1)) / k

    return DB

In [41]:
# Davies-Bouldin index for every candidate number of clusters k.
# Entry k holds the index of the k-cluster solution; entries 0 and 1 stay NaN
# because the sweep starts at k = 2.
DB_index_values = np.full(len(data), np.nan)
print(np.shape(DB_index_values))

# The sample matrix does not depend on k, so build it once.  `.values` is
# used instead of `DataFrame.as_matrix`, which newer pandas no longer provides.
points = data.values

for k in range(2,len(data)):
    # Fixed seed so that the curve is reproducible between runs.
    s_km = KMeans(n_clusters=k, random_state=0).fit(data)
    centroids = s_km.cluster_centers_
    list_index = s_km.labels_

    DB_index_values[k] = DB_index(centroids, points, list_index,k)

(42,)


In [42]:
# Davies-Bouldin index against the number of clusters.  A dedicated figure is
# created so the curve does not land on whichever figure happens to be
# current, and the axes are labelled so the plot can be read on its own.
fig_db, ax_db = plt.subplots()
ax_db.plot(DB_index_values)
ax_db.set_xlabel('Number of clusters k')
ax_db.set_ylabel('Davies-Bouldin index')
ax_db.set_title('Davies-Bouldin index vs. number of K-means clusters');

[<matplotlib.lines.Line2D at 0x7fe9c4fc5b00>]

In [7]:
from scipy.interpolate import griddata

# Target grid with unit spacing: xi spans 130..179 and yi spans -5..24.
# Both are later handed to the map projection as longitudes / latitudes.
xi = np.arange(130,180,1)
yi = np.arange(-5,25,1)

# Scattered station positions and the MI value observed at each of them.
station_lon = raw_data.loc[:,'Lon'].tolist()
station_lat = raw_data.loc[:,'Lat'].tolist()
station_mi = raw_data.loc[:,'MI'].tolist()

# Interpolate the station values onto the grid with griddata's default
# method; broadcasting xi as a row against yi as a column gives a
# (len(yi), len(xi)) result.
target_grid = (xi[None,:], yi[:,None])
mi_gridded = griddata((station_lon, station_lat), station_mi, target_grid)

# Report type and shape of the grid axes and of the interpolated field.
for grid_obj in (xi, yi, mi_gridded):
    print(type(grid_obj), np.shape(grid_obj))

<class 'numpy.ndarray'> (50,)
<class 'numpy.ndarray'> (30,)
<class 'numpy.ndarray'> (30, 50)


In [9]:
# Map of the 3-cluster K-means solution: stations coloured by cluster label,
# selected stations annotated with their silhouette score, and contours of
# the gridded MI field on top.  The figure is written to 'cluster_map.pdf'.

# Season / month labels (not referenced anywhere else in this cell).
season_list = ['DJF' , 'JFM' , 'FMA',
               'MAM' , 'AMJ' , 'MJJ',
               'JJA' , 'JAS' , 'ASO',
               'SON' , 'OND' , 'NDJ']

month_list = ['Jan' , 'Feb' , 'Mar',
              'Apr' , 'May' , 'Jun',
              'Jul' , 'Aug' , 'Sep',
              'Oct' , 'Nov' , 'Dec']


plt.rcParams['figure.dpi'] = 113
plt.rcParams['axes.titlesize'] = 'small'
plt.rcParams['ytick.labelsize'] = 'small'  # for colorbar

# Mercator map covering 130E-175.5E, 5.1S-20.01N at crude resolution.
gmap = Basemap(projection='merc', llcrnrlat=-5.1, urcrnrlat=20.01,
                    llcrnrlon=130, urcrnrlon=175.5, lat_ts=0, resolution='c')


# Store the Basemap instance on disk; it is read straight back below, after
# the figure has been created.  Only ever load a pickle this notebook wrote.
with open('mapcache.pk', mode='wb') as f:
    pickle.dump(gmap, f)

subplotparams = dict(left=0.03, right=0.88,
                     bottom=0.03, top=0.96,
                     wspace=0.05, hspace=0.2)

fig, axs = plt.subplots(#sharex=True,
                        figsize=(13, 7.8),
                        gridspec_kw=subplotparams)

with open('mapcache.pk', 'rb') as f:
    gmap = pickle.load(f)
gmap.ax = axs  # draw every Basemap call below onto this axes

gmap.drawmapboundary(fill_color='aqua')
gmap.fillcontinents(color='coral', lake_color='aqua')

# Discrete colour bins centred on the integer cluster labels 0, 1, 2.
cmap = plt.get_cmap('jet')
bounds = [-0.5, 0.5, 1.5, 2.5]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

# Station positions in map-projection coordinates.
X, Y = gmap(raw_data.loc[:,'Lon'].tolist(), raw_data.loc[:,'Lat'].tolist())
# Kept for parity with the earlier version; nothing below reads scalar_map.
scalar_map = cm.ScalarMappable(cmap=cmap)
scalar_map.set_array(s_kmeans.labels_)

cs = gmap.scatter(X, Y, c=s_kmeans.labels_,
              cmap=cmap, marker='o',
              s = 50, norm=norm)

# Stations that get a text label; built once rather than on every loop pass.
stations_to_label = ['Kwajalein', 'Majuro',
                     'Koror', 'Yap', 'Chuuk', 'Pohnpei','Kosrae Int AP',
                     'Saipan Int AP', 'Guam',
                     'Eniwetok', 'Utirik', 'Wotje']

for station, sil, x, y in zip(raw_data.loc[:,'Station Name'].tolist(), raw_data.loc[:,'Sil Score'].tolist(),
                         raw_data.loc[:,'Lon'].tolist(), raw_data.loc[:,'Lat'].tolist()):

    if station in stations_to_label:
        # Format the number itself.  The previous "{:.4}".format(str(sil))
        # merely cut the text to four characters, which mangles negative or
        # very small scores (e.g. "-0.0", "1e-0") and never rounds.
        sil_str = "{:.2f}".format(sil)
        # Text is offset 1 unit west and 0.5 units north of the station and
        # connected to it with an arrow.
        plt.annotate(station+' ('+sil_str+')',xy=gmap(x,y), xytext =gmap(x-1,y+0.5),
        textcoords = 'data',
        ha = 'right', va='bottom', fontsize = 8,
        bbox = dict(facecolor = 'white',alpha=0.7)     ,
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

# Contour levels for the gridded MI field: thin lines every 10 from 0 to 90,
# plus one thicker line at 50.
hgt_bounds = np.arange(0,100,10)
hgt_bounds_MI_threswold = [50]

x_hgt, y_hgt = gmap(*np.meshgrid(xi, yi))

im_hgt = gmap.contour(x_hgt, y_hgt, mi_gridded,
                      levels=hgt_bounds,
                      linewidths=0.5, colors='k')
plt.clabel(im_hgt,fontsize=9,inline=1,fmt='%1.0f')

im_hgt_25 = gmap.contour(x_hgt, y_hgt, mi_gridded,
                      levels=hgt_bounds_MI_threswold,
                      linewidths=1, colors='k')
plt.clabel(im_hgt_25,fontsize=9,inline=1,fmt='%1.0f')

gmap.drawcoastlines()
parallels = np.arange(-90, 90, 10)
meridians = np.arange(-180, 180, 10)

gmap.drawparallels(parallels, labels = [1, 0, 0, 1], fontsize=8)
gmap.drawmeridians(meridians, labels = [1, 0, 0, 1], fontsize=8)


# Narrow colorbar axes just to the right of the map.
left = subplotparams['right'] + 0.02
bottom = subplotparams['bottom'] + 0.05
width = 0.015
height = subplotparams['top'] - subplotparams['bottom'] - 0.1

cax = fig.add_axes([left, bottom, width, height])
cb = plt.colorbar(cs, cax=cax, extend ='neither')
# One tick per cluster label.  The `update_ticks=True` keyword was dropped:
# it is redundant with the explicit update_ticks() call below and is not
# accepted by newer Matplotlib releases.
cb.set_ticks(np.array([0,1,2]))
cb.set_ticklabels(['0','1','2'])
cb.update_ticks()
cb.set_label('Cluster')
fig.savefig('cluster_map.pdf')

  xx = x[x.shape[0]/2,:]


In [11]:
# Map of the 2-cluster K-means solution: stations coloured by cluster label,
# selected stations annotated with their silhouette score, and contours of
# the gridded MI field on top.  The figure is written to '2_cluster_map.pdf'.

# Season / month labels (not referenced anywhere else in this cell).
season_list = ['DJF' , 'JFM' , 'FMA',
               'MAM' , 'AMJ' , 'MJJ',
               'JJA' , 'JAS' , 'ASO',
               'SON' , 'OND' , 'NDJ']

month_list = ['Jan' , 'Feb' , 'Mar',
              'Apr' , 'May' , 'Jun',
              'Jul' , 'Aug' , 'Sep',
              'Oct' , 'Nov' , 'Dec']


plt.rcParams['figure.dpi'] = 113
plt.rcParams['axes.titlesize'] = 'small'
plt.rcParams['ytick.labelsize'] = 'small'  # for colorbar

# Mercator map covering 130E-175.5E, 5.1S-20.01N at crude resolution.
gmap = Basemap(projection='merc', llcrnrlat=-5.1, urcrnrlat=20.01,
                    llcrnrlon=130, urcrnrlon=175.5, lat_ts=0, resolution='c')


# Store the Basemap instance on disk; it is read straight back below, after
# the figure has been created.  Only ever load a pickle this notebook wrote.
with open('mapcache.pk', mode='wb') as f:
    pickle.dump(gmap, f)

subplotparams = dict(left=0.03, right=0.88,
                     bottom=0.03, top=0.96,
                     wspace=0.05, hspace=0.2)

fig, axs = plt.subplots(#sharex=True,
                        figsize=(13, 7.8),
                        gridspec_kw=subplotparams)

with open('mapcache.pk', 'rb') as f:
    gmap = pickle.load(f)
gmap.ax = axs  # draw every Basemap call below onto this axes

gmap.drawmapboundary(fill_color='aqua')
gmap.fillcontinents(color='coral', lake_color='aqua')

# Discrete colour bins centred on the integer cluster labels 0 and 1.
cmap = plt.get_cmap('jet')
bounds = [-0.5, 0.5, 1.5]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

# Station positions in map-projection coordinates.
X, Y = gmap(raw_data.loc[:,'Lon'].tolist(), raw_data.loc[:,'Lat'].tolist())
# Kept for parity with the earlier version; nothing below reads scalar_map.
scalar_map = cm.ScalarMappable(cmap=cmap)
scalar_map.set_array(s_kmeans_2.labels_)

cs = gmap.scatter(X, Y, c=s_kmeans_2.labels_,
              cmap=cmap, marker='o',
              s = 50, norm=norm)

# Stations that get a text label; built once rather than on every loop pass.
stations_to_label = ['Kwajalein', 'Majuro',
                     'Koror', 'Yap', 'Chuuk', 'Pohnpei','Kosrae Int AP',
                     'Saipan Int AP', 'Guam',
                     'Eniwetok', 'Utirik', 'Wotje']

for station, sil, x, y in zip(raw_data.loc[:,'Station Name'].tolist(), raw_data.loc[:,'Sil Score 2'].tolist(),
                         raw_data.loc[:,'Lon'].tolist(), raw_data.loc[:,'Lat'].tolist()):

    if station in stations_to_label:
        # Format the number itself.  The previous "{:.4}".format(str(sil))
        # merely cut the text to four characters, which mangles negative or
        # very small scores (e.g. "-0.0", "1e-0") and never rounds.
        sil_str = "{:.2f}".format(sil)
        # Text is offset 1 unit west and 0.5 units north of the station and
        # connected to it with an arrow.
        plt.annotate(station+' ('+sil_str+')',xy=gmap(x,y), xytext =gmap(x-1,y+0.5),
        textcoords = 'data',
        ha = 'right', va='bottom', fontsize = 8,
        bbox = dict(facecolor = 'white',alpha=0.7)     ,
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

# Contour levels for the gridded MI field: thin lines every 10 from 0 to 90,
# plus one thicker line at 50.
hgt_bounds = np.arange(0,100,10)
hgt_bounds_MI_threswold = [50]

x_hgt, y_hgt = gmap(*np.meshgrid(xi, yi))

im_hgt = gmap.contour(x_hgt, y_hgt, mi_gridded,
                      levels=hgt_bounds,
                      linewidths=0.5, colors='k')
plt.clabel(im_hgt,fontsize=9,inline=1,fmt='%1.0f')

im_hgt_25 = gmap.contour(x_hgt, y_hgt, mi_gridded,
                      levels=hgt_bounds_MI_threswold,
                      linewidths=1, colors='k')
plt.clabel(im_hgt_25,fontsize=9,inline=1,fmt='%1.0f')

gmap.drawcoastlines()
parallels = np.arange(-90, 90, 10)
meridians = np.arange(-180, 180, 10)

gmap.drawparallels(parallels, labels = [1, 0, 0, 1], fontsize=8)
gmap.drawmeridians(meridians, labels = [1, 0, 0, 1], fontsize=8)


# Narrow colorbar axes just to the right of the map.
left = subplotparams['right'] + 0.02
bottom = subplotparams['bottom'] + 0.05
width = 0.015
height = subplotparams['top'] - subplotparams['bottom'] - 0.1

cax = fig.add_axes([left, bottom, width, height])
cb = plt.colorbar(cs, cax=cax, extend ='neither')
# One tick per cluster label.  The `update_ticks=True` keyword was dropped:
# it is redundant with the explicit update_ticks() call below and is not
# accepted by newer Matplotlib releases.
cb.set_ticks(np.array([0,1]))
cb.set_ticklabels(['0','1'])
cb.update_ticks()
cb.set_label('Cluster')
fig.savefig('2_cluster_map.pdf')

  xx = x[x.shape[0]/2,:]
