In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from matplotlib.cm import get_cmap

import os
import pickle

import SAM
import float_data as flt

import importlib
importlib.reload(SAM)
importlib.reload(flt)

from dask import delayed

def f(datac1, datac2, n_classes):
  a = [np.unique(datac2.where(datac1==k).values, return_counts=True) for k in range(n_classes)]
  a = [(x[0][0:-1].astype('int'), x[1][0:-1]) for x in a]
  return a

In [2]:
ids = ['r1i1p1f2', 'r2i1p1f2']#, 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f3', 'r6i1p1f3', 'r7i1p1f3', 'r8i1p1f2', 'r9i1p1f2', 'r10i1p1f2']
datas = {}
gmm = {}
pca = {}
n_classes = np.array([5, 6, 7])
model_folder = 'model'
mask = None

In [3]:
for m_id in ids:
  options = {'memberId' : m_id, 'raw' : True}
  data = flt.retrieve_profiles(timeRange = slice('1999-01', '2000-12'), options=options)
  datas[m_id] = data
  
  data = data.stack(n=('i', 'j',))
  if mask is None:
    mask = data.isel(time=0).dropna('n')['n'].values
  data = data.sel(n=mask)
  
  data_sampled = flt.random_sample(data, 100)
  data_normalised = flt.normalise_data(data_sampled, 'N').compute()
  
  # Train a PCA object
  
  pca[m_id] = flt.train_pca(data_normalised, 3)
  data_pca = flt.pca_transform(data_normalised, pca[m_id]).compute()
  
  # Train GMM objects for each number of classes
  gmm[m_id] = []
  for n in n_classes:
    gmm[m_id].append(flt.train_gmm(data_pca, n))
    print('{}, {}'.format(m_id, n), end="\r", flush=True)

r2i1p1f2, 7

In [5]:
avg_profiles = {}
for m_id in ids:
  
  data = datas[m_id]
  data = data.stack(n=('i', 'j'))
  if mask is None:
    mask = data.isel(time=0).dropna('n')['n'].values
  data = data.sel(n=mask)
  
  data_normalised = flt.normalise_data(data, ('n', 'time')).compute()
  data_pca = flt.pca_transform(data_normalised, pca[m_id]).compute()
  
  avg_profiles[m_id] = [flt.avg_profiles(data, flt.gmm_classify(data_pca, gmm[m_id][i]).compute(), n) for (i, n) in enumerate(n_classes)]
print('Done!')

KeyboardInterrupt: 

In [None]:
for (m, n) in enumerate(n_classes):
  indices_avg = np.zeros((len(ids), n))
  inds = flt.temp_sort(None, avg_profiles[ids[0]][m], True)
  for (k, v) in enumerate(ids):
      indices_avg[k, :] = flt.match_profiles([avg_profiles[ids[0]][m][j] for j in inds], avg_profiles[v][m])
  
  data_normalised = flt.normalise_data(datas[ids[0]], ('i', 'j', 'time'))
  data_pca = flt.pca_transform(data_normalised, pca[ids[0]])
  ref_classes = flt.gmm_classify(data_pca, gmm[ids[0]][m])
  ref_classes = flt.reorder(ref_classes, inds).compute()
  

  
  counts = []
  for m_id in ids:
    data_normalised = flt.normalise_data(datas[m_id], ('i', 'j', 'time'))
    data_pca = flt.pca_transform(data_normalised, pca[m_id])
    data_classes = flt.gmm_classify(data_pca, gmm[m_id][m]).compute()
    counts.append(f(ref_classes, data_classes, n))
  indices = np.zeros((len(ids), n))
  for i in range(len(ids)):
    for j in range(n):
      indices[i, j] = counts[i][j][0][np.argmax(counts[i][j][1])]
  indices = indices.astype('int')
  
  for (a, b, i) in zip(indices, indices_avg, ids):
    print(i)
    print("  Bijective spatial plots: {}".format(len(a) == len(np.unique(a))))
    print("  Bijective profiles: {}".format(len(b) == len(np.unique(b))))
    print("  Same assignment? {}".format(np.all(a == b)))
    print('')

NameError: name 'avg_profiles' is not defined

In [13]:
for (m, n) in enumerate(n_classes):
  indices_avg = np.zeros((len(ids), n))
  #inds = flt.temp_sort(None, avg_profiles[ids[0]][m], True)
  #for (k, v) in enumerate(ids):
      #indices_avg[k, :] = flt.match_profiles([avg_profiles[ids[0]][m][j] for j in inds], avg_profiles[v][m])
  
  data_normalised = flt.normalise_data(datas[ids[0]], ('i', 'j', 'time'))
  data_pca = flt.pca_transform(data_normalised, pca[ids[0]])
  ref_classes = flt.gmm_classify(data_pca, gmm[ids[0]][m]).compute()
  

  
  counts = {}
  for m_id in ids:
    data_normalised = flt.normalise_data(datas[m_id], ('i', 'j', 'time'))
    data_pca = flt.pca_transform(data_normalised, pca[m_id])
    data_classes = flt.gmm_classify(data_pca, gmm[m_id][m]).compute()
    counts[m_id] = [[((data_classes.where(ref_classes==k) == j) * np.cos(data_classes['lat'] * 3.141/180)).sum(skipna=True).values for k in range(n)] for j in range(n)]
  indices = np.zeros((len(ids), n))
  for i in range(len(ids)):
    for j in range(n):
      indices[i, j] = counts[i][j][0][np.argmax(counts[i][j][1])]
  indices = indices.astype('int')
  
  for (a, b, i) in zip(indices, indices_avg, ids):
    print(i)
    print("  Bijective spatial plots: {}".format(len(a) == len(np.unique(a))))
    print("  Bijective profiles: {}".format(len(b) == len(np.unique(b))))
    print("  Same assignment? {}".format(np.all(a == b)))
    print('')


KeyError: 0

In [26]:
np.argmax(np.array(counts[ids[1]]), axis=0)

array([0, 1, 2, 3, 4])