# This Looks Like That in TensorFlow

Useful Files and Links
- Chen et al. (2019), "This Looks Like That: Deep Learning for Interpretable Image Recognition" (NeurIPS 2019)
- https://github.com/cfchen-duke/ProtoPNet

In [1]:
import os
import sys
import imp 

import numpy as np
from tqdm import tqdm
from tqdm import trange
from icecream import ic          # pip install icecream
import scipy.io as sio


import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import seaborn as sns
import cmasher as cmr            # pip install cmasher

import cartopy as ct

import tensorflow as tf

import network
import experiment_settings 
import data_functions
import push_prototypes
import plots
import common_functions

import warnings
warnings.filterwarnings( "ignore", module = "cartopy\..*" )
warnings.filterwarnings( "ignore", module = "matplotlib\..*" )

In [2]:
# Notebook provenance: the authors and a date-stamped version string.
__author__ = "Elizabeth A. Barnes and Randal J Barnes"
__version__ = "24 August 2021"

In [3]:
# Figure defaults for on-screen rendering: white background at 150 dpi.
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'figure.dpi': 150,
})

# Higher resolution kept as a separate value
# (presumably used when saving figures to disk -- TODO confirm).
dpiFig = 400.0

## Print the detailed system info

In [4]:
# Report the interpreter and key library versions, one per line.
version_report = {
    "python": sys.version,
    "numpy": np.__version__,
    "tensorflow": tf.__version__,
}
for package_name, package_version in version_report.items():
    print(f"{package_name} version = {package_version}")

python version = 3.9.4 (default, Apr  9 2021, 09:32:38) 
[Clang 10.0.0 ]
numpy version = 1.20.1
tensorflow version = 2.5.0


## Define experiment settings and directories

In [5]:
# Select the experiment to analyze (swap the commented line to switch).
# EXP_NAME = 'quadrants_nol1l2'
EXP_NAME = 'mjo_hemi_10E_270E_nol1l2'

# Reload the local modules so on-disk edits are picked up without a kernel restart.
imp.reload(experiment_settings)
settings = experiment_settings.get_settings(EXP_NAME)

imp.reload(common_functions)
model_dir, model_diagnostics_dir, vizualization_dir = common_functions.get_exp_directories(EXP_NAME)

# Final figures are redirected to a dedicated directory. The location can be
# overridden with the TLLT_FIGURE_DIR environment variable; the default is the
# original author's local path. os.path.join(..., '') guarantees a trailing
# separator, in case downstream code builds paths by string concatenation.
FINAL_FIGURE_DIR = os.path.join(
    os.environ.get(
        'TLLT_FIGURE_DIR',
        '/Users/eabarnes/GoogleDrive/WORK/RESEARCH/2021/thisLooksLikeThat/TLLT-clean/figures/final/',
    ),
    '',
)
if os.path.isdir(FINAL_FIGURE_DIR):
    vizualization_dir = FINAL_FIGURE_DIR
else:
    # On a machine without that directory, keep the experiment's own
    # visualization directory rather than pointing at a path that does not exist.
    print('figure directory ' + FINAL_FIGURE_DIR + ' not found; using ' + vizualization_dir)

## Define the network parameters

In [6]:
# --- seed and batch sizes ---
RANDOM_SEED = settings['random_seed']
BATCH_SIZE = settings['batch_size']
BATCH_SIZE_PREDICT = settings['batch_size_predict']

# --- network architecture settings ---
NLAYERS = settings['nlayers']
NFILTERS = settings['nfilters']
DOUBLE_CONV = settings['double_conv']
assert len(NFILTERS) == NLAYERS  # exactly one filter count per layer

# --- classes and prototypes ---
NCLASSES = settings['nclasses']
PROTOTYPES_PER_CLASS = settings['prototypes_per_class']
NPROTOTYPES = np.sum(PROTOTYPES_PER_CLASS)  # total across all classes

# --- training schedule (the last two are fixed here, not read from settings) ---
NEPOCHS = settings['nepochs']
LR_INIT = settings['lr']
LR_EPOCH_BOUND = 10000
PATIENCE = 100

# --- training stage whose saved model is analyzed ---
STAGE = settings['analyze_stage']

## Initialize

In [7]:
# Seed TensorFlow, NumPy's legacy global state, and a dedicated NumPy Generator
# from the same experiment seed.
tf.random.set_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
rng = np.random.default_rng(RANDOM_SEED)

## Get and process the data

In [8]:
imp.reload(data_functions)
DATA_NAME = settings['data_name']
DATA_DIR = settings['data_dir']

# Processing options shared by every data-loading branch below.
processing_options = dict(
    colored=settings['colored'],
    standardize=settings['standardize'],
    shuffle=settings['shuffle'],
)

# The experiment-name prefix selects the dataset and how it is loaded.
if EXP_NAME.startswith('mjo'):
    labels, data, lat, lon, time = data_functions.load_mjo_data(DATA_DIR)
    (X_train, y_train, time_train,
     X_val, y_val, time_val,
     X_test, y_test, time_test) = data_functions.get_and_process_mjo_data(
        labels,
        data,
        time,
        rng,
        **processing_options,
    )

elif EXP_NAME.startswith(('extremes', 'jamin', 'quadrants')):
    # These experiments all read a single .mat file through the same loader.
    filename = DATA_DIR + DATA_NAME + '.mat'
    X_train, y_train, X_val, y_val, X_test, y_test, lat, lon = data_functions.get_and_process_data(
        filename,
        rng,
        **processing_options,
    )

    # Only the 'extremes' experiments are additionally subsampled.
    if EXP_NAME.startswith('extremes'):
        X_train, y_train, X_val, y_val, X_test, y_test = data_functions.subsample_extremes(
            RANDOM_SEED, X_train, y_train, X_val, y_val, X_test, y_test
        )

else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError(
        "no data loader is defined for experiment '" + EXP_NAME + "'; "
        "expected a name starting with 'mjo', 'extremes', 'jamin' or 'quadrants'"
    )
    

[0 1 2 3 4 5 6 7 8]


ic| years_train: array([1901, 1900, 1979, 1908, 2004, 1920, 1932, 1951, 1985, 1924, 1956,
                        1952, 2000, 1996, 1938, 1991, 1973, 1921, 1960, 1977, 1982, 1969,
                        1994, 1988, 1998, 1978, 1970, 2003, 2002, 1963, 1997, 1931, 1910,
                        1993, 1904, 1942, 1981, 1967, 1966, 1983, 1989, 2006, 1974, 1984,
                        1980, 1943, 1934, 1971, 1929, 1930, 1919, 1986, 1914, 1959, 1926,
                        1999, 1905, 1975, 1948, 1954, 1925, 1940, 1922, 1936, 1968, 1947,
                        1990, 1909, 1939, 1972, 2009, 1944, 1957, 1958, 1928, 1955, 1933,
                        1950, 1913, 2005, 1915, 1911, 1987, 1964, 1995, 2001, 1927, 1962,
                        1906])
ic| years_val: array([1902, 1903, 1907, 1912, 1916, 1917, 1918, 1923, 1935, 1937, 1941,
                      1945, 1946, 1949, 1953, 1961, 1965, 1976, 1992, 2007, 2008, 2010])
ic| years_test: 2010


raw_data        = (40422, 17, 105, 3)
training data   = (32387, 17, 105, 3), (32387,)
validation data = (8035, 17, 105, 3), (8035,)
test data       = (365, 17, 105, 3), (365,)
X_mean          = 0.00029500475989341156
X_std           = 1.0281252570023809


## Get the model and make predictions

In [9]:
# The saved model's path is built from the experiment name and the analyzed stage.
model_filename = f"{model_dir}model_{EXP_NAME}_stage{STAGE}"
model = common_functions.load_model(model_filename)


loading model from ./saved_models/mjo_hemi_10E_270E_nol1l2/model_mjo_hemi_10E_270E_nol1l2_stage9


In [10]:
def _correct_class_prototype_mask(class_labels, class_mask, n_prototypes):
    """Select, for every sample, the column of ``class_mask`` given by its label.

    Parameters
    ----------
    class_labels : array-like of numbers
        One label per sample; each is truncated to an integer and used as a
        column index into ``class_mask``.
    class_mask : 2-D array-like
        Indexed as ``[prototype, class]``.
    n_prototypes : int
        Width of the returned array.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(class_labels), n_prototypes)`` whose row
        ``i`` equals ``class_mask[:, int(class_labels[i])]``.
    """
    label_index = np.asarray(class_labels).astype(int)
    mask_rows = np.zeros((len(label_index), n_prototypes))
    # One integer-array indexing step replaces the per-sample Python loop.
    mask_rows[:] = np.asarray(class_mask)[:, label_index].T
    return mask_rows


proto_class_mask = network.createClassIdentity(PROTOTYPES_PER_CLASS)

# The same construction is applied to each data split.
prototypes_of_correct_class_train = _correct_class_prototype_mask(y_train, proto_class_mask, NPROTOTYPES)
prototypes_of_correct_class_val = _correct_class_prototype_mask(y_val, proto_class_mask, NPROTOTYPES)
prototypes_of_correct_class_test = _correct_class_prototype_mask(y_test, proto_class_mask, NPROTOTYPES)
    

## Validation samples

In [11]:
# The model takes two inputs: the validation samples and, for each sample,
# the mask of prototypes belonging to its correct class.
input_val = [[X_val, prototypes_of_correct_class_val]]
predict_options = dict(batch_size=BATCH_SIZE_PREDICT, verbose=1)

print('......ProtoLNet Metrics......')
print('running model.predict()')
y_predict_val = model.predict(input_val, **predict_options)
print('model.predict() complete.')

model.evaluate(input_val, y_val, **predict_options)

model.summary()

......ProtoLNet Metrics......
running model.predict()
model.predict() complete.
Model: "full_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputs (InputLayer)             [(None, 17, 105, 3)] 0                                            
__________________________________________________________________________________________________
conv_0 (Conv2D)                 (None, 17, 105, 16)  448         inputs[0][0]                     
__________________________________________________________________________________________________
dropout (Dropout)               (None, 17, 105, 16)  0           conv_0[0][0]                     
__________________________________________________________________________________________________
maxpooling_0 (AveragePooling2D) (None, 8, 52, 16)    0           dropout[0][0]                    
_________

In [12]:
print('Accuracies by class: ')

# Predicted class of every validation sample, computed once rather than per class.
predicted_val = np.argmax(y_predict_val, axis=1)

for c in np.arange(0, NCLASSES):
    # Indices of the validation samples whose true label is class c.
    in_class = np.where(y_val == c)[0]

    if len(in_class) == 0:
        # No validation samples of this class: report nan instead of
        # raising ZeroDivisionError.
        acc = np.nan
    else:
        n_correct = np.count_nonzero(y_val[in_class] == predicted_val[in_class])
        acc = np.round(n_correct / len(in_class), 3)

    print('   phase ' + str(c) + ' = ' + str(acc))
    

Accuracies by class: 
   phase 0 = 0.701
   phase 1 = 0.706
   phase 2 = 0.767
   phase 3 = 0.798
   phase 4 = 0.714
   phase 5 = 0.663
   phase 6 = 0.771
   phase 7 = 0.717
   phase 8 = 0.694


In [13]:
# Use this experiment's own pretrained model unless the settings name
# another experiment to take it from.
pretrain_exp = settings['pretrain_exp']
PRETRAINED_MODEL = (
    model_dir + 'pretrained_model_' + EXP_NAME
    if pretrain_exp is None
    else './saved_models/' + pretrain_exp
)

print('......Base CNN Metrics......')
print('loading pretrained convolutional layers from ' + PRETRAINED_MODEL)
pretrained_model = tf.keras.models.load_model(PRETRAINED_MODEL)

cnn_predict_options = dict(batch_size=BATCH_SIZE_PREDICT, verbose=1)
y_predict_val_cnnbase = pretrained_model.predict(X_val, **cnn_predict_options)
print('model.predict() complete.')

# Left as the last expression so the evaluation result is shown as the cell output.
pretrained_model.evaluate(X_val, y_val, **cnn_predict_options)

......Base CNN Metrics......
loading pretrained convolutional layers from ./saved_models/mjo_hemi_10E_270E_nol1l2/pretrained_model_mjo_hemi_10E_270E_nol1l2
model.predict() complete.


[1.033264398574829, 0.5843185782432556]

In [14]:
# Print the layer-by-layer summary of the pretrained model loaded above.
pretrained_model.summary()

Model: "full_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 17, 105, 3)]      0         
_________________________________________________________________
conv_0 (Conv2D)              (None, 17, 105, 16)       448       
_________________________________________________________________
dropout (Dropout)            (None, 17, 105, 16)       0         
_________________________________________________________________
maxpooling_0 (AveragePooling (None, 8, 52, 16)         0         
_________________________________________________________________
conv_1 (Conv2D)              (None, 8, 52, 16)         2320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 52, 16)         0         
_________________________________________________________________
maxpooling_1 (AveragePooling (None, 4, 26, 16)         0