### File for comparing lists from RF and LSTM models for 90th confidence predictions ...

7/15/2025

In [1]:
#relevant import statements
import numpy as np
import math
import pandas as pd
import xarray as xr 
import pickle 
import matplotlib.pyplot as plt

import collections

In [2]:
# The RF models were modified so that their dates match the LSTM ones, which
# largely sidesteps the earlier array-index mismatch. Both index grids are
# therefore built the same way: the integers 0..1349 laid out as 10 rows of 135.
ranges_RF = np.arange(135 * 10).reshape(10, 135)
print("RF dates shape:", ranges_RF.shape)

ranges_LSTM = np.arange(135 * 10).reshape(10, 135)
print("LSTM dates shape:", ranges_LSTM.shape)

RF dates shape: (10, 135)
LSTM dates shape: (10, 135)


In [3]:
# Flatten the 2-D index grids into 1-D copies so the rep. arrays can be mapped
# onto them. Both maps are taken from ranges_LSTM: after the RF changes the two
# grids are the same, so the maps have the same shape and cover the same dates.
flat_LSTM_map = ranges_LSTM.ravel().copy()
flat_RF_map = ranges_LSTM.ravel().copy()

### Definition statement for calculating overlap. 

In [13]:
#definition to compile and compare counts across lists/dictionaries
def compare_dicts_values(list1, list2):
    """Report how many events two lists share, as a percentage of list2.

    Each list is tallied per day. For a day that appears in both lists the
    overlap is the smaller of the two tallies, so a repeated day is matched
    only as many times as both lists contain it.

    The lengths of both lists and the total overlap are printed; the printed
    labels are fixed as "RF" for list1 and "LSTM" for list2.

    Parameters
    ----------
    list1 : sized iterable of hashable items
        Events reported under the "RF" label.
    list2 : sized iterable of hashable items
        Events reported under the "LSTM" label; the percentage is relative to
        the length of this list.

    Returns
    -------
    float
        Overlap as a percentage of len(list2), rounded to 2 decimals.
        Returns 0.0 when list2 is empty (previously a ZeroDivisionError).
    """
    # Feed generators (not the objects themselves) so every input is tallied
    # element by element, exactly like an explicit counting loop.
    RF_count = collections.Counter(day for day in list1)
    LSTM_count = collections.Counter(day for day in list2)

    # Counter intersection keeps, for every shared key, the smaller count.
    overlap = RF_count & LSTM_count
    total_sum = sum(overlap.values())  # total count over all shared events

    print("Total number of RF events: ",len(list1))
    print("Total number of LSTM events: ",len(list2))
    print("Total number of overlap: ",total_sum)

    # Guard the denominator: with no list2 events there is nothing to overlap.
    if len(list2) == 0:
        return 0.0
    return round(total_sum / len(list2) * 100, 2)

#_________________________________________________________________
##This variant only saves the keys so that I can look at composites for those dates.
def compare_keys(list1, list2):
    """Split the unique days of two lists into shared and list-specific keys.

    How often a day occurs is ignored; only its presence matters.

    Returns a 3-tuple of lists:
      * days present in both lists, in order of first appearance in list1
      * days present only in list1 (RF), in order of first appearance in list1
      * days present only in list2 (LSTM), in order of first appearance in list2
    """
    # Insertion-ordered dicts serve as ordered sets of the unique days.
    rf_days = dict.fromkeys(list1)
    lstm_days = dict.fromkeys(list2)

    # Shared keys are the ones used for the composites.
    shared = [day for day in rf_days if day in lstm_days]
    # The RF-only / LSTM-only keys are returned too, for possible later use.
    rf_exclusive = [day for day in rf_days if day not in lstm_days]
    lstm_exclusive = [day for day in lstm_days if day not in rf_days]

    return shared, rf_exclusive, lstm_exclusive
#ultimately these composites would look at the average across the # of shared dates, ignoring the count of them. 

## Europe first. 

In [14]:
##open EUROPE indice files
# Each file is opened in a `with` block so its handle is closed even when
# unpickling fails. pickle.load can execute arbitrary code, so only load
# files from a trusted source.
# In this section the *_LSTM names hold the contents of the "_eof" files.
with open("./Europe/posXtest_eur14.p", 'rb') as infile:
    posXtest = pickle.load(infile)

with open("./Europe/posXtest_eur14_eof.p", 'rb') as infile:
    pos_LSTM = pickle.load(infile)

with open("./Europe/FposXtest_eur14.p", 'rb') as infile:
    FposXtest = pickle.load(infile)

with open("./Europe/FposXtest_eur14_eof.p", 'rb') as infile:
    Fpos_LSTM = pickle.load(infile)

with open("./Europe/negXtest_eur14.p", 'rb') as infile:
    negXtest = pickle.load(infile)

with open("./Europe/negXtest_eur14_eof.p", 'rb') as infile:
    neg_LSTM = pickle.load(infile)

with open("./Europe/FnegXtest_eur14.p", 'rb') as infile:
    FnegXtest = pickle.load(infile)

with open("./Europe/FnegXtest_eur14_eof.p", 'rb') as infile:
    Fneg_LSTM = pickle.load(infile)

In [15]:
# Print the overlap percentage for each of the four event categories.
# NOTE(review): the header names "RF Ellipse and RF EOF", yet the per-category
# suffix (and the lines printed inside compare_dicts_values) say "LSTM" —
# confirm which label is intended for the second list.
print("Overview and Comparison of High Confidence events from Europe 14 between RF Ellipse and RF EOF")
event_groups = [
    ("Positive and Correct Events", posXtest, pos_LSTM),
    ("Negative and Correct Events", negXtest, neg_LSTM),
    ("Positive and Incorrect Events", FposXtest, Fpos_LSTM),
    ("Negative and Incorrect Events", FnegXtest, Fneg_LSTM),  # heading typo "Inorrect" fixed
]
for position, (heading, first_events, second_events) in enumerate(event_groups):
    if position > 0:
        # separator goes between categories only, not after the last one
        print("__________________________________")
    print(heading)
    print(compare_dicts_values(first_events, second_events),"% relative to LSTM events")

Overview and Comparison of High Confidence events from Europe 14 between RF Ellipse and RF EOF
Positive and Correct Events
Total number of RF events:  11494
Total number of LSTM events:  10604
Total number of overlap:  7295
68.79 % relative to LSTM events
__________________________________
Negative and Correct Events
Total number of RF events:  9876
Total number of LSTM events:  8304
Total number of overlap:  1285
15.47 % relative to LSTM events
__________________________________
Positive and Incorrect Events
Total number of RF events:  3618
Total number of LSTM events:  5196
Total number of overlap:  1175
22.61 % relative to LSTM events
__________________________________
Negative and Inorrect Events
Total number of RF events:  2002
Total number of LSTM events:  2895
Total number of overlap:  750
25.91 % relative to LSTM events


In [26]:
# For the two correct-prediction categories, report how many unique indices
# each model has and how they split into shared / model-specific indices.
for heading, rf_events, eof_events, draw_rule in (
    ("Positive and Correct Events", posXtest, pos_LSTM, True),
    ("Negative and Correct Events", negXtest, neg_LSTM, False),
):
    RF_count = collections.Counter(day for day in rf_events)  ##RF counter dictionary
    LSTM_count = collections.Counter(day for day in eof_events)  ##LSTM counter dictionary
    count, RF, EOF = compare_keys(rf_events, eof_events)
    print(heading)
    print(f"RF Ellipse total unique indices: {len(RF_count)}")
    print(f"RF EOF total unique indices: {len(LSTM_count)}")
    print(f"There are {len(count)} shared indices. The RF Ellipse model has {len(RF)} unique events and the RF EOF model has {len(EOF)}.")
    if draw_rule:
        # separator only between the two categories
        print("__________________________________")

Positive and Correct Events
RF Ellipse total unique indices: 226
RF EOF total unique indices: 146
There are 123 shared indices. The RF Ellipse model has 103 unique events and the RF EOF model has 23.
__________________________________
Negative and Correct Events
RF Ellipse total unique indices: 222
RF EOF total unique indices: 174
There are 75 shared indices. The RF Ellipse model has 147 unique events and the RF EOF model has 99.


In [7]:
#save shared keys for composite plots.
# NOTE(review): compare_keys returns a 3-tuple (shared, RF-only, LSTM-only), so
# each pickle below stores that whole tuple rather than only the shared keys —
# confirm that whatever reads these files expects that.
pos_count = compare_keys(posXtest, pos_LSTM)
neg_count = compare_keys(negXtest, neg_LSTM)
Fpos_count = compare_keys(FposXtest, Fpos_LSTM)
Fneg_count = compare_keys(FnegXtest, Fneg_LSTM)

# Write through `with` so every file is flushed and closed right away; the
# previous bare open(...) calls never closed their handles.
for out_path, keys in (
    ("./shared_keys/sharedPOS_eur14_eof.p", pos_count),
    ("./shared_keys/sharedFPOS_eur14_eof.p", Fpos_count),
    ("./shared_keys/sharedNEG_eur14_eof.p", neg_count),
    ("./shared_keys/sharedFNEG_eur14_eof.p", Fneg_count),
):
    with open(out_path, 'wb') as outfile:
        pickle.dump(keys, outfile)

## Now Canada.

In [8]:
##open CANADA indice files
# Each file is opened in a `with` block so its handle is closed even when
# unpickling fails. pickle.load can execute arbitrary code, so only load
# files from a trusted source.
# NOTE(review): the *_LSTM variables are read from ./RF_indices/ while the
# RF-side variables are read from ./Canada/ — confirm the directories are not swapped.
with open("./Canada/posXtest_can14.p", 'rb') as infile:
    posXtest = pickle.load(infile)

with open("./RF_indices/posXtest_can14.p", 'rb') as infile:
    pos_LSTM = pickle.load(infile)

with open("./Canada/FposXtest_can14.p", 'rb') as infile:
    FposXtest = pickle.load(infile)

with open("./RF_indices/FposXtest_can14.p", 'rb') as infile:
    Fpos_LSTM = pickle.load(infile)

with open("./Canada/negXtest_can14.p", 'rb') as infile:
    negXtest = pickle.load(infile)

with open("./RF_indices/negXtest_can14.p", 'rb') as infile:
    neg_LSTM = pickle.load(infile)

with open("./Canada/FnegXtest_can14.p", 'rb') as infile:
    FnegXtest = pickle.load(infile)

with open("./RF_indices/FnegXtest_can14.p", 'rb') as infile:
    Fneg_LSTM = pickle.load(infile)

In [9]:
# Print the overlap percentage for each of the four event categories.
print("Overview and Comparison of High Confidence events from Canada 14 between RF and LSTM")
event_groups = [
    ("Positive and Correct Events", posXtest, pos_LSTM),
    ("Negative and Correct Events", negXtest, neg_LSTM),
    ("Positive and Incorrect Events", FposXtest, Fpos_LSTM),
    ("Negative and Incorrect Events", FnegXtest, Fneg_LSTM),  # heading typo "Inorrect" fixed
]
for position, (heading, rf_events, lstm_events) in enumerate(event_groups):
    if position > 0:
        # separator goes between categories only, not after the last one
        print("__________________________________")
    print(heading)
    print(compare_dicts_values(rf_events, lstm_events),"% relative to LSTM events")

Overview and Comparison of High Confidence events from Canada 14 between RF and LSTM
Positive and Correct Events
Total number of RF events:  12534
Total number of LSTM events:  11774
Total number of overlap:  7614
64.67 % relative to LSTM events
__________________________________
Negative and Correct Events
Total number of RF events:  9742
Total number of LSTM events:  12426
Total number of overlap:  2825
22.73 % relative to LSTM events
__________________________________
Positive and Incorrect Events
Total number of RF events:  3755
Total number of LSTM events:  1074
Total number of overlap:  596
55.49 % relative to LSTM events
__________________________________
Negative and Inorrect Events
Total number of RF events:  965
Total number of LSTM events:  1726
Total number of overlap:  626
36.27 % relative to LSTM events


In [10]:
#save shared keys for composite plots.
# NOTE(review): compare_keys returns a 3-tuple (shared, RF-only, LSTM-only), so
# each pickle below stores that whole tuple rather than only the shared keys —
# confirm that whatever reads these files expects that.
pos_count = compare_keys(posXtest, pos_LSTM)
neg_count = compare_keys(negXtest, neg_LSTM)
Fpos_count = compare_keys(FposXtest, Fpos_LSTM)
Fneg_count = compare_keys(FnegXtest, Fneg_LSTM)

# Write through `with` so every file is flushed and closed right away; the
# previous bare open(...) calls never closed their handles.
for out_path, keys in (
    ("./shared_keys/sharedPOS_can14.p", pos_count),
    ("./shared_keys/sharedFPOS_can14.p", Fpos_count),
    ("./shared_keys/sharedNEG_can14.p", neg_count),
    ("./shared_keys/sharedFNEG_can14.p", Fneg_count),
):
    with open(out_path, 'wb') as outfile:
        pickle.dump(keys, outfile)

## Last, SE US.

In [11]:
##open SEUS indice files
# Each file is opened in a `with` block so its handle is closed even when
# unpickling fails. pickle.load can execute arbitrary code, so only load
# files from a trusted source.
# NOTE(review): the *_LSTM variables are read from ./RF_indices/ while the
# RF-side variables are read from ./SEUS/ — confirm the directories are not swapped.
with open("./SEUS/posXtest_seus14.p", 'rb') as infile:
    posXtest = pickle.load(infile)

with open("./RF_indices/posXtest_seus14.p", 'rb') as infile:
    pos_LSTM = pickle.load(infile)

with open("./SEUS/FposXtest_seus14.p", 'rb') as infile:
    FposXtest = pickle.load(infile)

with open("./RF_indices/FposXtest_seus14.p", 'rb') as infile:
    Fpos_LSTM = pickle.load(infile)

with open("./SEUS/negXtest_seus14.p", 'rb') as infile:
    negXtest = pickle.load(infile)

with open("./RF_indices/negXtest_seus14.p", 'rb') as infile:
    neg_LSTM = pickle.load(infile)

with open("./SEUS/FnegXtest_seus14.p", 'rb') as infile:
    FnegXtest = pickle.load(infile)

with open("./RF_indices/FnegXtest_seus14.p", 'rb') as infile:
    Fneg_LSTM = pickle.load(infile)

In [12]:
# Print the overlap percentage for each of the four event categories.
print("Overview and Comparison of High Confidence events from SE US 14 between RF and LSTM")
event_groups = [
    ("Positive and Correct Events", posXtest, pos_LSTM),
    ("Negative and Correct Events", negXtest, neg_LSTM),
    ("Positive and Incorrect Events", FposXtest, Fpos_LSTM),
    ("Negative and Incorrect Events", FnegXtest, Fneg_LSTM),  # heading typo "Inorrect" fixed
]
for position, (heading, rf_events, lstm_events) in enumerate(event_groups):
    if position > 0:
        # separator goes between categories only, not after the last one
        print("__________________________________")
    print(heading)
    print(compare_dicts_values(rf_events, lstm_events),"% relative to LSTM events")

Overview and Comparison of High Confidence events from SE US 14 between RF and LSTM
Positive and Correct Events
Total number of RF events:  8608
Total number of LSTM events:  7907
Total number of overlap:  3472
43.91 % relative to LSTM events
__________________________________
Negative and Correct Events
Total number of RF events:  10276
Total number of LSTM events:  11290
Total number of overlap:  2463
21.82 % relative to LSTM events
__________________________________
Positive and Incorrect Events
Total number of RF events:  3222
Total number of LSTM events:  2210
Total number of overlap:  234
10.59 % relative to LSTM events
__________________________________
Negative and Inorrect Events
Total number of RF events:  4891
Total number of LSTM events:  5593
Total number of overlap:  826
14.77 % relative to LSTM events


In [13]:
#save shared keys for composite plots.
# NOTE(review): compare_keys returns a 3-tuple (shared, RF-only, LSTM-only), so
# each pickle below stores that whole tuple rather than only the shared keys —
# confirm that whatever reads these files expects that.
pos_count = compare_keys(posXtest, pos_LSTM)
neg_count = compare_keys(negXtest, neg_LSTM)
Fpos_count = compare_keys(FposXtest, Fpos_LSTM)
Fneg_count = compare_keys(FnegXtest, Fneg_LSTM)

# Write through `with` so every file is flushed and closed right away; the
# previous bare open(...) calls never closed their handles.
for out_path, keys in (
    ("./shared_keys/sharedPOS_seus14.p", pos_count),
    ("./shared_keys/sharedFPOS_seus14.p", Fpos_count),
    ("./shared_keys/sharedNEG_seus14.p", neg_count),
    ("./shared_keys/sharedFNEG_seus14.p", Fneg_count),
):
    with open(out_path, 'wb') as outfile:
        pickle.dump(keys, outfile)