### File for comparing lists from RF and LSTM models for 90th confidence predictions ...

7/25/2025

This time just for the EOF LSTM and RF. 

In [1]:
#relevant import statements
import numpy as np
import math
import pandas as pd
import xarray as xr 
import pickle 
import matplotlib.pyplot as plt

import collections

In [2]:
# Index grids for the two models. Because of how the RF models were modified,
# the RF and LSTM dates match, so the earlier array-index mismatch issue is
# largely circumvented: both grids hold 0..1349 laid out as 10 rows of 135.
ranges_RF = np.arange(135 * 10).reshape(10, 135)
print("RF dates shape:", ranges_RF.shape)

ranges_LSTM = np.arange(135 * 10).reshape(10, 135)
print("LSTM dates shape:", ranges_LSTM.shape)

RF dates shape: (10, 135)
LSTM dates shape: (10, 135)


In [3]:
#flatten ranges to allow mapping of the rep. arrays.
# The RF and LSTM grids are expected to be identical. Check that explicitly so
# a later change to only one of them fails here instead of silently
# mis-mapping indices further down.
assert np.array_equal(ranges_RF, ranges_LSTM), "RF and LSTM index grids differ"

# Each map is derived from its own grid rather than borrowing the other model's.
flat_RF_map = ranges_RF.flatten()
flat_LSTM_map = ranges_LSTM.flatten()
#both should be the same shape, covering the same dates

### Definition statement for calculating overlap. 

In [8]:
#definition to compile and compare counts across lists/dictionaries
def compare_dicts_values(list1, list2):
    """Print event totals for two lists and return their overlap percentage.

    Both inputs are tallied per entry. For every entry present in both
    tallies, the smaller of the two counts is taken as the overlap for that
    entry, and those overlaps are summed.

    Parameters
    ----------
    list1 : sized iterable of hashable values
        First set of events; its length is printed as the "RF events" total.
    list2 : sized iterable of hashable values
        Second set of events; its length is printed as the "LSTM events"
        total and is the denominator of the returned percentage.

    Returns
    -------
    float
        ``overlap / len(list2) * 100`` rounded to two decimals, or ``0.0``
        when ``list2`` is empty (nothing to compare against).
    """
    RF_count = collections.Counter(list1)    ##RF counter dictionary
    LSTM_count = collections.Counter(list2)  ##LSTM counter dictionary

    # Counter intersection keeps, for each shared key, the smaller of the two
    # counts -- i.e. the number of events both lists agree on for that key.
    shared_counts = RF_count & LSTM_count
    total_sum = sum(shared_counts.values())  #look at total count for all events

    print("Total number of RF events: ",len(list1))
    print("Total number of LSTM events: ",len(list2))
    print("Total number of overlap: ",total_sum)

    # Guard the division: an empty second list has no overlap to express.
    if len(list2) == 0:
        return 0.0
    return round(total_sum/len(list2) *100,2)

#_________________________________________________________________
##this instance is just to save the keys so that I can look at composites ... for those dates. 
def compare_keys(list1, list2):
    """Split the distinct entries of two lists into shared and exclusive keys.

    Repeat counts are ignored; only whether an entry occurs matters. Every
    returned list keeps the order in which entries first appear in the list
    they are drawn from.

    Parameters
    ----------
    list1 : iterable of hashable values
        Entries from the first model (the RF side).
    list2 : iterable of hashable values
        Entries from the second model (the LSTM side).

    Returns
    -------
    tuple of (list, list, list)
        ``(shared, only_in_list1, only_in_list2)``.
    """
    # dict.fromkeys gives the distinct entries in first-seen order.
    rf_keys = dict.fromkeys(list1)
    lstm_keys = dict.fromkeys(list2)

    # Keys seen by both models: these are the ones to look at for composites.
    shared = [key for key in rf_keys if key in lstm_keys]
    # Keys seen by only one model; kept for possible later RF/LSTM-only analysis.
    rf_exclusive = [key for key in rf_keys if key not in lstm_keys]
    lstm_exclusive = [key for key in lstm_keys if key not in rf_keys]

    return shared, rf_exclusive, lstm_exclusive
#ultimately these composites would look at the average across the # of shared dates, ignoring the count of them. 

## Europe first. 

In [9]:
##open EUROPE indice files
def _load_pickle(path):
    """Return the object unpickled from ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    ``pickle.load`` raises.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(path, 'rb') as infile:
        return pickle.load(infile)


# NOTE(review): the *_LSTM variables are read from ./RF_indices/ while the
# *Xtest variables come from ./Europe/*_eof.p -- confirm the names still
# describe which model produced each file.
pos_LSTM = _load_pickle("./RF_indices/posXtest_eur14.p")
posXtest = _load_pickle("./Europe/posXtest_eur14_eof.p")

Fpos_LSTM = _load_pickle("./RF_indices/FposXtest_eur14.p")
FposXtest = _load_pickle("./Europe/FposXtest_eur14_eof.p")

neg_LSTM = _load_pickle("./RF_indices/negXtest_eur14.p")
negXtest = _load_pickle("./Europe/negXtest_eur14_eof.p")

Fneg_LSTM = _load_pickle("./RF_indices/FnegXtest_eur14.p")
FnegXtest = _load_pickle("./Europe/FnegXtest_eur14_eof.p")

In [10]:
# Overlap report for the four event categories. compare_dicts_values divides
# the overlap by the length of its SECOND argument (the list it prints as
# "LSTM events"), so the percentage is labelled relative to that list.
print("Overview and Comparison of High Confidence events from Europe 14 between RF Ellipse and RF EOF")
event_groups = [
    ("Positive and Correct Events", posXtest, pos_LSTM),
    ("Negative and Correct Events", negXtest, neg_LSTM),
    ("Positive and Incorrect Events", FposXtest, Fpos_LSTM),
    ("Negative and Incorrect Events", FnegXtest, Fneg_LSTM),
]
for position, (title, rf_events, lstm_events) in enumerate(event_groups):
    if position:
        # divider between categories, not before the first one
        print("__________________________________")
    print(title)
    print(compare_dicts_values(rf_events, lstm_events),"% relative to LSTM events")

Overview and Comparison of High Confidence events from Europe 14 between RF Ellipse and RF EOF
Positive and Correct Events
Total number of RF events:  10604
Total number of LSTM events:  9172
Total number of overlap:  6751
73.6 % relative to RF events
__________________________________
Negative and Correct Events
Total number of RF events:  8304
Total number of LSTM events:  9661
Total number of overlap:  2588
26.79 % relative to RF events
__________________________________
Positive and Incorrect Events
Total number of RF events:  5196
Total number of LSTM events:  3839
Total number of overlap:  1247
32.48 % relative to RF events
__________________________________
Negative and Inorrect Events
Total number of RF events:  2895
Total number of LSTM events:  4328
Total number of overlap:  1508
34.84 % relative to RF events


In [17]:
# For the correct positive and correct negative categories, report how many
# distinct indices each model flagged and how many of them are shared.
# The names RF_count, LSTM_count, count, RF and EOF are reassigned on every
# pass, so after the loop they hold the values for the negative category.
for heading, rf_days, lstm_days, divider_after in (
    ("Positive and Correct Events", posXtest, pos_LSTM, True),
    ("Negative and Correct Events", negXtest, neg_LSTM, False),
):
    RF_count = collections.Counter(rf_days)      ##RF counter dictionary
    LSTM_count = collections.Counter(lstm_days)  ##LSTM counter dictionary
    # compare_keys returns (shared, first-list-only, second-list-only)
    count, RF, EOF = compare_keys(rf_days, lstm_days)
    print(heading)
    print("RF EOF total unique indices: ", len(RF_count), sep="")
    print("LSTM total unique indices: ", len(LSTM_count), sep="")
    print("There are ", len(count), " shared indices. The RF EOF model has ", len(RF), " unique events and the LSTM model has ", len(EOF), ".", sep="")
    if divider_after:
        print("__________________________________")

Positive and Correct Events
RF EOF total unique indices: 146
LSTM total unique indices: 201
There are 119 shared indices. The RF EOF model has 27 unique events and the LSTM model has 82.
__________________________________
Negative and Correct Events
RF EOF total unique indices: 174
LSTM total unique indices: 516
There are 142 shared indices. The RF EOF model has 32 unique events and the LSTM model has 374.
