### Create Input/Output datasets for testing in RF model.

12/1/2025

In [1]:
#relevant import statements
import numpy as np
import math
import pandas as pd
import xarray as xr 
import pickle 

In [2]:
#______________Open file___________________
def openfile(path1,path2):
    """Load a pair of pickled datasets from ``../UFS_metrics``.

    Parameters
    ----------
    path1 : str
        File stem (without the ``.p`` extension) of the first pickle; its
        contents are returned first.
    path2 : str
        File stem (without the ``.p`` extension) of the second pickle; its
        contents are returned second.

    Returns
    -------
    tuple
        ``(actual, forecast)``: the objects unpickled from ``path1`` and
        ``path2`` respectively.

    Raises
    ------
    FileNotFoundError
        If either pickle file does not exist.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while loading, so only point
    this at files you produced yourself or otherwise trust.
    """
    # `with` guarantees each handle is closed even if unpickling raises.
    with open(f'../UFS_metrics/{path1}.p', 'rb') as infile:
        actual = pickle.load(infile)

    with open(f'../UFS_metrics/{path2}.p', 'rb') as infile:
        forecast = pickle.load(infile)

    return actual, forecast

In [3]:
# Load every metric as an (ERA-5, UFS) pair; the first name of each pair holds
# the ERA-5 pickle and the second holds the UFS pickle.

# --- predictors ---
# zonal mean wind
w_actual, w_forecast = openfile(path1="ERA5_wind10", path2="UFS_wind10")
# size
sz_actual, sz_forecast = openfile(path1="ERA5_size10", path2="UFS_size10")
# centroid latitude
cenlat_actual, cenlat_forecast = openfile(path1="ERA5_cenlat10", path2="UFS_cenlat10")
# centroid longitude
cenlon_actual, cenlon_forecast = openfile(path1="ERA5_cenlon10", path2="UFS_cenlon10")
# ratio
ratio_actual, ratio_forecast = openfile(path1="ERA5_ratio10", path2="UFS_ratio10")
# ephi
ephi_actual, ephi_forecast = openfile(path1="ERA5_ephi10", path2="UFS_ephi10")
# GPH
gph_actual, gph_forecast = openfile(path1="ERA5_gph", path2="UFS_GPHanoms100")

# --- regional temperatures ---
eur_actual, eur_forecast = openfile(path1="ERA5_eurtemp", path2="UFS_EurTempAnoms")
can_actual, can_forecast = openfile(path1="ERA5_novatemp", path2="UFS_CanTempAnoms")
seus_actual, seus_forecast = openfile(path1="ERA5_seustemp", path2="UFS_SEUSTempAnoms")

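Start with the ellipse metrics and the other predictors (zonal-mean wind, aspect ratio, centroid latitude/longitude, size, ephi, and GPH): smooth and normalize each one, then save one input array per prototype.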

In [4]:
def normalize_metrics(data, n_days=18, window=7):
    """Smooth a metric with a running mean and min-max scale it to [0, 1].

    Parameters
    ----------
    data : numpy.ndarray
        Array holding exactly ``7*8*36`` values; it is reshaped to
        ``(56, 36)`` so the first two axes are merged into one.
        # assumes the layout is (year, forecast, day) -- TODO confirm
    n_days : int, optional
        Number of leading columns (days) kept after the reshape. Default 18.
    window : int, optional
        Width of the trailing running-mean window applied along each row.
        Default 7. ``min_periods=1`` is used, so the first ``window - 1``
        columns average over however many values are available.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(56, n_days)`` scaled with the global minimum and
        maximum of the smoothed values. If the smoothed field is constant
        (max == min) an all-zero array is returned instead of dividing by
        zero and producing NaNs.
    """
    data = data.reshape((7*8,36)) #reshape to combine year/forecast axis
    datafr = pd.DataFrame(data[:,0:n_days]) #turn into dataframe

    # Rolling works down the rows of a frame, so transpose, smooth every
    # original row in one call, and transpose back.
    running_mean = datafr.T.rolling(window=window, min_periods=1).mean().T

    ##isolate arrays
    value = running_mean.values

    ##max min normalize arrays (one global min/max for the whole field)
    lowest = value.min()
    span = value.max() - lowest
    if span == 0:
        # Constant field: avoid 0/0 and map everything to 0.
        return np.zeros_like(value, dtype=float)

    return (value - lowest) / span

In [5]:
#separated by prototype
prototype = ["Prototype5","Prototype6","Prototype7","Prototype8","ERA5"]

# Predictor order along the last axis of every saved array:
# wind, ratio, cenlat, cenlon, size, ephi, GPH.
forecast_metrics = [w_forecast, ratio_forecast, cenlat_forecast, cenlon_forecast,
                    sz_forecast, ephi_forecast, gph_forecast]
actual_metrics = [w_actual, ratio_actual, cenlat_actual, cenlon_actual,
                  sz_actual, ephi_actual, gph_actual]

for i in range(0,5):
    # Named `rf_input` (not `input`) so the built-in input() is not shadowed.
    rf_input = np.empty((7*8,18,len(forecast_metrics)))

    if i < 4:
        # UFS prototypes: pick prototype i along the third axis of each metric.
        fields = [metric[:,:,i,:] for metric in forecast_metrics]
    else:
        # Last entry ("ERA5") uses the ERA-5 arrays, which have no prototype axis.
        fields = actual_metrics

    for k, field in enumerate(fields):
        rf_input[:,:,k] = normalize_metrics(field)

    # Context manager flushes and closes the file as soon as the dump finishes.
    with open(f'./data/input_{prototype[i]}.p', 'wb') as outfile:
        pickle.dump(rf_input, outfile)

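Now the temperature data: classify each regional temperature anomaly as positive (1) or negative (0), and build the matching climatological class frequencies.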

In [16]:
def climo_two_class(data, n_years=7):
    """Compute per-slot climatological frequencies of the two classes.

    The flat label array is reshaped to ``(n_years, slots)``. For every slot
    (column) the fraction of years labelled 0 and the fraction labelled 1 are
    computed and rounded to two decimals. That table is then repeated once
    per year so it lines up row-for-row with the flat label array.

    Parameters
    ----------
    data : array-like
        Class labels (0 or 1) whose total size is a multiple of ``n_years``.
        Values that are neither 0 nor 1 (e.g. NaN) count towards neither class.
        # assumes year is the slowest-varying axis of the flat array -- TODO confirm
    n_years : int, optional
        Number of years the labels span. Default 7.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(data.size, 2)``; column 0 is the frequency of
        class 0 and column 1 the frequency of class 1.
    """
    print("Entering Climo Classification")
    #reshape the classified array: one row per year, one column per slot
    data_mod = np.asarray(data).reshape((n_years, -1))

    # Count each class down the year axis in one vectorised step.
    prob0 = np.round((data_mod == 0).sum(axis=0) / n_years, 2)
    prob1 = np.round((data_mod == 1).sum(axis=0) / n_years, 2)
    daily = np.column_stack((prob0, prob1))

    ##now repeat the climatology for every year and flatten to (n_years*slots, 2)
    return np.tile(daily, (n_years, 1))
    
#____________________________________________________________________________#
def temp_two_class(data, region, prototype):
    """Classify temperature anomalies into two classes and save the results.

    Columns 14..31 of the last axis are kept and flattened to ``7*8*18``
    values. Each value is labelled 1 if positive and 0 if negative. An exact
    zero is assigned to whichever class currently has fewer members (class 1
    on a tie). The labels and their climatological frequencies (from
    ``climo_two_class``) are pickled under ``./data``.

    Parameters
    ----------
    data : numpy.ndarray
        Three-dimensional anomaly array whose slice ``[:, :, 14:32]`` holds
        exactly ``7*8*18`` values.
        # assumes the axes are (year, forecast, day) -- TODO confirm
    region : str
        Region label used in the output file names.
    prototype : str
        Prototype/dataset label used in the output file names.

    Returns
    -------
    None
        Results are written to
        ``./data/2classtemps_{region}_{prototype}.p`` and
        ``./data/2classclimo_{region}_{prototype}.p``.

    Notes
    -----
    NaN inputs cannot be classified; a warning is printed and their label is
    left as NaN rather than an arbitrary uninitialised value.
    """
    print("Conducting Anomaly Classification")
    #restrict to observed 18 days, 14-day lag
    data_mod = data[:,:,14:32]
    #specify new dimension
    new_len = int(7*8*18)
    #flatten temp
    te1 = np.reshape(data_mod, new_len)
    # NaN-filled (not np.empty) so unclassifiable entries are explicit, not garbage.
    st = np.full(new_len, np.nan)

    #check for NaNs
    if np.any(np.isnan(te1)) or np.any(np.isinf(te1)):
        print("NaN or Inf values found in te1!")

    # running class sizes, used only to break exact-zero ties
    n_pos = 0
    n_neg = 0
    for i in range(0,new_len):
        if te1[i] > 0:
            st[i] = 1
            n_pos += 1

        elif te1[i] < 0:
            st[i] = 0
            n_neg += 1

        # exact zero: give it to the currently smaller class to keep them balanced
        elif te1[i] == 0:
            if n_pos <= n_neg:
                st[i] = 1  # was `e_st[i]`, an undefined name (NameError)
                n_pos += 1
            else:
                st[i] = 0
                n_neg += 1

    ##do climo calc
    climo = climo_two_class(st)
    print("Climo Classification Done")
    print("Shape of Anoms: ", st.shape)
    print("Shape of Climo: ", climo.shape)
    print("Saving files ...")
    # Context managers make sure both files are flushed and closed.
    with open(f'./data/2classtemps_{str(region)}_{str(prototype)}.p', 'wb') as outfile:
        pickle.dump(st, outfile)
    with open(f'./data/2classclimo_{str(region)}_{str(prototype)}.p', 'wb') as outfile:
        pickle.dump(climo, outfile)
    print("Done.")
    print("##############")

In [18]:
# Run the two-class temperature classification for every prototype and region.
prototype = ["Prototype5","Prototype6","Prototype7","Prototype8","ERA5"]
region = ["Europe","Canada","SEUS"]

for i, proto_name in enumerate(prototype):
    # Regions are visited in the order Europe, Canada, SEUS for each prototype.
    for region_name, forecast_temps, actual_temps in zip(
        region,
        (eur_forecast, can_forecast, seus_forecast),
        (eur_actual, can_actual, seus_actual),
    ):
        if i < 4:
            # UFS prototypes: select prototype i along the third axis
            temps = forecast_temps[:,:,i,:]
        else:
            # final entry is ERA5, which is passed through whole
            temps = actual_temps
        temp_two_class(temps, region_name, proto_name)

Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of Anoms:  (1008,)
Shape of Climo:  (1008, 2)
Saving files ...
Done.
##############
Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of Anoms:  (1008,)
Shape of Climo:  (1008, 2)
Saving files ...
Done.
##############
Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of Anoms:  (1008,)
Shape of Climo:  (1008, 2)
Saving files ...
Done.
##############
Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of Anoms:  (1008,)
Shape of Climo:  (1008, 2)
Saving files ...
Done.
##############
Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of Anoms:  (1008,)
Shape of Climo:  (1008, 2)
Saving files ...
Done.
##############
Conducting Anomaly Classification
Entering Climo Classification
Climo Classification Done
Shape of A