In [1]:
from shapely.geometry.polygon import Polygon,Point
from shapely.geometry import MultiLineString
from shapely import points,linestrings,polygons,minimum_bounding_circle,union
from shapely import GeometryCollection, LineString, MultiPoint, Point, Polygon
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeat
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import geopandas as gpd
import shapely as shp
import xmltodict
import json
import os
from datetime import datetime
import glob
from sklearn.metrics.pairwise import haversine_distances
from scipy.ndimage import gaussian_filter
import seaborn as sns
from pyproj import CRS
import xesmf as xe

In [2]:
# Load the Storm Events details file, keep the tornado reports, shift their begin/end
# times to UTC and split every report that crosses an hour boundary into one segment
# per hour (segment end points are linearly interpolated in time along the track).
df_storm=pd.read_csv('/users/baldwin/Downloads/StormEvents_details-ftp_v1.0_d2025_c20260116.csv.gz')

df_tornado=df_storm[df_storm.EVENT_TYPE=='Tornado'].copy()
df_tornado['begin_datetime']=pd.to_datetime(df_tornado.BEGIN_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_tornado['end_datetime']=pd.to_datetime(df_tornado.END_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_tornado['duration']=df_tornado.end_datetime-df_tornado.begin_datetime

# Hours added to the reported time for each CZ_TIMEZONE label to obtain UTC.
# Rows with any other CZ_TIMEZONE label are left out, exactly as before.
utc_offset_hours={'AST-4':4,'EST-5':5,'CST-6':6,'MST-7':7,'PST-8':8,'AKST-9':9}
frames_by_tz={}
for tz_label,offset in utc_offset_hours.items():
    tz_frame=df_tornado[df_tornado.CZ_TIMEZONE==tz_label].copy()
    tz_frame['begin_datetime']+=pd.Timedelta(hours=offset)
    tz_frame['end_datetime']+=pd.Timedelta(hours=offset)
    frames_by_tz[tz_label]=tz_frame
# Per-zone frames keep their original names in case another cell uses them.
df_torn_ast=frames_by_tz['AST-4']
df_torn_est=frames_by_tz['EST-5']
df_torn_cst=frames_by_tz['CST-6']
df_torn_mst=frames_by_tz['MST-7']
df_torn_pst=frames_by_tz['PST-8']
df_torn_akst=frames_by_tz['AKST-9']
# FIX: the MST-7 frame used to be shifted to UTC but was never concatenated, so every
# Mountain-time report was silently dropped.
df_torn_utc=pd.concat(list(frames_by_tz.values()))
df_torn_utc['begin_hour_top']=df_torn_utc.begin_datetime.dt.ceil('h')
df_torn_utc['end_hour_top']=df_torn_utc.end_datetime.dt.ceil('h')

df_multihour=df_torn_utc[df_torn_utc['begin_hour_top']!=df_torn_utc['end_hour_top']]
df_singlehour=df_torn_utc[df_torn_utc['begin_hour_top']==df_torn_utc['end_hour_top']]

# Positional column indices looked up by name (previously hard-coded as 44..47, 51, 52).
col_pos={name:df_multihour.columns.get_loc(name)
         for name in ('BEGIN_LAT','BEGIN_LON','END_LAT','END_LON','begin_datetime','end_datetime')}

all_dfs=[]
for i in np.arange(len(df_multihour)):
    selected_row=df_multihour.iloc[i]
    # One-second time axis from begin to end; only the two end points carry coordinates,
    # everything in between is filled by time-weighted linear interpolation.
    datetimes=pd.date_range(start=selected_row.begin_datetime,end=selected_row.end_datetime,freq='s')
    dftest=pd.DataFrame({'lat':np.full(len(datetimes),np.nan),'lon':np.full(len(datetimes),np.nan)},index=datetimes)
    hour_tops=datetimes.ceil('h').unique()
    dftest.loc[selected_row.begin_datetime,'lat']=selected_row.BEGIN_LAT
    dftest.loc[selected_row.begin_datetime,'lon']=selected_row.BEGIN_LON
    dftest.loc[selected_row.end_datetime,'lat']=selected_row.END_LAT
    dftest.loc[selected_row.end_datetime,'lon']=selected_row.END_LON
    df_interp=dftest.interpolate(method='time').reset_index(names='datetime')
    interp_hour_top=df_interp.datetime.dt.ceil('h')
    # One copy of the report per hour; begin/end position and time are overwritten below.
    df_new=pd.DataFrame([selected_row]*len(hour_tops))
    for row,hour in enumerate(hour_tops):
        in_hour=df_interp[interp_hour_top==hour]
        first=in_hour.iloc[0]
        last=in_hour.iloc[-1]
        df_new.iloc[row,col_pos['BEGIN_LAT']]=first.lat
        df_new.iloc[row,col_pos['BEGIN_LON']]=first.lon
        df_new.iloc[row,col_pos['begin_datetime']]=first.datetime
        df_new.iloc[row,col_pos['END_LAT']]=last.lat
        df_new.iloc[row,col_pos['END_LON']]=last.lon
        df_new.iloc[row,col_pos['end_datetime']]=last.datetime

    all_dfs.append(df_new)

# pd.concat raises on an empty list, so handle "no multi-hour reports" explicitly.
if all_dfs:
    df_split_hours=pd.concat(all_dfs,ignore_index=True)
    df_torn_final=pd.concat([df_singlehour,df_split_hours],ignore_index=True)
else:
    df_split_hours=df_multihour.iloc[:0].copy()
    df_torn_final=df_singlehour.reset_index(drop=True)
df_torn_final['begin_hour_top']=df_torn_final.begin_datetime.dt.ceil('h')
df_torn_final['end_hour_top']=df_torn_final.end_datetime.dt.ceil('h')

# Numeric magnitude from the EF rating (EFU -> -1); other TOR_F_SCALE values keep
# whatever MAGNITUDE they already had.
ef_to_mag={'EFU':-1.,'EF0':0.,'EF1':1.,'EF2':2.,'EF3':3.,'EF4':4.,'EF5':5.}
rated=df_torn_final.TOR_F_SCALE.isin(list(ef_to_mag))
df_torn_final.loc[rated,'MAGNITUDE']=df_torn_final.loc[rated,'TOR_F_SCALE'].map(ef_to_mag)
#df_ef1_ef5=df_torn_final[~((df_torn_final.TOR_F_SCALE=='EFU')|(df_torn_final.TOR_F_SCALE=='EF0'))]

# Compact table used for verification: magnitude, hour-top of the segment end, start/end lon/lat.
df_torn=(df_torn_final
         .filter(items=['MAGNITUDE','end_hour_top','BEGIN_LON','BEGIN_LAT','END_LON','END_LAT'])
         .rename(columns={'MAGNITUDE':'mag','end_hour_top':'datetime','BEGIN_LON':'slon','BEGIN_LAT':'slat','END_LON':'elon','END_LAT':'elat'}))
df_ef1_ef5=df_torn[df_torn.mag>=1.].copy()

#df_torn_final.to_csv('/users/baldwin/datasets/torn_seg_hour_2025.csv')

In [3]:
# Thunderstorm-wind reports: shift begin/end times to UTC, split reports that cross an
# hour boundary into one segment per hour, then keep reports with MAGNITUDE >= 50.
# The raw event frame is kept under its own name so that `df_wind` only ever refers to
# the final compact table.
df_wind_all=df_storm[df_storm.EVENT_TYPE=='Thunderstorm Wind'].copy()
df_wind_all['begin_datetime']=pd.to_datetime(df_wind_all.BEGIN_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_wind_all['end_datetime']=pd.to_datetime(df_wind_all.END_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_wind_all['duration']=df_wind_all.end_datetime-df_wind_all.begin_datetime

# Hours added to the reported time for each CZ_TIMEZONE label to obtain UTC.
# Rows with any other CZ_TIMEZONE label are left out, exactly as before.
utc_offset_hours={'AST-4':4,'EST-5':5,'CST-6':6,'MST-7':7,'PST-8':8,'AKST-9':9}
frames_by_tz={}
for tz_label,offset in utc_offset_hours.items():
    tz_frame=df_wind_all[df_wind_all.CZ_TIMEZONE==tz_label].copy()
    tz_frame['begin_datetime']+=pd.Timedelta(hours=offset)
    tz_frame['end_datetime']+=pd.Timedelta(hours=offset)
    frames_by_tz[tz_label]=tz_frame
# Per-zone frames keep their original names in case another cell uses them.
df_wind_ast=frames_by_tz['AST-4']
df_wind_est=frames_by_tz['EST-5']
df_wind_cst=frames_by_tz['CST-6']
df_wind_mst=frames_by_tz['MST-7']
df_wind_pst=frames_by_tz['PST-8']
df_wind_akst=frames_by_tz['AKST-9']
# FIX: the MST-7 frame used to be shifted to UTC but was never concatenated, so every
# Mountain-time report was silently dropped.
df_wind_utc=pd.concat(list(frames_by_tz.values()))
df_wind_utc['begin_hour_top']=df_wind_utc.begin_datetime.dt.ceil('h')
df_wind_utc['end_hour_top']=df_wind_utc.end_datetime.dt.ceil('h')

df_multihour=df_wind_utc[df_wind_utc['begin_hour_top']!=df_wind_utc['end_hour_top']]
df_singlehour=df_wind_utc[df_wind_utc['begin_hour_top']==df_wind_utc['end_hour_top']]

# Positional column indices looked up by name (previously hard-coded as 44..47, 51, 52).
col_pos={name:df_multihour.columns.get_loc(name)
         for name in ('BEGIN_LAT','BEGIN_LON','END_LAT','END_LON','begin_datetime','end_datetime')}

all_dfs=[]
for i in np.arange(len(df_multihour)):
    selected_row=df_multihour.iloc[i]
    # One-second time axis from begin to end; only the two end points carry coordinates,
    # everything in between is filled by time-weighted linear interpolation.
    datetimes=pd.date_range(start=selected_row.begin_datetime,end=selected_row.end_datetime,freq='s')
    dftest=pd.DataFrame({'lat':np.full(len(datetimes),np.nan),'lon':np.full(len(datetimes),np.nan)},index=datetimes)
    hour_tops=datetimes.ceil('h').unique()
    dftest.loc[selected_row.begin_datetime,'lat']=selected_row.BEGIN_LAT
    dftest.loc[selected_row.begin_datetime,'lon']=selected_row.BEGIN_LON
    dftest.loc[selected_row.end_datetime,'lat']=selected_row.END_LAT
    dftest.loc[selected_row.end_datetime,'lon']=selected_row.END_LON
    df_interp=dftest.interpolate(method='time').reset_index(names='datetime')
    interp_hour_top=df_interp.datetime.dt.ceil('h')
    # One copy of the report per hour; begin/end position and time are overwritten below.
    df_new=pd.DataFrame([selected_row]*len(hour_tops))
    for row,hour in enumerate(hour_tops):
        in_hour=df_interp[interp_hour_top==hour]
        first=in_hour.iloc[0]
        last=in_hour.iloc[-1]
        df_new.iloc[row,col_pos['BEGIN_LAT']]=first.lat
        df_new.iloc[row,col_pos['BEGIN_LON']]=first.lon
        df_new.iloc[row,col_pos['begin_datetime']]=first.datetime
        df_new.iloc[row,col_pos['END_LAT']]=last.lat
        df_new.iloc[row,col_pos['END_LON']]=last.lon
        df_new.iloc[row,col_pos['end_datetime']]=last.datetime

    all_dfs.append(df_new)

# pd.concat raises on an empty list, so handle "no multi-hour reports" explicitly.
if all_dfs:
    df_split_hours=pd.concat(all_dfs,ignore_index=True)
    df_wind_final=pd.concat([df_singlehour,df_split_hours],ignore_index=True)
else:
    df_split_hours=df_multihour.iloc[:0].copy()
    df_wind_final=df_singlehour.reset_index(drop=True)
df_wind_final['begin_hour_top']=df_wind_final.begin_datetime.dt.ceil('h')
df_wind_final['end_hour_top']=df_wind_final.end_datetime.dt.ceil('h')

# Compact tables used for verification: magnitude, hour-top of the segment end, start/end lon/lat.
report_cols=['MAGNITUDE','end_hour_top','BEGIN_LON','BEGIN_LAT','END_LON','END_LAT']
short_names={'MAGNITUDE':'mag','end_hour_top':'datetime','BEGIN_LON':'slon','BEGIN_LAT':'slat','END_LON':'elon','END_LAT':'elat'}
df_wind_svr=df_wind_final[df_wind_final.MAGNITUDE>=50.]
# Subset whose MAGNITUDE_TYPE is 'MG' or 'MS'.
is_measured=df_wind_svr.MAGNITUDE_TYPE.isin(['MG','MS'])
df_wind_measvr=df_wind_svr[is_measured].filter(items=report_cols).rename(columns=short_names)
df_wind=df_wind_svr.filter(items=report_cols).rename(columns=short_names)

In [4]:
# Hail reports: shift begin/end times to UTC, split reports that cross an hour boundary
# into one segment per hour, then keep reports with MAGNITUDE >= 1.
# The raw event frame is kept under its own name so that `df_hail` only ever refers to
# the final compact table.
df_hail_all=df_storm[df_storm.EVENT_TYPE=='Hail'].copy()
df_hail_all['begin_datetime']=pd.to_datetime(df_hail_all.BEGIN_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_hail_all['end_datetime']=pd.to_datetime(df_hail_all.END_DATE_TIME,format='%d-%b-%y %H:%M:%S')
df_hail_all['duration']=df_hail_all.end_datetime-df_hail_all.begin_datetime

# Hours added to the reported time for each CZ_TIMEZONE label to obtain UTC.
# Rows with any other CZ_TIMEZONE label are left out, exactly as before.
utc_offset_hours={'AST-4':4,'EST-5':5,'CST-6':6,'MST-7':7,'PST-8':8,'AKST-9':9}
frames_by_tz={}
for tz_label,offset in utc_offset_hours.items():
    tz_frame=df_hail_all[df_hail_all.CZ_TIMEZONE==tz_label].copy()
    tz_frame['begin_datetime']+=pd.Timedelta(hours=offset)
    tz_frame['end_datetime']+=pd.Timedelta(hours=offset)
    frames_by_tz[tz_label]=tz_frame
# Per-zone frames keep their original names in case another cell uses them.
df_hail_ast=frames_by_tz['AST-4']
df_hail_est=frames_by_tz['EST-5']
df_hail_cst=frames_by_tz['CST-6']
df_hail_mst=frames_by_tz['MST-7']
df_hail_pst=frames_by_tz['PST-8']
df_hail_akst=frames_by_tz['AKST-9']
# FIX: the MST-7 frame used to be shifted to UTC but was never concatenated, so every
# Mountain-time report was silently dropped.
df_hail_utc=pd.concat(list(frames_by_tz.values()))
df_hail_utc['begin_hour_top']=df_hail_utc.begin_datetime.dt.ceil('h')
df_hail_utc['end_hour_top']=df_hail_utc.end_datetime.dt.ceil('h')

df_multihour=df_hail_utc[df_hail_utc['begin_hour_top']!=df_hail_utc['end_hour_top']]
df_singlehour=df_hail_utc[df_hail_utc['begin_hour_top']==df_hail_utc['end_hour_top']]

# Positional column indices looked up by name (previously hard-coded as 44..47, 51, 52).
col_pos={name:df_multihour.columns.get_loc(name)
         for name in ('BEGIN_LAT','BEGIN_LON','END_LAT','END_LON','begin_datetime','end_datetime')}

all_dfs=[]
for i in np.arange(len(df_multihour)):
    selected_row=df_multihour.iloc[i]
    # One-second time axis from begin to end; only the two end points carry coordinates,
    # everything in between is filled by time-weighted linear interpolation.
    datetimes=pd.date_range(start=selected_row.begin_datetime,end=selected_row.end_datetime,freq='s')
    dftest=pd.DataFrame({'lat':np.full(len(datetimes),np.nan),'lon':np.full(len(datetimes),np.nan)},index=datetimes)
    hour_tops=datetimes.ceil('h').unique()
    dftest.loc[selected_row.begin_datetime,'lat']=selected_row.BEGIN_LAT
    dftest.loc[selected_row.begin_datetime,'lon']=selected_row.BEGIN_LON
    dftest.loc[selected_row.end_datetime,'lat']=selected_row.END_LAT
    dftest.loc[selected_row.end_datetime,'lon']=selected_row.END_LON
    df_interp=dftest.interpolate(method='time').reset_index(names='datetime')
    interp_hour_top=df_interp.datetime.dt.ceil('h')
    # One copy of the report per hour; begin/end position and time are overwritten below.
    df_new=pd.DataFrame([selected_row]*len(hour_tops))
    for row,hour in enumerate(hour_tops):
        in_hour=df_interp[interp_hour_top==hour]
        first=in_hour.iloc[0]
        last=in_hour.iloc[-1]
        df_new.iloc[row,col_pos['BEGIN_LAT']]=first.lat
        df_new.iloc[row,col_pos['BEGIN_LON']]=first.lon
        df_new.iloc[row,col_pos['begin_datetime']]=first.datetime
        df_new.iloc[row,col_pos['END_LAT']]=last.lat
        df_new.iloc[row,col_pos['END_LON']]=last.lon
        df_new.iloc[row,col_pos['end_datetime']]=last.datetime

    all_dfs.append(df_new)

# pd.concat raises on an empty list, so handle "no multi-hour reports" explicitly.
if all_dfs:
    df_split_hours=pd.concat(all_dfs,ignore_index=True)
    df_hail_final=pd.concat([df_singlehour,df_split_hours],ignore_index=True)
else:
    df_split_hours=df_multihour.iloc[:0].copy()
    df_hail_final=df_singlehour.reset_index(drop=True)
df_hail_final['begin_hour_top']=df_hail_final.begin_datetime.dt.ceil('h')
df_hail_final['end_hour_top']=df_hail_final.end_datetime.dt.ceil('h')

# Compact table used for verification: magnitude, hour-top of the segment end, start/end lon/lat.
df_hail_svr=df_hail_final[df_hail_final.MAGNITUDE>=1.]
df_hail=(df_hail_svr
         .filter(items=['MAGNITUDE','end_hour_top','BEGIN_LON','BEGIN_LAT','END_LON','END_LAT'])
         .rename(columns={'MAGNITUDE':'mag','end_hour_top':'datetime','BEGIN_LON':'slon','BEGIN_LAT':'slat','END_LON':'elon','END_LAT':'elat'}))

In [85]:
# MEB 23 Feb 2026
# Verify outlooks using polygons:
#   - add the statistics needed to calculate MSE
#   - do regional verification using the Bukovsky regions
#   - use an LCC projection with precision = 1.
#   - polygon-based areas: add a buffer around the observations

# Empty polygon used wherever there is nothing to verify against.
areaempty=shp.Polygon()

# One entry per day at 12:00; the end date itself is excluded.
DATES=pd.date_range(
    start='2025-01-01 12:00:00',
    end='2025-11-01 12:00:00',
    inclusive='left',
    freq='1D',
)
#DATES = pd.date_range(start='2025-03-15 12:00:00',end='2025-03-15 12:00:00',inclusive='left', freq='1D')

#outlook=gpd.read_file('/spcarch4/ckarstens/closed-outlooks/geojson/2019/wind_day1_1630_20190720164956.lyr.geojson')
#sigoutlook=gpd.read_file('/spcarch4/ckarstens/closed-outlooks/geojson/2019/sigwind_day1_1630_20190720164956.lyr.geojson')

# Outline of the verification domain and the region polygons, both tagged as EPSG:4326.
usa=gpd.read_file('/users/baldwin/datasets/usa_outline.geojson').set_crs(epsg=4326)
region_bukovsky=gpd.read_file('/users/baldwin/datasets/bukovsky_regions_groups.geojson').set_crs(epsg=4326)

# Region geometries: snapped to a 0.0001 grid in lon/lat, projected to LCC, then snapped
# to a grid of 1 in projected coordinates.
regions_snapped=region_bukovsky.set_precision(0.0001)
regions_projected=regions_snapped.to_crs(crs=CRS.from_proj4("+proj=lcc +lat_1=25 +lat_2=25 +lon_0=-95"))
regions=region_bukovsky.copy()
regions['geometry']=regions_projected.set_precision(1.)
#regions=gpd.read_file('/users/baldwin/datasets/bukovsky_cwa_regions_groups.geojson')

# Outlook products to verify.
ranges=['day1_1630']

# Hazards to verify (alternatives kept for quick single-hazard runs).
#flds=['wind']
#obstype=['all','svr','meas','sigall','sigmeas']
#flds=['torn']
#flds=['torn','wind','hail']
flds=['torn','hail','wind']

# Verify each hazard's outlooks against buffered storm reports for every day in DATES and
# every region in `regions`, and write two CSV files per hazard:
#   outlookday1_area_verf_full_regions.csv - per probability level and region: forecast,
#       observed, intersection and union areas plus the aa/bb/cc/dd contingency areas
#   outlookday1_area_verf_mse_regions.csv  - per region: area-weighted E[f], E[f^2], E[x]
#       and E[f*x] (the terms needed for MSE)
# All areas are projected-coordinate areas scaled by 1e-6.
#
# Changes from the previous version of this cell:
#   * the loop variable no longer shadows the builtin `range`, and no list is named
#     `union` (which shadowed the imported shapely.union)
#   * the `sys.exit()` guard is gone (`sys` was never imported); results are collected
#     as one record per row, so the columns cannot get out of step
#   * a missing outlook file is reported by its glob pattern instead of printing an
#     unbound or stale `jsonfile1`
#   * glob results are sorted before the last match is taken
#   * bare `except:` clauses were narrowed to `except Exception`
#   * work that does not depend on the date or observation type is computed once

lcc_crs=CRS.from_proj4("+proj=lcc +lat_1=25 +lat_2=25 +lon_0=-95")

# Verification domain and its per-region pieces do not depend on the date or hazard.
verfdomain_full=shp.set_precision(usa.to_crs(crs=lcc_crs).geometry,grid_size=1.).iloc[0]
region_geoms=[regions.geometry.iloc[ireg] for ireg in np.arange(len(regions))]
region_domains=[shp.intersection(region_geom,verfdomain_full) for region_geom in region_geoms]

lvl_columns=['HAZARD','OBS_TYPE','OUTLOOK','VALID','EXPIRE','ISSUE','LABEL','REGION','REGION_NAME',
             'FAREA','XAREA','INTERSECT','UNION','DOMAIN','aa','bb','cc','dd','rel_farea','rel_intersect']
mse_columns=['HAZARD','OBS_TYPE','OUTLOOK','VALID','EXPIRE','ISSUE','REGION','REGION_NAME',
             'XAREA','DOMAIN','E_F','E_F2','E_X','E_FX']

for fld in flds:
    print(fld)

    # Per-hazard settings: outlook probability levels (percent), observation subsets to
    # verify, magnitude thresholds, and the report tables to draw observations from.
    if fld=='torn':
        lvls=[0,2,5,10,15,30,45,60]
        obstype=['svr','sigall']
        sigthr=2.
        svrthr=-10.
        df_obs=df_torn
        df_obs_meas=df_torn
    elif fld=='hail':
        lvls=[0,5,15,30,45,60]
        obstype=['svr','sigall']
        sigthr=2.0
        svrthr=1.0
        df_obs=df_hail
        df_obs_meas=df_hail
    elif fld=='wind':
        lvls=[0,5,15,30,45,60]
        obstype=['all','meas','svr','sigall','sigmeas']
        sigthr=64.
        svrthr=50.
        df_obs=df_wind
        df_obs_meas=df_wind_measvr
    else:
        raise ValueError(f'unknown hazard {fld!r}')
    nlvls=len(lvls)

    lvl_records=[]
    mse_records=[]

    for date in DATES:
        # Day-of-year on a leap-year calendar: outside leap years, dates from March on
        # are pushed forward one day. Only used in the progress line below.
        if date.is_leap_year or date.month<3:
            jjj=date.strftime('%j')
        else:
            jjj=(date+pd.to_timedelta('1 day')).strftime('%j')
        print(date,jjj,datetime.now())

        for outlook_range in ranges:
            ffld=f'{fld}_{outlook_range}'
            day1=date.strftime('%Y%m%d')
            yy=date.strftime('%Y')
            valid_time=date
            expire_time=date+pd.to_timedelta('1 day')
            issue_time=date-pd.to_timedelta('12 hours')

            # One row per probability level, starting with empty geometries.
            df1=pd.DataFrame({'DN':list(lvls),
                              'VALID':[valid_time]*nlvls,
                              'EXPIRE':[expire_time]*nlvls,
                              'LABEL':[str(lvl*0.01) for lvl in lvls]})
            prodll=gpd.GeoDataFrame(df1,geometry=[shp.Polygon() for _ in lvls]).set_crs(epsg=4326)

            pattern=f'/NAWIPS/archive/OUTLOOK/{yy}/geojson/{ffld}_{day1}*.lyr.geojson'
            # glob returns matches in arbitrary order; sort so the choice is deterministic.
            # NOTE(review): presumably the trailing timestamp in the file name makes the
            # last sorted match the most recent issuance - confirm.
            matches=sorted(glob.glob(pattern))
            if not matches:
                print(pattern,'file not found')
                continue
            jsonfile1=matches[-1]
            try:
                outlook=gpd.read_file(jsonfile1)
                for ii in np.arange(1,nlvls):
                    lvl=lvls[ii]
                    try:
                        fcstarea=outlook[outlook.DN==lvl].geometry.iloc[0]
                    except Exception:
                        # level not present in this outlook
                        fcstarea=shp.Polygon()
                    prodll.loc[ii,'geometry']=fcstarea
                prodproj=prodll.copy()
                prodproj['geometry']=prodll.to_crs(crs=lcc_crs).set_precision(1.)
                prodproj['mlp']=prodproj.DN.to_numpy()*0.01
                prodproj['geometry']=prodproj['geometry'].fillna(shp.Polygon())
            except Exception as err:
                print(jsonfile1,'some kind of error',repr(err))
                continue

            # Level 0 is everything inside the verification domain that no non-zero level covers.
            fcstarea_nonzero=shp.unary_union(prodproj.geometry.iloc[1:]).buffer(10000.).buffer(-10000.).simplify(tolerance=1000, preserve_topology=True)
            prodproj.loc[0,'geometry']=shp.difference(verfdomain_full,fcstarea_nonzero)
            prodproj.loc[0,'LABEL']='0.00'
            prodproj.loc[0,'mlp']=0.

            # Forecast areas per level and region do not depend on the observation type:
            # fcst_by_level[ii][ireg] = (area at or above level ii, area of level ii alone).
            fcst_by_level=[]
            for ii in np.arange(nlvls):
                try:
                    fcstarea_full=shp.unary_union(prodproj.geometry.iloc[ii:]).buffer(10000.).buffer(-10000.).simplify(tolerance=1000, preserve_topology=True)
                    fcstarea2_full=prodproj.geometry.iloc[ii]
                except Exception:
                    fcstarea_full=areaempty
                    fcstarea2_full=areaempty
                fcst_by_level.append([(shp.intersection(region_geom,fcstarea_full),
                                       shp.intersection(region_geom,fcstarea2_full))
                                      for region_geom in region_geoms])

            for obs1 in obstype:
                # 'meas'/'sigmeas' use the measured-report table; the sig* types apply the
                # significant threshold, 'svr' the severe threshold, 'all'/'meas' none.
                obs_source=df_obs_meas if obs1 in ('meas','sigmeas') else df_obs
                if obs1 in ('all','meas'):
                    min_mag=None
                elif obs1=='svr':
                    min_mag=svrthr
                elif obs1 in ('sigall','sigmeas'):
                    min_mag=sigthr
                else:
                    raise ValueError(f'unknown observation type {obs1!r}')
                # NOTE(review): `datetime` is the hour-top (ceil) of each report segment and
                # both bounds are inclusive, so a report stamped exactly valid_time is also
                # counted for the previous day - confirm that this is intended.
                selected=(obs_source.datetime>=valid_time)&(obs_source.datetime<=expire_time)
                if min_mag is not None:
                    selected=(obs_source.mag>=min_mag)&selected
                df1=obs_source[selected].copy()

                if df1.shape[0]>0:
                    if fld=='torn':
                        # tornadoes are verified along the start->end track line
                        startlatlon = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.slon, df1.slat)).set_crs(epsg=4326)
                        endlatlon = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.elon, df1.elat)).set_crs(epsg=4326)
                        gdfstart=startlatlon.to_crs(crs=lcc_crs)
                        gdfend=endlatlon.to_crs(crs=lcc_crs)
                        gdf=gpd.GeoDataFrame(df1, geometry=gdfstart.geometry.shortest_line(gdfend.geometry))
                    else:
                        gdflatlon = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.slon, df1.slat)).set_crs(epsg=4326)
                        gdf = gdflatlon.to_crs(crs=lcc_crs)
                    # observed area = union of the reports buffered by 40000 projected units
                    obsarea_full=shp.set_precision(shp.union_all(gdf.buffer(40000.)),grid_size=1.)
                else:
                    #print(f'/NAWIPS/archive/OUTLOOK/{yy}/geojson/{ffld}_{day1}*.lyr.geojson {df1.shape[0]} reports')
                    obsarea_full=areaempty

                obs_by_region=[shp.intersection(region_geom,obsarea_full) for region_geom in region_geoms]

                # MSE terms: area-weighted expectations over each region's share of the domain.
                for ireg in np.arange(len(regions)):
                    obsarea=obs_by_region[ireg]
                    verfdomain=region_domains[ireg]
                    prodproj['fxinter']=shp.intersection(prodproj.geometry,obsarea).area
                    prodproj['domain']=shp.intersection(prodproj.geometry,verfdomain).area
                    sums_mse=prodproj.sum(numeric_only=True)
                    mse_records.append({
                        'HAZARD':fld,
                        'OBS_TYPE':obs1,
                        'OUTLOOK':outlook_range,
                        'VALID':valid_time,
                        'EXPIRE':expire_time,
                        'ISSUE':issue_time,
                        'REGION':regions.region.iloc[ireg],
                        'REGION_NAME':regions.names.iloc[ireg],
                        'XAREA':sums_mse.fxinter*1.e-6,
                        'DOMAIN':sums_mse.domain*1.e-6,
                        'E_F':np.sum(prodproj.mlp*prodproj.domain)/sums_mse.domain,
                        'E_F2':np.sum(prodproj.mlp*prodproj.mlp*prodproj.domain)/sums_mse.domain,
                        'E_X':sums_mse.fxinter/sums_mse.domain,
                        'E_FX':np.sum(prodproj.mlp*prodproj.fxinter)/sums_mse.domain,
                    })

                # Contingency areas for "forecast at or above level ii" versus observed.
                for ii in np.arange(nlvls):
                    for ireg in np.arange(len(regions)):
                        fcstarea,fcstarea2=fcst_by_level[ii][ireg]
                        obsarea=obs_by_region[ireg]
                        intersect1=shp.intersection(fcstarea,obsarea).area*1.e-6
                        farea1=fcstarea.area*1.e-6
                        xarea1=obsarea.area*1.e-6
                        union1=shp.union(fcstarea,obsarea).area*1.e-6
                        domainarea=region_domains[ireg].area*1.e-6
                        lvl_records.append({
                            'HAZARD':fld,
                            'OBS_TYPE':obs1,
                            'OUTLOOK':outlook_range,
                            'VALID':valid_time,
                            'EXPIRE':expire_time,
                            'ISSUE':issue_time,
                            'LABEL':prodproj.LABEL.iloc[ii],
                            'REGION':regions.region.iloc[ireg],
                            'REGION_NAME':regions.names.iloc[ireg],
                            'FAREA':farea1,
                            'XAREA':xarea1,
                            'INTERSECT':intersect1,
                            'UNION':union1,
                            'DOMAIN':domainarea,
                            'aa':intersect1,              # forecast and observed
                            'bb':farea1-intersect1,       # forecast, not observed
                            'cc':xarea1-intersect1,       # observed, not forecast
                            'dd':domainarea-union1,       # neither
                            'rel_farea':fcstarea2.area*1.e-6,
                            'rel_intersect':shp.intersection(fcstarea2,obsarea).area*1.e-6,
                        })

    # Explicit column lists keep the CSV layout fixed even when no day could be verified.
    df_lvls_outlook=pd.DataFrame(lvl_records,columns=lvl_columns)
    df_lvls_outlook.to_csv(f'/users/baldwin/datasets/{fld}_outlooks/outlookday1_area_verf_full_regions.csv')
    df_mse_outlook=pd.DataFrame(mse_records,columns=mse_columns)
    df_mse_outlook.to_csv(f'/users/baldwin/datasets/{fld}_outlooks/outlookday1_area_verf_mse_regions.csv')

torn
2025-01-01 12:00:00 001 2026-02-23 20:51:31.993484
2025-01-02 12:00:00 002 2026-02-23 20:51:32.348376
2025-01-03 12:00:00 003 2026-02-23 20:51:32.692217
2025-01-04 12:00:00 004 2026-02-23 20:51:33.049681
2025-01-05 12:00:00 005 2026-02-23 20:51:33.401260
2025-01-06 12:00:00 006 2026-02-23 20:51:33.849492
2025-01-07 12:00:00 007 2026-02-23 20:51:34.207186
2025-01-08 12:00:00 008 2026-02-23 20:51:34.551444
2025-01-09 12:00:00 009 2026-02-23 20:51:34.895891
2025-01-10 12:00:00 010 2026-02-23 20:51:35.246879
2025-01-11 12:00:00 011 2026-02-23 20:51:35.619303
2025-01-12 12:00:00 012 2026-02-23 20:51:35.978254
2025-01-13 12:00:00 013 2026-02-23 20:51:36.371425
2025-01-14 12:00:00 014 2026-02-23 20:51:36.751971
2025-01-15 12:00:00 015 2026-02-23 20:51:37.169345
2025-01-16 12:00:00 016 2026-02-23 20:51:37.522858
2025-01-17 12:00:00 017 2026-02-23 20:51:37.883914
2025-01-18 12:00:00 018 2026-02-23 20:51:38.261875
2025-01-19 12:00:00 019 2026-02-23 20:51:38.620866
2025-01-20 12:00:00 020 20