In [15]:
import pandas as pd
import numpy as np
import altair as alt
from spacepy.coordinates import Coords
from spacepy.time import Ticktock
from datetime import datetime, timedelta
from pytz import timezone
import pytz
import os
# Turn off Altair's row-count limit for chart data so the plots further down
# can be built from the full result tables.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [16]:
# CSV with the model results; the cells below read its "target" / "prediction"
# columns and the encoded input features (sod_*, sm_lon_ipp_*, sm_lat_ipp).
df = pd.read_csv("/cluster/work/igp_psr/dslab_FS25_data_and_weights/model_2025-05-10-18-26-55SA2024501.csv")

# Day under analysis, given as year + day-of-year (1-based).
doy = 183
year = 2024

# Derive the calendar month / day from (year, doy) instead of typing them by
# hand: later cells build the same date once from `doy` and once from
# `month` / `day`, so the two representations must never drift apart.
_analysis_date = datetime(year, 1, 1) + timedelta(days=doy - 1)
month = _analysis_date.month
day = _analysis_date.day

# MAE

In [17]:
# Overall mean absolute error between the model predictions and the targets.
# `target` and `pred` are kept as plain NumPy arrays because later cells index
# them with boolean masks.
target = df["target"].to_numpy(copy=True)
pred = df["prediction"].to_numpy(copy=True)
abs_err = np.abs(target - pred)
print("MAE", abs_err.mean())

MAE 5.81757419706036


### Recover input

In [18]:
# Undo the sine / cosine encoding of the second of day.
sod_sin = df["sod_sin"].to_numpy()
# Clip before arccos: a cosine that is marginally outside [-1, 1] (rounding)
# would otherwise turn into NaN.
sod_cos = np.clip(df["sod_cos"].to_numpy(), -1.0, 1.0)
half_turn = np.arccos(sod_cos)  # in [0, pi]; the sign of the sine picks the half circle
# Wrap with modulo 2*pi so that (cos == 1, sin < 0) maps to 0 instead of 2*pi;
# an angle of exactly 2*pi would give sod == 86400, i.e. hour 24, which is not
# a valid hour for datetime(). Angles already in [0, 2*pi) are left unchanged.
angle = np.where(sod_sin >= 0, half_turn, 2*np.pi - half_turn) % (2*np.pi)
sod = (86400*angle) / (2 * np.pi)
df["sod"] = sod

In [19]:
# Undo the sine / cosine encoding of the solar magnetic longitude (sm_lon_ipp);
# the result is in degrees.
lon_sin = df["sm_lon_ipp_sin"].to_numpy()
# Clip before arccos: a cosine that is marginally outside [-1, 1] (rounding)
# would otherwise turn into NaN.
lon_cos = np.clip(df["sm_lon_ipp_cos"].to_numpy(), -1.0, 1.0)
half_turn = np.arccos(lon_cos)  # in [0, pi]; the sign of the sine picks the half circle
# Modulo 2*pi keeps the angle in [0, 2*pi): (cos == 1, sin < 0) becomes 0
# rather than a full turn. Angles already in range are left unchanged.
angle = np.where(lon_sin >= 0, half_turn, 2*np.pi - half_turn) % (2*np.pi)
angle *= 360 / (2*np.pi)
lons = angle

In [20]:
# The latitude is read straight from its column: unlike the longitude above it
# is not decoded from a sine / cosine pair.
# NOTE(review): presumably degrees in the solar magnetic frame - confirm.
lats = df["sm_lat_ipp"]

### Convert back to GEO coordinates

In [21]:
# Convert coordinates between systems (used below: solar magnetic -> geographic)
def coord_transform(input_type, output_type, lats, lons, epochs,
                    shell_height_km=450, earth_radius_km=6371):
    """Convert spherical (lat, lon) points on a thin shell between coordinate systems.

    Every point is placed at the radius ``1 + shell_height_km / earth_radius_km``
    (a ratio, i.e. expressed in Earth radii) and handed to spacepy as
    ``[radius, lat, lon]`` in the 'sph' representation.

    Args:
        input_type: Name of the coordinate system of the inputs, e.g. 'SM'.
        output_type: Name of the coordinate system to convert to, e.g. 'GEO'.
        lats: Latitudes, one per point.
        lons: Longitudes, one per point (same length as ``lats``).
        epochs: One timestamp per point, interpreted as UTC.
        shell_height_km: Height of the shell above the surface. The default
            of 450 reproduces the previously hard-coded value.
        earth_radius_km: Earth radius used to normalise the height. The
            default of 6371 reproduces the previously hard-coded value.

    Returns:
        The object returned by ``Coords.convert(output_type, 'sph')``; its
        ``.data`` attribute holds the converted points.
    """
    lats = np.asarray(lats, dtype=np.float64)
    lons = np.asarray(lons, dtype=np.float64)
    radius = 1 + shell_height_km / earth_radius_km
    # One row per point: [radius, lat, lon]. Unlike zip(), column_stack raises
    # on a length mismatch instead of silently dropping the surplus points.
    coords = np.column_stack([np.full(lats.shape, radius), lats, lons])
    source_coords = Coords(coords, input_type, 'sph')
    source_coords.ticks = Ticktock(epochs, 'UTC')
    return source_coords.convert(output_type, 'sph')

# Start of the analysed day. Built from `year` so that it follows the value set
# next to `doy` rather than a separately hard-coded "2024-01-01".
date = datetime(year, 1, 1) + timedelta(days=doy - 1)
# One timestamp per sample: start of day plus the recovered second of day
# (the fractional part of the second is dropped).
epochs = [date + timedelta(seconds=int(second_of_day)) for second_of_day in df["sod"]]
# NOTE: despite its name, `sm_coords` holds the result of the SM -> GEO conversion.
sm_coords = coord_transform('SM', 'GEO', lats, lons, epochs)
# Raw array of converted points; later cells read column 1 as latitude and
# column 2 as longitude.
out_coords = sm_coords.data

### Get timezone / local hour / region

In [22]:
# Look up the timezone name (e.g. 'Europe/Berlin') of every converted point.
from timezonefinder import TimezoneFinder

tf = TimezoneFinder()  # a single finder instance is reused for all lookups
geo_lats = out_coords[:, 1]
geo_lons = out_coords[:, 2]
# NOTE(review): the cells below assume every lookup yields a name - confirm
# that timezone_at() cannot return None for these points.
tz_list = np.array([
    tf.timezone_at(lng=point_lon, lat=point_lat)
    for point_lat, point_lon in zip(geo_lats, geo_lons)
])
df["timezone_name"] = tz_list

In [23]:
# Calculate the local hour of every sample from its UTC second of day and the
# timezone found for its location.
day_start_utc = datetime(year, month, day, tzinfo=pytz.utc)
local_hour_list = []
# zip over the two columns instead of iterrows(): same values, far less overhead.
for second_of_day, local_zone in zip(df["sod"], df["timezone_name"]):
    # Keep minutes and seconds when building the UTC time. Truncating to the
    # whole hour first gives the wrong local hour in zones whose offset is not
    # a whole number of hours (e.g. +05:30), and adding a timedelta also copes
    # with a second of day of 86400, which would otherwise be hour 24.
    global_time = day_start_utc + timedelta(seconds=float(second_of_day))

    # 'America/Coyhaique' is mapped to 'America/Santiago' before the lookup -
    # presumably because the installed pytz database does not know it.
    if local_zone == 'America/Coyhaique':
        local_zone = "America/Santiago"

    local_hour_list.append(global_time.astimezone(timezone(local_zone)).hour)

df["local_hour"] = local_hour_list

In [24]:
# The region is the part of the timezone name in front of the first '/',
# e.g. "Europe" for "Europe/Berlin".
region_list = np.array([tz_name.partition('/')[0] for tz_name in tz_list])
df["region"] = region_list

### Error by region

In [25]:
# Mean absolute error and sample count per region, in the order of
# np.unique(region_list).
abs_error = np.abs(pred - target)
region_error_list, region_sample_size = [], []
for region_name in np.unique(region_list):
    in_region = region_list == region_name
    region_error_list.append(abs_error[in_region].mean())
    region_sample_size.append(in_region.sum())
    

In [26]:
# Bar chart of the mean absolute error per region.
plot_df = pd.DataFrame({
    "region": np.unique(region_list),
    "MAE": region_error_list,
})
region_chart = alt.Chart(plot_df).mark_bar().encode(x="region", y="MAE")
region_chart.properties(width=500, height=500)

### Error by local time

In [27]:
# Mean absolute error and sample count per local hour, in the order of
# np.unique(local_hour_list).
# The hours are turned into an array first so that the comparison below is an
# explicit element-wise mask.
local_hours = np.asarray(local_hour_list)
abs_error = np.abs(pred - target)
local_time_error_list, local_time_sample_size = [], []
for hour in np.unique(local_hours):
    at_hour = local_hours == hour
    local_time_error_list.append(abs_error[at_hour].mean())
    local_time_sample_size.append(at_hour.sum())

In [28]:
# Bar chart of the mean absolute error per local hour.
plot_df = pd.DataFrame({
    "local time": np.unique(local_hour_list),
    "MAE": local_time_error_list,
})
local_time_chart = alt.Chart(plot_df).mark_bar().encode(x="local time", y="MAE")
local_time_chart.properties(width=500, height=500)