In [12]:
import xarray as xr

fn = "irrad.nc"
dsn = xr.open_dataset(fn)

In [13]:
print(dsn)

<xarray.Dataset> Size: 866MB
Dimensions:       (time: 15458791)
Coordinates:
  * time          (time) datetime64[ns] 124MB 2012-01-01T00:00:00.378000128 ....
Data variables:
    a_counts      (time) float64 124MB ...
    b_counts      (time) float64 124MB ...
    a_flux        (time) float32 62MB ...
    b_flux        (time) float32 62MB ...
    a_flags       (time) float32 62MB ...
    b_flags       (time) float32 62MB ...
    a_swpc_flags  (time) float64 124MB ...
    b_swpc_flags  (time) float64 124MB ...
Attributes: (12/44)
    Conventions:                   ACDD-1.3, Spase v2.2.6
    title:                         GOES 1-15 L2 XRS high-resolution Irradiances
    summary:                       The X-ray flux product consists of reproce...
    keywords:                      NumericalData.MeasurementType.Irradiance
    keywords_vocabulary:           SPASE: Space Physics Archive Search and Ex...
    naming_authority:              gov.nesdis.noaa
    ...                            ...


In [14]:
for var in dsn.variables.values():
    print(var)

<xarray.Variable (time: 15458791)> Size: 124MB
[15458791 values with dtype=float64]
Attributes:
    long_name:            XRS-A channel counts.
    valid_min:            0
    valid_max:            5000000
    ancillary_variables:  a_flags a_swpc_flags
<xarray.Variable (time: 15458791)> Size: 124MB
[15458791 values with dtype=float64]
Attributes:
    long_name:            XRS-B channel counts.
    valid_min:            0
    valid_max:            5000000
    ancillary_variables:  b_flags b_swpc_flags
<xarray.Variable (time: 15458791)> Size: 62MB
[15458791 values with dtype=float32]
Attributes:
    long_name:            XRS-A channel irradiance.
    units:                W/m2
    valid_min:            -5e-07
    valid_max:            0.2
    ancillary_variables:  a_flags a_swpc_flags
<xarray.Variable (time: 15458791)> Size: 62MB
[15458791 values with dtype=float32]
Attributes:
    long_name:            XRS-B channel irradiance.
    units:                W/m2
    valid_min:            -5

In [15]:
file = 'flsum.nc'
data = xr.open_dataset(file) # read the data using xarray
print(type(data)) # print the type of the data
print(data.variables.keys()) # print the variables in the data

<class 'xarray.core.dataset.Dataset'>
KeysView(Frozen({'xrsb_flux': <xarray.Variable (time: 11063)> Size: 44kB
[11063 values with dtype=float32]
Attributes:
    long_name:  Averaged flux for XRS-B.
    units:      W/m2
    valid_min:  1e-11
    valid_max:  0.1, 'status': <xarray.Variable (time: 11063)> Size: 89kB
[11063 values with dtype=object]
Attributes:
    long_name:      Flag for xrsb_flux.
    comments:       EVENT_START=start of flare, EVENT_PEAK=peak of flare, EVE...
    flag_meanings:  EVENT_START EVENT_PEAK EVENT_END POST_EVENT, 'background_flux': <xarray.Variable (time: 11063)> Size: 44kB
[11063 values with dtype=float32]
Attributes:
    long_name:  Background flux.
    comments:   Recorded at EVENT_START.
    units:      W/m2
    valid_min:  1e-09
    valid_max:  0.1, 'flare_class': <xarray.Variable (time: 11063)> Size: 89kB
[11063 values with dtype=object]
Attributes:
    long_name:  Flare class.
    comments:   Recorded at EVENT_PEAK., 'integrated_flux': <xarray.Variable

In [16]:
import xarray as xr
import pandas as pd
import numpy as np

# ---------- CONFIG ----------
WINDOW_IN_MIN = 180  # 3 hours history
WINDOW_OUT_MIN = 60  # predict next 1 hour
IRRAD_PATH = "irrad.nc"
FLSUM_PATH = "flsum.nc"
# -----------------------------

# Load high-cadence irradiances (2–3 s)
irrad_ds = xr.open_dataset(IRRAD_PATH)
irrad_df = irrad_ds[['a_flux', 'b_flux']].to_dataframe().reset_index()

# Tidy up
irrad_df.rename(columns={
    'time': 'timestamp',
    'a_flux': 'short_xray',   # 0.05–0.4 nm
    'b_flux': 'long_xray'     # 0.1–0.8 nm
}, inplace=True)

irrad_df['timestamp'] = pd.to_datetime(irrad_df['timestamp'])
irrad_df.set_index('timestamp', inplace=True)

print("Raw irrad shape:", irrad_df.shape)
irrad_df.head()


Raw irrad shape: (15458791, 2)


Unnamed: 0_level_0,short_xray,long_xray
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-01-01 00:00:00.378000128,6.651697e-10,6.362675e-07
2012-01-01 00:00:02.428000000,-6.51022e-10,6.362675e-07
2012-01-01 00:00:04.474999808,6.651697e-10,6.362675e-07
2012-01-01 00:00:06.525000192,6.651697e-10,6.325039e-07
2012-01-01 00:00:08.571000064,-6.51022e-10,6.362675e-07


In [17]:
# Resample to 1-min cadence
irrad_1m = irrad_df.resample('1min').mean().dropna()

print("1-minute irrad shape:", irrad_1m.shape)
irrad_1m.head()

1-minute irrad shape: (501498, 2)


Unnamed: 0_level_0,short_xray,long_xray
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-01-01 00:00:00,1.083194e-10,6.280166e-07
2012-01-01 00:01:00,1.43673e-09,6.184879e-07
2012-01-01 00:02:00,1.572888e-09,6.270533e-07
2012-01-01 00:03:00,8.406619e-10,6.175751e-07
2012-01-01 00:04:00,-7.871798e-10,6.121287e-07


In [18]:
# Load flare summary
flsum_ds = xr.open_dataset(FLSUM_PATH)
flsum_df = flsum_ds[['flare_class']].to_dataframe().reset_index()

flsum_df.rename(columns={'time': 'flare_time'}, inplace=True)
flsum_df['flare_time'] = pd.to_datetime(flsum_df['flare_time'])

# class like 'M2.1' → just the letter 'M'
flsum_df['class_letter'] = flsum_df['flare_class'].astype(str).str[0]

# Keep only proper classes
valid_classes = ['A', 'B', 'C', 'M', 'X']
flsum_df = flsum_df[flsum_df['class_letter'].isin(valid_classes)].copy()

flsum_df.sort_values('flare_time', inplace=True)
flsum_df.reset_index(drop=True, inplace=True)

print("Number of flare events:", len(flsum_df))
flsum_df.head()


Number of flare events: 3141


Unnamed: 0,flare_time,flare_class,class_letter
0,2012-01-01 03:55:00,C1.6,C
1,2012-01-01 04:50:00,C1.1,C
2,2012-01-01 07:34:00,C4.6,C
3,2012-01-01 13:26:00,C1.1,C
4,2012-01-01 18:59:00,C1.2,C


In [19]:
# ranking of flare classes
flare_order = {"A": 0, "B": 1, "C": 2, "M": 3, "X": 4}

fl_times = flsum_df['flare_time'].values
fl_classes = flsum_df['class_letter'].values
n_events = len(fl_times)

def make_future_labels(index, horizon_min=60):
    labels = []
    j = 0  # pointer into flare list

    for t in index.values:
        # advance pointer past any flares before t
        while j < n_events and fl_times[j] < t:
            j += 1

        # scan flares within [t, t + horizon]
        h_end = t + np.timedelta64(horizon_min, 'm')
        k = j
        best_class = "A"
        best_score = flare_order[best_class]

        while k < n_events and fl_times[k] <= h_end:
            c = fl_classes[k]
            score = flare_order[c]
            if score > best_score:
                best_class, best_score = c, score
            k += 1

        labels.append(best_class)

    return np.array(labels)

labels = make_future_labels(irrad_1m.index, horizon_min=WINDOW_OUT_MIN)
print("Labels shape:", labels.shape)
pd.Series(labels).value_counts()


Labels shape: (501498,)


Unnamed: 0,count
A,348176
C,102292
B,39801
M,10619
X,610


In [20]:
WINDOW_IN = WINDOW_IN_MIN  # in minutes

data = irrad_1m[['short_xray', 'long_xray']].values

X_list, y_list = [], []
for i in range(WINDOW_IN, len(irrad_1m)):
    x_window = data[i-WINDOW_IN:i, :]        # 180 x 2
    X_list.append(x_window)
    y_list.append(labels[i])

X = np.stack(X_list)    # shape: (N_samples, 180, 2)
y = np.array(y_list)    # shape: (N_samples,)

print("X shape:", X.shape)
print("y shape:", y.shape)
pd.Series(y).value_counts()


X shape: (501318, 180, 2)
y shape: (501318,)


Unnamed: 0,count
A,348001
C,102287
B,39801
M,10619
X,610


In [23]:
np.save("X_2012.npy", X)
np.save("y_2012.npy", y)