This notebook serves as a starting point for formatting and loading the data sets needed by the ACE_RNN.

In [1]:
import sys
import os
import numpy as np
from astropy.table import Table, vstack
from astropy.io import misc, ascii
from pathlib import Path
import h5py
import requests
import zipfile
import io

In [2]:
#Format data around h5py library and astropy table
# Archive directory that holds the ACE and GOES HDF5 files.
ARC_DIR = Path("/proj/sot/ska/data/arc3")
# Open read-only explicitly: this notebook only reads the archives, and the
# default mode of h5py.File is not the same across h5py versions.
ACE_H5_FILE = h5py.File(ARC_DIR.joinpath("ACE.h5"), "r")
GOES_H5_FILE = h5py.File(ARC_DIR.joinpath("GOES_X.h5"), "r")

In [3]:
# Read each open HDF5 file handle into a table (no dataset path is given,
# so read_table_hdf5 is left to locate the table inside the file).
ACE_TABLE, GOES_TABLE = (
    misc.hdf5.read_table_hdf5(h5_handle)
    for h5_handle in (ACE_H5_FILE, GOES_H5_FILE)
)

In [4]:
# Local directory for the data sets fetched online, and the HDF5 file
# inside it used to store the CELIAS table.
DATA_DIR = Path("/data/mta4/ACE_RNN/Data")
CELIAS_PATH = DATA_DIR / "CELIAS.h5"

In [5]:
#Web Links for fetching Data
# Base URL only; the yearly archives are addressed as
#   {CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip
CELIAS_LINK = "https://l1.umd.edu/data"

In [6]:
#
#-- Function to fetch CELIAS Data
#
def pull_celias(start,stop):
    """Download and unpack the yearly CELIAS Proton Monitor 5-minute archives.

    For every year from ``start`` through ``stop`` (inclusive) the zip file
    ``{CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip`` is downloaded and
    extracted into ``DATA_DIR / "CELIAS_txt"``.

    Parameters
    ----------
    start : int
        First year to fetch.
    stop : int
        Last year to fetch (inclusive).

    Raises
    ------
    requests.HTTPError
        If the server returns an error status for one of the years.
    requests.Timeout
        If the server stops responding.
    zipfile.BadZipFile
        If a downloaded payload is not a valid zip archive.
    """
    extract_dir = DATA_DIR.joinpath("CELIAS_txt")
    for year in range(start, stop + 1):
        fetch_link = f"{CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip"
        # The whole body is needed in memory for ZipFile, so streaming brings
        # nothing; a timeout keeps a stalled server from hanging the notebook.
        response = requests.get(fetch_link, timeout=60)
        # Fail here with a clear HTTP error instead of handing an error page
        # to ZipFile and getting a misleading BadZipFile.
        response.raise_for_status()
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(extract_dir)

Data are pulled only from 2013 onward, even though the archive goes back to 1996, in order to match the current ACE and GOES time frames. The year range can be adjusted. Note that this data fetch was already run; the call is kept here for reference.

In [7]:
#pull_celias(2013,2024)

In [8]:
# Column names used for the CELIAS table, in column order.
CELIAS_COLNAMES = [
    'YY', 'MON', 'DY', 'DOY:HH:MM:SS',
    'SPEED', 'Np', 'Vth', 'N/S',
    'V_He', 'GSE_X', 'GSE_Y', 'GSE_Z',
    'RANGE', 'HGLAT', 'HGLONG', 'CRN(E)',
]

# Unit string for each column above (None for columns given no unit).
CELIAS_UNITS = [
    None, None, None, None,
    'km/s', 'cm-3', 'km/s', 'deg',
    'km/s', 'Re', 'Re', 'Re',
    'Mkm', 'deg', 'deg', '#',
]

# numpy dtype for each column above, built from its dtype string.
CELIAS_DTYPE = [
    np.dtype(type_code)
    for type_code in (
        'int64', '<U3', 'int64', '<U12',
        'int64', 'float64', 'int64', 'float64',
        'int64', 'float64', 'float64', 'float64',
        'float64', 'float64', 'float64', 'int64',
    )
]

In [9]:
#
# -- Function converting CELIAS text files to a single Astropy table
#
def convert_celias(start,stop):
    """Read the yearly CELIAS text files and stack them into one table.

    For every year from ``start`` through ``stop`` (inclusive) the file
    ``DATA_DIR / "CELIAS_txt" / "{year}_CELIAS_Proton_Monitor_5min.txt"`` is
    read with ``ascii.read`` (``header_start=20``), and all yearly tables are
    stacked beneath an empty template table built from ``CELIAS_COLNAMES``,
    ``CELIAS_UNITS`` and ``CELIAS_DTYPE``.

    Parameters
    ----------
    start : int
        First year to convert.
    stop : int
        Last year to convert (inclusive).

    Returns
    -------
    astropy.table.Table
        The stacked table; just the empty template when the year range
        is empty.
    """
    # Empty table that carries the column names, units and dtypes.
    template = Table(names = CELIAS_COLNAMES, units = CELIAS_UNITS, dtype = CELIAS_DTYPE)
    txt_dir = DATA_DIR.joinpath("CELIAS_txt")
    yearly_tables = [
        ascii.read(txt_dir.joinpath(f"{year}_CELIAS_Proton_Monitor_5min.txt"),
                   header_start=20)
        for year in range(start, stop + 1)
    ]
    # Stack once at the end: stacking inside the loop would recopy every
    # previously accumulated row for each additional year.
    return vstack([template, *yearly_tables])

Convert all of the downloaded text files. Note that this conversion was already run; the call is kept here for reference.

In [10]:
#CELIAS_TABLE = convert_celias(2013,2024)

Saving the astropy table to an .h5 file for more efficient storage.

In [11]:
#misc.hdf5.write_table_hdf5(CELIAS_TABLE, str(CELIAS_PATH), 
#                           serialize_meta=True, overwrite=True)

Loading CELIAS_TABLE from the saved h5 file.

In [12]:
# Load the CELIAS table from the HDF5 file at CELIAS_PATH; the path is
# passed as a plain string.
CELIAS_TABLE = misc.hdf5.read_table_hdf5(os.fspath(CELIAS_PATH))

In [13]:
CELIAS_TABLE

YY,MON,DY,DOY:HH:MM:SS,SPEED,Np,Vth,N/S,V_He,GSE_X,GSE_Y,GSE_Z,RANGE,HGLAT,HGLONG,CRN(E)
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,km / s,1 / cm3,km / s,deg,km / s,Re,Re,Re,Mkm,deg,deg,#
int64,bytes3,int64,bytes12,int64,float64,int64,float64,int64,float64,float64,float64,float64,float64,float64,int64
13,Jan,1,001:00:00:09,352,4.23,18,-1.6,354,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:04:57,353,4.0,18,-1.2,354,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:09:57,354,3.86,20,-1.6,357,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:15:00,355,3.78,20,-1.5,358,257.2,2.9,-19.3,145.5,-3.1,326.9,2132
13,Jan,1,001:00:20:02,355,3.74,20,-1.5,358,257.2,2.8,-19.3,145.5,-3.1,326.8,2132
13,Jan,1,001:00:25:04,356,3.68,20,-1.2,358,257.2,2.8,-19.3,145.5,-3.1,326.8,2132
13,Jan,1,001:00:30:05,355,3.69,20,-1.2,358,257.2,2.8,-19.3,145.5,-3.1,326.7,2132
13,Jan,1,001:00:35:11,355,3.84,20,-1.3,357,257.2,2.8,-19.3,145.5,-3.1,326.7,2132
13,Jan,1,001:00:40:11,360,3.66,20,-1.3,363,257.2,2.8,-19.3,145.5,-3.1,326.6,2132
13,Jan,1,001:00:45:12,356,3.75,20,-0.9,358,257.2,2.8,-19.3,145.5,-3.1,326.6,2132


In [14]:
ACE_TABLE

year,month,dom,hhmm,mjd,secs,destat,de1,de4,pstat,p1,p3,p5,p6,p7,anis_idx,time
int64,int64,int64,int64,int64,int64,int64,float64,float64,int64,float64,float64,float64,float64,float64,float64,float64
2012,1,8,5,55934,300,0,767.0,24.0,0,1970.0,19.0,2.59,0.831,0.126,-1.0,442368366.1839997
2012,1,8,10,55934,600,0,830.0,23.2,0,2050.0,18.3,2.94,0.792,0.223,-1.0,442368666.1840001
2012,1,8,15,55934,900,0,664.0,18.6,0,1990.0,19.0,2.66,0.728,0.221,-1.0,442368966.1839998
2012,1,8,20,55934,1200,0,644.0,25.7,0,2060.0,19.8,2.66,0.79,0.137,-1.0,442369266.18400013
2012,1,8,25,55934,1500,0,714.0,24.0,0,1950.0,20.5,2.8,0.862,0.169,-1.0,442369566.18399984
2012,1,8,30,55934,1800,0,878.0,27.2,0,2050.0,18.0,2.66,0.665,0.19,-1.0,442369866.1840002
2012,1,8,35,55934,2100,0,594.0,20.0,0,2070.0,18.0,2.61,0.563,0.152,-1.0,442370166.1839999
2012,1,8,40,55934,2400,0,685.0,21.1,0,1820.0,19.0,2.56,0.769,0.211,-1.0,442370466.18400025
2012,1,8,45,55934,2700,0,848.0,24.5,0,2000.0,18.1,3.02,0.686,0.105,-1.0,442370766.18399996
2012,1,8,50,55934,3000,0,726.0,26.7,0,1960.0,18.5,2.97,0.756,0.16,-1.0,442371066.1839997


In [15]:
GOES_TABLE

year,month,dom,hhmm,mjd,secs,short,long,ratio,time,satellite
int64,int64,int64,int64,int64,int64,float64,float64,float64,float64,int64
2013,5,3,5,56415,300,7.4e-09,8.33e-07,0.00889,483926767.1839997,15
2013,5,3,10,56415,600,6.83e-09,8.35e-07,0.00817,483927067.1840001,15
2013,5,3,15,56415,900,6.98e-09,8.36e-07,0.00835,483927367.1839998,15
2013,5,3,20,56415,1200,8.16e-09,8.23e-07,0.00991,483927667.18400013,15
2013,5,3,25,56415,1500,5.59e-09,8.12e-07,0.00688,483927967.18399984,15
2013,5,3,30,56415,1800,2.91e-08,9.94e-07,0.0251,483928267.1840002,15
2013,5,3,35,56415,2100,6.87e-08,1.6e-06,0.0419,483928567.1839999,15
2013,5,3,40,56415,2400,1.45e-08,1.02e-06,0.0141,483928867.18400025,15
2013,5,3,45,56415,2700,9.55e-09,8.92e-07,0.0107,483929167.18399996,15
2013,5,3,50,56415,3000,2.77e-08,1.11e-06,0.025,483929467.1839997,15
