This notebook serves as a starting point for formatting and loading the data sets needed by the ACE_RNN.

In [40]:
import sys
import os
import numpy as np
from astropy.table import Table, vstack
from astropy.io import misc, ascii
from pathlib import Path
import h5py
import requests
import zipfile
import io

In [7]:
# Format data around the h5py library and astropy tables.
# Directory holding the ACE and GOES HDF5 archive files.
ARC_DIR = Path("/proj/sot/ska/data/arc3")
# Open both archives explicitly read-only: this notebook only reads from them,
# and older h5py releases default to a read/write-or-create mode when no mode
# is given.
ACE_H5_FILE = h5py.File(ARC_DIR.joinpath("ACE.h5"), "r")
GOES_H5_FILE = h5py.File(ARC_DIR.joinpath("GOES_X.h5"), "r")

In [8]:
# Load the contents of the two open HDF5 files as astropy tables.
# NOTE(review): no `path=` is passed, so this presumably relies on each file
# holding a single table -- confirm against the archive layout.
ACE_TABLE = misc.hdf5.read_table_hdf5(ACE_H5_FILE)
GOES_TABLE = misc.hdf5.read_table_hdf5(GOES_H5_FILE)

In [9]:
# Data directory for the sets fetched online (zip extraction target and
# location of the converted CELIAS files).
DATA_DIR = Path("/data/mta4/ACE_RNN/Data")
# NOTE(review): CELIAS_PATH is not referenced elsewhere in this notebook; the
# save/load cells use the file name 'CELIUS.h5' instead -- confirm which
# spelling is intended.
CELIAS_PATH = DATA_DIR / "CELIAS.h5"

In [10]:
# Web links for fetching data.
# Per-year archive URL pattern built from this base:
#   f"{CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip"
#mtof = f"https://l1.umd.edu/data/{this_year}_CELIAS_Proton_Monitor_5min.zip"
# Plain string: the base URL has no placeholders, so no f-string is needed.
CELIAS_LINK = "https://l1.umd.edu/data"

In [11]:
#
#-- Function to fetch CELIAS Data
#
def pull_celias(start, stop, timeout=60):
    """Download and unpack the yearly CELIAS proton-monitor archives.

    For every year in ``start..stop`` (inclusive) the zip file
    ``{year}_CELIAS_Proton_Monitor_5min.zip`` is fetched from ``CELIAS_LINK``
    and all of its members are extracted into ``DATA_DIR``.

    Parameters
    ----------
    start : int
        First year to download.
    stop : int
        Last year to download (inclusive).
    timeout : float, optional
        Timeout in seconds handed to ``requests.get`` so a stalled server
        cannot hang the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status for any year.
    zipfile.BadZipFile
        If a downloaded payload is not a valid zip archive.
    """
    for year in range(start, stop + 1):
        fetch_link = f"{CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip"
        response = requests.get(fetch_link, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of handing an HTML error page to
        # ZipFile, which would surface as a misleading BadZipFile.
        response.raise_for_status()
        # The whole archive is held in memory; the context manager closes it
        # once extraction is done.
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(DATA_DIR)

Only data from 2013 onward is pulled, even though CELIAS data is available back to 1996, in order to match the current ACE and GOES time frames. The year range can be adjusted. Note that this data fetch has already been run; the call is kept here for reference.

In [12]:
#pull_celias(2013,2024)

In [56]:
# One entry per CELIAS column, in file order: (column name, unit, numpy dtype).
# A unit of None means the column is declared without a unit.
_CELIAS_COLUMN_SPEC = (
    ('YY',           None,   'int64'),
    ('MON',          None,   '<U3'),
    ('DY',           None,   'int64'),
    ('DOY:HH:MM:SS', None,   '<U12'),
    ('SPEED',        'km/s', 'int64'),
    ('Np',           'cm-3', 'float64'),
    ('Vth',          'km/s', 'int64'),
    ('N/S',          'deg',  'float64'),
    ('V_He',         'km/s', 'int64'),
    ('GSE_X',        'Re',   'float64'),
    ('GSE_Y',        'Re',   'float64'),
    ('GSE_Z',        'Re',   'float64'),
    ('RANGE',        'Mkm',  'float64'),
    ('HGLAT',        'deg',  'float64'),
    ('HGLONG',       'deg',  'float64'),
    ('CRN(E)',       '#',    'int64'),
)

# Parallel lists derived from the spec above, used to declare the CELIAS table.
CELIAS_UNITS = [unit for _, unit, _ in _CELIAS_COLUMN_SPEC]

CELIAS_COLNAMES = [name for name, _, _ in _CELIAS_COLUMN_SPEC]

CELIAS_DTYPE = [np.dtype(kind) for _, _, kind in _CELIAS_COLUMN_SPEC]

In [65]:
#
# -- Function converting CELIAS text files to a single astropy Table
#
def convert_celias(start, stop):
    """Read the yearly CELIAS text files and stack them into one astropy Table.

    For every year in ``start..stop`` (inclusive) the file
    ``{year}_CELIAS_Proton_Monitor_5min.txt`` in ``DATA_DIR`` is parsed with
    ``astropy.io.ascii.read`` and the yearly tables are stacked in year order.

    Parameters
    ----------
    start : int
        First year to convert.
    stop : int
        Last year to convert (inclusive).

    Returns
    -------
    astropy.table.Table
        The stacked table. When ``start > stop`` no file is read and the
        empty, typed table is returned.
    """
    # Seed with an empty table declaring the expected names, units and dtypes.
    # NOTE(review): presumably this is what attaches the units to the stacked
    # result, since the parsed text files carry none -- confirm.
    tables = [Table(names=CELIAS_COLNAMES, units=CELIAS_UNITS, dtype=CELIAS_DTYPE)]
    for year in range(start, stop + 1):
        yearly_file = DATA_DIR.joinpath(f"{year}_CELIAS_Proton_Monitor_5min.txt")
        # header_start=20 tells the reader where the column-name line sits;
        # presumably this skips the descriptive preamble of the CELIAS files
        # -- verify if the upstream file format changes.
        tables.append(ascii.read(yearly_file, header_start=20))
    # Stack once at the end: calling vstack inside the loop re-copies the
    # accumulated rows on every iteration.
    return vstack(tables)

Converting all of the downloaded yearly text files. Note that this conversion has already been run; the call is kept here for reference.

In [70]:
#CELIAS_TABLE = convert_celias(2013,2024)

In [69]:
# Display the CELIAS table.
# NOTE(review): on a fresh kernel CELIAS_TABLE is not defined at this point --
# the convert_celias call above is commented out and the only live assignment
# is the read from 'CELIUS.h5' in the last cell. Run that cell first.
CELIAS_TABLE

YY,MON,DY,DOY:HH:MM:SS,SPEED,Np,Vth,N/S,V_He,GSE_X,GSE_Y,GSE_Z,RANGE,HGLAT,HGLONG,CRN(E)
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,km / s,1 / cm3,km / s,deg,km / s,Re,Re,Re,Mkm,deg,deg,#
int64,str3,int64,str12,int64,float64,int64,float64,int64,float64,float64,float64,float64,float64,float64,int64
13,Jan,1,001:00:00:09,352,4.23,18,-1.6,354,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:04:57,353,4.0,18,-1.2,354,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:09:57,354,3.86,20,-1.6,357,257.2,2.9,-19.3,145.5,-3.1,327.0,2132
13,Jan,1,001:00:15:00,355,3.78,20,-1.5,358,257.2,2.9,-19.3,145.5,-3.1,326.9,2132
13,Jan,1,001:00:20:02,355,3.74,20,-1.5,358,257.2,2.8,-19.3,145.5,-3.1,326.8,2132
13,Jan,1,001:00:25:04,356,3.68,20,-1.2,358,257.2,2.8,-19.3,145.5,-3.1,326.8,2132
13,Jan,1,001:00:30:05,355,3.69,20,-1.2,358,257.2,2.8,-19.3,145.5,-3.1,326.7,2132
13,Jan,1,001:00:35:11,355,3.84,20,-1.3,357,257.2,2.8,-19.3,145.5,-3.1,326.7,2132
13,Jan,1,001:00:40:11,360,3.66,20,-1.3,363,257.2,2.8,-19.3,145.5,-3.1,326.6,2132
13,Jan,1,001:00:45:12,356,3.75,20,-0.9,358,257.2,2.8,-19.3,145.5,-3.1,326.6,2132


Save the astropy table to an HDF5 (.h5) file for more convenient storage.

In [77]:
#misc.hdf5.write_table_hdf5(CELIAS_TABLE, str(DATA_DIR.joinpath('CELIUS.h5')), 
#                           serialize_meta=True, overwrite=True)

Load CELIAS_TABLE from the saved h5 file.

In [None]:
# Reload the previously saved CELIAS table from its HDF5 file.
# NOTE(review): the file name is spelled 'CELIUS.h5' (matching the save cell),
# not 'CELIAS.h5' as in CELIAS_PATH -- kept as-is so the existing file is found.
CELIAS_TABLE = misc.hdf5.read_table_hdf5(str(DATA_DIR / 'CELIUS.h5'))