This Notebook serves as a starting point for formatting and inputting data sets necessary for the ACE_RNN

In [69]:
import sys
import os
import numpy as np
from astropy.table import Table
from astropy.io import misc, ascii
from pathlib import Path
import h5py
import requests
import zipfile
import io

In [16]:
#Format data around h5py library and astropy table
ARC_DIR = Path("/proj/sot/ska/data/arc3")
ACE_H5_FILE = h5py.File(ARC_DIR.joinpath("ACE.h5"))
GOES_H5_FILE = h5py.File(ARC_DIR.joinpath("GOES_X.h5"))

In [20]:
ACE_TABLE = misc.hdf5.read_table_hdf5(ACE_H5_FILE)
GOES_TABLE = misc.hdf5.read_table_hdf5(GOES_H5_FILE)

In [44]:
#Data Directories for set fetched online
DATA_DIR = Path("/data/mta4/ACE_RNN/Data")
CELIAS_PATH = DATA_DIR.joinpath("CELIAS.h5")

In [50]:
#Web Links for fetching Data
#mtof = f"https://l1.umd.edu/data/{this_year}_CELIAS_Proton_Monitor_5min.zip"
CELIAS_LINK = f"https://l1.umd.edu/data"

In [61]:
#
#-- Method to fetch CELIAS Data
#
def pull_celias(start,stop):
    for year in range(start,stop+1):
        fetch_link = f"{CELIAS_LINK}/{year}_CELIAS_Proton_Monitor_5min.zip"
        r = requests.get(fetch_link, stream=True)
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall(DATA_DIR)

Pulling onyl since 2013 even thoguh 1996 is available int order to match with current ACE and GOES timeframes. Can be adjusted. Note that this data fetch was already run and is here for reference

In [64]:
#pull_celias(2013,2024)

In [74]:
#
# -- CELIAS textfiles to Astropy fits tables
#

In [112]:
CELIAS_2024 = ascii.read(DATA_DIR.joinpath("2024_CELIAS_Proton_Monitor_5min.txt"),
                         header_start=20)

In [113]:
CELIAS_2024['SPEED'].unit = 'km / s'

In [114]:
CELIAS_2024

YY,MON,DY,DOY:HH:MM:SS,SPEED,Np,Vth,N/S,V_He,GSE_X,GSE_Y,GSE_Z,RANGE,HGLAT,HGLONG,CRN(E)
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,km / s,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
int64,str3,int64,str12,int64,float64,int64,float64,int64,float64,float64,float64,float64,float64,float64,int64
24,Jan,1,001:00:00:08,300,7.38,18,0.9,301,194.2,50.8,12.6,145.9,-2.9,227.2,2279
24,Jan,1,001:00:04:55,301,7.19,18,0.5,301,194.2,50.8,12.6,145.9,-2.9,227.2,2279
24,Jan,1,001:00:10:00,301,7.19,17,0.6,302,194.2,50.9,12.6,145.9,-2.9,227.1,2279
24,Jan,1,001:00:15:00,301,7.71,18,1.2,302,194.2,50.9,12.6,145.9,-2.9,227.1,2279
24,Jan,1,001:00:20:02,301,7.56,18,0.9,302,194.2,50.9,12.6,145.9,-2.9,227.0,2279
24,Jan,1,001:00:25:05,302,7.55,18,0.5,303,194.2,50.9,12.6,145.9,-2.9,227.0,2279
24,Jan,1,001:00:30:07,303,8.27,19,1.2,304,194.2,50.9,12.6,145.9,-2.9,226.9,2279
24,Jan,1,001:00:35:08,310,6.98,19,-0.3,311,194.2,50.9,12.6,145.9,-2.9,226.9,2279
24,Jan,1,001:00:40:10,309,6.12,19,-1.4,311,194.2,50.9,12.6,145.9,-2.9,226.8,2279
24,Jan,1,001:00:45:14,314,6.23,21,-1.2,316,194.2,50.9,12.6,145.9,-2.9,226.8,2279


In [73]:
ACE_TABLE

year,month,dom,hhmm,mjd,secs,destat,de1,de4,pstat,p1,p3,p5,p6,p7,anis_idx,time
int64,int64,int64,int64,int64,int64,int64,float64,float64,int64,float64,float64,float64,float64,float64,float64,float64
2012,1,8,5,55934,300,0,767.0,24.0,0,1970.0,19.0,2.59,0.831,0.126,-1.0,442368366.1839997
2012,1,8,10,55934,600,0,830.0,23.2,0,2050.0,18.3,2.94,0.792,0.223,-1.0,442368666.1840001
2012,1,8,15,55934,900,0,664.0,18.6,0,1990.0,19.0,2.66,0.728,0.221,-1.0,442368966.1839998
2012,1,8,20,55934,1200,0,644.0,25.7,0,2060.0,19.8,2.66,0.79,0.137,-1.0,442369266.18400013
2012,1,8,25,55934,1500,0,714.0,24.0,0,1950.0,20.5,2.8,0.862,0.169,-1.0,442369566.18399984
2012,1,8,30,55934,1800,0,878.0,27.2,0,2050.0,18.0,2.66,0.665,0.19,-1.0,442369866.1840002
2012,1,8,35,55934,2100,0,594.0,20.0,0,2070.0,18.0,2.61,0.563,0.152,-1.0,442370166.1839999
2012,1,8,40,55934,2400,0,685.0,21.1,0,1820.0,19.0,2.56,0.769,0.211,-1.0,442370466.18400025
2012,1,8,45,55934,2700,0,848.0,24.5,0,2000.0,18.1,3.02,0.686,0.105,-1.0,442370766.18399996
2012,1,8,50,55934,3000,0,726.0,26.7,0,1960.0,18.5,2.97,0.756,0.16,-1.0,442371066.1839997


In [28]:
#try out writing to an h5 file

test_array = np.random.rand(100,4)

In [36]:
test_table = Table(test_array, names=['time_frac','val1','val2','val3'])

In [39]:
misc.hdf5.write_table_hdf5(test_table,'./test.h5')

OSError: File ./test.h5 already exists. If you mean to replace it then use the argument "overwrite=True".

In [41]:
pull_table = misc.hdf5.read_table_hdf5("./test.h5")

In [42]:
pull_table

time_frac,val1,val2,val3
float64,float64,float64,float64
0.2328581352437361,0.9216367710272351,0.4116790625441731,0.3993153453194147
0.2339234471822711,0.36152490067477017,0.321371932091044,0.40428448810190265
0.5333765212537173,0.7177114485273823,0.42047042069576357,0.6842642186652845
0.5675338741008029,0.64090680395659,0.11590406767447126,0.5261274997694191
0.6167150983219097,0.19425611701087842,0.863131426367121,0.02854961770676412
0.9973079286585181,0.4105638106599806,0.009258300371353334,0.24787988493505897
0.6711082363806673,0.8658798762759182,0.9806857450573593,0.18724875996418977
0.5851902737051563,0.45049987517548173,0.16755025940592472,0.3675378697135052
0.514520559911257,0.22179389460712629,0.9346307560302893,0.055316011318566005
0.7345976424193935,0.23188818100177344,0.8309389992137579,0.6508222408171644
