In [2]:
import zipfile
import os
import pandas as pd
import numpy as np
import h5py as h5
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from pylab import rcParams

# Load the pickled object stored in 'epop_conj_list.pkl'.
# NOTE(review): presumably the list of e-POP conjunctions -- confirm; it is not
# referenced by any of the cells visible in this part of the notebook.
# Unpickling can execute arbitrary code, so only load a file from a trusted source.
epop_conj_list = pd.read_pickle('epop_conj_list.pkl')

### File extraction

In [2]:
# Unpack every archive found in 'RRI_zip' into 'RRI'. Archives whose extracted
# counterpart already exists are skipped, so the cell is cheap to re-run.
a = os.listdir('RRI_zip')
extract_dir = 'RRI'

for i in a:
    file_path = os.path.join('RRI_zip', i)

    # The extracted file is looked up under the archive's name with its last
    # four characters (the '.zip' suffix) removed.
    if os.path.exists(os.path.join(extract_dir, i[:-4])):
        print('Already unzipped')
        continue

    try:
        print('Extracting', file_path)
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except (zipfile.BadZipFile, OSError) as err:
        # Only archive/filesystem failures are tolerated here; a bare `except`
        # would also hide KeyboardInterrupt and genuine programming errors.
        # Report which archive failed and why, then carry on with the next one.
        print('Bad zip file', file_path, err)

Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipped
Already unzipp

In [3]:
# Paths of the extracted HDF5 files to process.
# os.listdir() returns entries in arbitrary order, so sort them to get a
# reproducible processing order, and keep only '.h5' entries so that any stray
# file or folder inside 'RRI' is not handed to h5py later on.
file_list = sorted(
    os.path.join('RRI', i) for i in os.listdir('RRI') if i.lower().endswith('.h5')
)
file_list

['RRI\\RRI_20170201_081043_081555_lv1_13.2.0.h5',
 'RRI\\RRI_20220304_002320_003317_lv1_13.1.1.h5',
 'RRI\\RRI_20220304_034355_035352_lv1_13.1.1.h5',
 'RRI\\RRI_20220327_080644_080941_lv1_13.1.2.h5',
 'RRI\\RRI_20220328_013630_013927_lv1_13.1.3.h5',
 'RRI\\RRI_20220330_080314_081311_lv1_13.1.1.h5',
 'RRI\\RRI_20220330_143924_144221_lv1_13.1.1.h5',
 'RRI\\RRI_20220406_043038_043235_lv1_13.1.3.h5',
 'RRI\\RRI_20220406_111226_111423_lv1_13.1.3.h5',
 'RRI\\RRI_20220406_202325_202522_lv1_13.1.3.h5',
 'RRI\\RRI_20220406_205957_210154_lv1_13.1.3.h5',
 'RRI\\RRI_20220406_211343_211540_lv1_13.1.3.h5',
 'RRI\\RRI_20220407_071853_072050_lv1_13.1.1.h5',
 'RRI\\RRI_20220407_085429_085626_lv1_13.1.1.h5',
 'RRI\\RRI_20220407_113419_113616_lv1_13.1.1.h5',
 'RRI\\RRI_20220407_145457_145654_lv1_13.1.1.h5',
 'RRI\\RRI_20220407_185604_185801_lv1_13.1.1.h5',
 'RRI\\RRI_20220408_231029_231226_lv1_13.1.1.h5',
 'RRI\\RRI_20220409_072754_072951_lv1_13.1.2.h5',
 'RRI\\RRI_20220409_123056_123253_lv1_13.1.2.h5',


### Spectrogram generation

This section follows the tutorial at https://epop.phys.ucalgary.ca/wp-content/uploads/2024/10/CASSIOPE-RRI-tutorial-v1.2.ipynb but differs in the following ways:

    (i) 'UTC Time' no longer exists

    (ii) 'Digital Down Converter Sampling Rate' no longer exists

The ephemeris group contains the ephemeris time, but the radio data are sampled much faster than the ephemeris time steps. Instead, I have identified the start and end ephemeris times. The sampling frequency is therefore calculated from the number of samples × 29 (29 samples per monopole) divided by the total time elapsed, defined as the difference between the start and end ephemeris times, assuming the sampling rate is constant throughout the interval. The UTC timestamps are defined in the same way, with the complication that the ephemeris time is recorded in seconds since 1968-05-24 whereas UTC (Unix) time starts from 1970-01-01, so this offset had to be corrected.

In [83]:
def fft(dipole, bin_width, bh_fil, ts_list, samp_freq):
    """Compute a windowed-FFT spectrogram of one complex dipole signal.

    The signal is cut into consecutive, non-overlapping bins of ``bin_width``
    samples (a trailing partial bin is dropped). Each bin is multiplied by the
    window ``bh_fil``, transformed with ``numpy.fft.fft``, re-ordered with
    ``numpy.fft.fftshift`` so the zero-frequency component sits in the centre,
    normalised by ``bin_width`` and converted to decibels.

    Parameters
    ----------
    dipole : 1-D numpy array
        Complex signal samples.
    bin_width : int
        Number of samples per FFT bin.
    bh_fil : array-like of length ``bin_width``
        Window applied to every bin before the transform.
    ts_list : array-like
        One timestamp per sample of ``dipole``; averaged over each bin.
    samp_freq : float
        Sampling frequency used to build the frequency axis.

    Returns
    -------
    fft_array : numpy.ndarray
        ``20*log10(|FFT| / bin_width)``, one column per bin, restricted to
        rows ``bin_width/4 .. 3*bin_width/4`` of the shifted spectrum (the
        central half around zero frequency).
    time_list : list
        Mean timestamp of every bin.
    freq_bins : numpy.ndarray
        ``numpy.fft.fftfreq(bin_width, 1/samp_freq)``: full length and NOT
        shifted or sliced; the caller aligns it with ``fft_array``.
    """
    fft_list = []
    time_list = []

    # `+ 1` keeps the final bin when the signal length is an exact multiple of
    # bin_width (a strict `<` comparison would silently drop it).
    for start in range(0, dipole.shape[0] - bin_width + 1, bin_width):
        stop = start + bin_width
        x = dipole[start:stop] * bh_fil  # apply the window to the binned data

        # Keep the FFT complex until the magnitude is taken: using only its real
        # part would make the level depend on the phase of the signal.
        x_FFT = np.fft.fftshift(np.fft.fft(x, n=bin_width)) / bin_width
        Pxx = 20 * np.log10(np.absolute(x_FFT))  # amplitude in dB

        fft_list.append(Pxx)
        # representative (mean) time of the samples in this bin
        time_list.append(np.average(ts_list[start:stop]))

    # rows = frequency, columns = time; keep only the central half of the spectrum
    fft_array = np.array(fft_list).T
    fft_array = fft_array[int(bin_width/4):int(3*bin_width/4)]
    freq_bins = np.fft.fftfreq(bin_width, d=1.0/samp_freq)

    return fft_array, time_list, freq_bins

def spec_gen(file, bin_width=5208):
    """Build the dipole A and dipole B spectrograms for one RRI HDF5 file.

    Timing is reconstructed from the first and last ephemeris MET values,
    assuming evenly spaced samples between them. A short summary of the file
    is printed as a side effect.

    Parameters
    ----------
    file : str
        Path of the HDF5 file to read.
    bin_width : int, optional
        Number of samples per FFT bin (default 5208, the value previously
        hard-coded here).

    Returns
    -------
    spec_a, spec_b : pandas.DataFrame
        One frame per dipole. Rows are indexed by the mean time of each FFT
        bin (naive datetimes from ``datetime.utcfromtimestamp``), columns by
        frequency in kHz, values are the dB levels returned by ``fft``.
    """
    print('File name:', os.path.basename(file))

    # Offset that moves MET (seconds since 1968-05-24) onto the Unix epoch.
    dt_std = pd.to_datetime(datetime(1968, 5, 24)).timestamp()

    # Read everything needed inside a context manager so the HDF5 handle is
    # closed again; otherwise one handle leaks per file processed.
    with h5.File(file, 'r') as f:
        eph_grp = f['CASSIOPE Ephemeris']
        data_grp = f['RRI Data']
        set_grp = f['RRI Settings']

        eph_t = [i for i in list(eph_grp.keys()) if 'MET' in i][0] # key of the ephemeris time dataset
        samp_no = data_grp['RRI Packet Numbers'][-1] # last packet number, used as the sample count
        sta = eph_grp[eph_t][0] + dt_std
        end = eph_grp[eph_t][-1] + dt_std
        freq_a = data_grp['Channel A Frequencies (Hz)'][0]
        data_format = set_grp['Data Format'][0].decode()

        # np.array(...) copies each dataset into memory, so the arrays stay
        # valid after the file is closed. The 1e3 scaling is kept as-is.
        mp_list = [i for i in list(data_grp.keys()) if 'Monopole' in i]
        mp_1, mp_2, mp_3, mp_4 = [
            np.array(data_grp[key]).flatten()*1e3 for key in mp_list[:4]
        ]

    sta_dt = datetime.utcfromtimestamp(sta)
    end_dt = datetime.utcfromtimestamp(end)
    ts_list = np.linspace(sta, end, samp_no*29) # unix timestamps, assuming regular sampling
    samp_freq = samp_no*29 / (end - sta)

    print('Start time:', sta_dt)
    print('End time:', end_dt)
    print('Number of samples:', samp_no*29)
    print('Mode:', data_format)
    print('Sampling frequency:', round(samp_freq))
    print('Channel A frequency:', round(freq_a))

    dp_a = mp_1 + 1j*mp_2 # dipole A
    dp_b = mp_3 + 1j*mp_4 # dipole B

    # Four-term cosine-sum window (called "Blackman-Harris" in this notebook).
    # NOTE(review): these coefficients look like those of the 4-term Nuttall
    # window rather than the classic Blackman-Harris set -- confirm the intent.
    a0 = 0.3635819
    a1 = -0.4891775
    a2 = 0.1365995
    a3 = -0.0106411
    var = np.pi * np.arange(bin_width) / (bin_width - 1)
    bh_fil = a0 + a1 * np.cos(2 * var) + a2 * np.cos(4 * var) + a3 * np.cos(6 * var)

    df_list = []

    for dipole in (dp_a, dp_b):
        fft_array, time_list, freq_bins = fft(dipole, bin_width, bh_fil, ts_list, samp_freq)
        time_avg = np.array(time_list)[0:int(fft_array.shape[1])]
        # fft() returns the unshifted fftfreq axis, whose leading half runs from
        # 0 up to samp_freq/2. Subtracting samp_freq/4 turns that half into
        # -samp_freq/4 .. +samp_freq/4, matching the central half of the shifted
        # spectrum kept by fft(); adding freq_a centres it on the channel
        # frequency, and 1e-3 converts Hz to kHz.
        freq_bins = (freq_bins - samp_freq/4 + freq_a) * 1e-3
        freq_bins = freq_bins[0:int(fft_array.shape[0])]
        dt_list = [datetime.utcfromtimestamp(t) for t in time_avg]
        df_list.append(pd.DataFrame(data=fft_array.T, index=dt_list).set_axis(freq_bins, axis=1))

    spec_a, spec_b = df_list
    return spec_a, spec_b

### Exporting spectrograms as pickles

In [84]:
%%time
# Generate both dipole spectrograms for every RRI file and save them as pickles.
out_dir = 'spectrogram_pickles'
# DataFrame.to_pickle does not create missing folders, so make sure it exists.
os.makedirs(out_dir, exist_ok=True)

for i in file_list:
    a, b = spec_gen(i)
    # Text between the last 'RRI' and the first 'lv' of the path, e.g.
    # '_20170201_081043_081555_' -> 'spec_20170201_081043_081555_a' / '..._b'
    stem = 'spec' + i.split('RRI')[-1].split('lv')[0]
    name_a = stem + 'a'
    name_b = stem + 'b'
    a.to_pickle(os.path.join(out_dir, name_a + '.pkl'))
    b.to_pickle(os.path.join(out_dir, name_b + '.pkl'))

File name: RRI_20170201_081043_081555_lv1_13.2.0.h5
Start time: 2017-02-01 08:10:43.592020
End time: 2017-02-01 08:15:55.592020
Number of samples: 19497686
Mode: I1Q1I3Q3
Sampling frequency: 62493
Channel A frequency: 15625
File name: RRI_20220304_002320_003317_lv1_13.1.1.h5
Start time: 2022-03-04 00:23:20.378000
End time: 2022-03-04 00:33:18.378000
Number of samples: 37328771
Mode: I1Q1I3Q3
Sampling frequency: 62423
Channel A frequency: 17999
File name: RRI_20220304_034355_035352_lv1_13.1.1.h5
Start time: 2022-03-04 03:43:55.358740
End time: 2022-03-04 03:53:53.358740
Number of samples: 37328974
Mode: I1Q1I3Q3
Sampling frequency: 62423
Channel A frequency: 17999
File name: RRI_20220327_080644_080941_lv1_13.1.2.h5
Start time: 2022-03-27 08:06:44.398880
End time: 2022-03-27 08:09:42.398880
Number of samples: 11078638
Mode: I1Q1I3Q3
Sampling frequency: 62240
Channel A frequency: 17999
File name: RRI_20220328_013630_013927_lv1_13.1.3.h5
Start time: 2022-03-28 01:36:30.430740
End time: 202

File name: RRI_20220425_163814_164411_lv1_13.1.1.h5
Start time: 2022-04-25 16:38:14.429390
End time: 2022-04-25 16:44:12.429390
Number of samples: 22334988
Mode: I1Q1I3Q3
Sampling frequency: 62388
Channel A frequency: 17999
File name: RRI_20220430_050114_050511_lv1_13.1.1.h5
Start time: 2022-04-30 05:01:14.424710
End time: 2022-04-30 05:05:12.424710
Number of samples: 14828715
Mode: I1Q1I3Q3
Sampling frequency: 62306
Channel A frequency: 17999
File name: RRI_20220505_034454_034851_lv1_13.1.1.h5
Start time: 2022-05-05 03:44:54.508230
End time: 2022-05-05 03:48:52.508230
Number of samples: 14822480
Mode: I1Q1I3Q3
Sampling frequency: 62279
Channel A frequency: 17999
File name: RRI_20220609_104514_105111_lv1_13.1.1.h5
Start time: 2022-06-09 10:45:14.436590
End time: 2022-06-09 10:51:12.436590
Number of samples: 22328898
Mode: I1Q1I3Q3
Sampling frequency: 62371
Channel A frequency: 17999
File name: RRI_20220610_043044_043641_lv1_13.1.1.h5
Start time: 2022-06-10 04:30:44.374310
End time: 202

File name: RRI_20230920_112425_112722_lv1_13.1.2.h5
Start time: 2023-09-20 11:24:25.378450
End time: 2023-09-20 11:27:23.378450
Number of samples: 11078812
Mode: I1Q1I3Q3
Sampling frequency: 62241
Channel A frequency: 17999
File name: RRI_20230921_002931_003128_lv1_13.1.1.h5
Start time: 2023-09-21 00:29:31.365940
End time: 2023-09-21 00:31:29.365940
Number of samples: 7328590
Mode: I1Q1I3Q3
Sampling frequency: 62107
Channel A frequency: 17999
File name: RRI_20230921_022209_022406_lv1_13.1.1.h5
Start time: 2023-09-21 02:22:09.360000
End time: 2023-09-21 02:24:07.360000
Number of samples: 7328822
Mode: I1Q1I3Q3
Sampling frequency: 62109
Channel A frequency: 17999
File name: RRI_20230921_100850_101047_lv1_13.1.1.h5
Start time: 2023-09-21 10:08:50.351720
End time: 2023-09-21 10:10:48.351720
Number of samples: 7328822
Mode: I1Q1I3Q3
Sampling frequency: 62109
Channel A frequency: 17999
File name: RRI_20230921_103658_103855_lv1_13.1.1.h5
Start time: 2023-09-21 10:36:58.354240
End time: 2023-0

File name: RRI_20231022_182722_182919_lv1_13.1.2.h5
Start time: 2023-10-22 18:27:22.431640
End time: 2023-10-22 18:29:20.431640
Number of samples: 7328648
Mode: I1Q1I3Q3
Sampling frequency: 62107
Channel A frequency: 17999
File name: RRI_20231023_175307_175504_lv1_13.1.4.h5
Start time: 2023-10-23 17:53:07.403830
End time: 2023-10-23 17:55:05.403830
Number of samples: 7328648
Mode: I1Q1I3Q3
Sampling frequency: 62107
Channel A frequency: 17999
File name: RRI_20231023_175845_180042_lv1_13.1.4.h5
Start time: 2023-10-23 17:58:45.404820
End time: 2023-10-23 18:00:43.404820
Number of samples: 7328648
Mode: I1Q1I3Q3
Sampling frequency: 62107
Channel A frequency: 17999
File name: RRI_20231024_224404_224601_lv1_13.1.3.h5
Start time: 2023-10-24 22:44:04.450810
End time: 2023-10-24 22:46:02.450810
Number of samples: 7322384
Mode: I1Q1I3Q3
Sampling frequency: 62054
Channel A frequency: 17999
File name: RRI_20231025_045418_045615_lv1_13.1.3.h5
Start time: 2023-10-25 04:54:18.420120
End time: 2023-10

### Cell graveyard

In [55]:
# Scratch cell kept for reference.
# NOTE(review): `data_grp` is not defined at notebook level in the cells shown
# here (it is a local of spec_gen), so this only runs with leftover kernel state.
# Parse the first 10 'UTC Time' byte strings, dropping the last character of each.
s = [datetime.strptime(i.decode()[:-1], '%Y-%m-%dT%H:%M:%S.%f') for i in data_grp.get('UTC Time')[:10]]
# Difference between each timestamp and the one before it. np.roll wraps around,
# so the first entry is s[0] - s[-1] (hence the negative first value).
np.array(s) - np.roll(s,1)

array([datetime.timedelta(days=-1, seconds=86399, microseconds=995824),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464),
       datetime.timedelta(microseconds=464)], dtype=object)

In [5]:
# Scratch cell: estimate the time spanned by one row of monopole data in the first file.
# NOTE(review): the handle `f` is never closed; later scratch cells reuse `f` and `e`.
f = h5.File(file_list[0], 'r')
# Offset added to the ephemeris MET values (seconds since May 24, 1968) to get Unix timestamps
dt_std = pd.to_datetime(datetime(1968, 5, 24)).timestamp()
d = np.array(f['CASSIOPE Ephemeris'].get('Ephemeris MET (seconds since May 24, 1968)'))+dt_std
e = np.array([datetime.utcfromtimestamp(i) for i in d])
# Total ephemeris time span divided by the number of rows in the Monopole 1 dataset
(e[-1]-e[0])/len(f['RRI Data'].get('Radio Data Monopole 1 (mV)'))

datetime.timedelta(microseconds=464)

In [17]:
# Scratch cell: 29 divided by the per-row duration expressed in seconds.
# Only the `.microseconds` component of the timedelta is used, so this is valid
# only while the per-row duration stays below one second.
29/(((e[-1]-e[0])/len(f['RRI Data'].get('Radio Data Monopole 1 (mV)'))).microseconds*1e-6)

62500.0

In [96]:
# Scratch cell: read the 'Digital Down Converter Sampling Rate' setting from the open file `f`.
# NOTE(review): the notebook's notes say this setting no longer exists in the
# files used for the spectrograms -- expect a KeyError for files that lack it.
sampling_rate = np.array(f['RRI Settings']['Digital Down Converter Sampling Rate'])
sampling_rate

array(62500.33933)