In [2]:
# Install the Qt5 bindings into the kernel's environment; the 'Qt5Agg'
# matplotlib backend selected at the bottom of this cell depends on Qt bindings.
# A kernel restart may be needed after a fresh install.
%pip install PyQt5

import numpy as np
import pandas as pd
import datetime as dt
import matplotlib
from matplotlib import pyplot as plt
# Select the Qt5Agg GUI backend so figures open in their own Qt windows
# (presumably for interactive pan/zoom) instead of being rendered inline.
matplotlib.use('Qt5Agg')

Note: you may need to restart the kernel to use updated packages.


Data Clean Up
- Removing all the 999s that indicate errors during collection

In [3]:
# data has already been cleaned and exported to the CSV files omni_cleaned_data.csv and omni_cleaned_time.csv
# only uncomment if data needs to be cleaned again
# takes about 20 mins to run on my machine (Ryzen 9 7940HS, 64 GB RAM, etc.)

# %run cleanup.py

Import Cleaned Data

In [5]:
# Load the pre-cleaned export: a float table of measurements plus a parallel
# file of timestamps, where row i of `time` labels row i of `data`.
data = np.genfromtxt('./data/onmi_cleaned_data.csv', delimiter=',', dtype=float)
# NOTE(review): dt.datetime is not a parsing instruction for NumPy -- it
# resolves to the generic object dtype, so `time` presumably holds the raw
# text of each timestamp rather than datetime values. TODO confirm, and parse
# explicitly once the on-disk timestamp format is known.
time = np.genfromtxt('./data/onmi_cleaned_time.csv', delimiter=',', dtype=dt.datetime)

# Fail fast with a readable message: the later cells slice four columns out of
# `data` and index `time` with boolean masks built from those columns, which
# otherwise fails with an opaque IndexError far from the real cause.
if data.ndim != 2 or data.shape[1] < 4:
    raise ValueError(
        f"expected a 2-D table with at least 4 columns, got shape {data.shape}"
    )
if np.shape(time)[:1] != data.shape[:1]:
    raise ValueError(
        f"time has shape {np.shape(time)} but data has {data.shape[0]} rows; "
        "the two files must have one timestamp per data row"
    )

bZ = data[:, 0] # magnetic field in the z-direction
fP = data[:, 1] # flow pressure
ssCount = data[:, 2] # sun spot count
dst = data[:, 3] # dst index

# Quick sanity peek at the first Bz sample.
bZ[0]

2.4

In [6]:
# Remove some outliers in the data: for every series keep only the samples
# that lie strictly within 3 standard deviations of that series' mean, and
# keep the matching timestamps alongside them.
def _clip_outliers(values, stamps, n_sigma=3):
    """Filter one series to its +/- ``n_sigma`` standard-deviation band.

    Parameters
    ----------
    values : np.ndarray
        1-D numeric series to filter.
    stamps : np.ndarray
        Array indexed in parallel with ``values`` (one entry per sample).
    n_sigma : float, optional
        Half-width of the accepted band in standard deviations.

    Returns
    -------
    tuple
        ``(mean, std, stamps_kept, values_kept)``. ``mean`` and ``std`` are
        computed over the non-NaN entries of ``values``; NaN samples never
        satisfy the band test and are therefore dropped.
    """
    # NaN-aware statistics: with plain mean/std a single NaN would turn both
    # bounds into NaN and silently discard the entire series.
    center = np.nanmean(values)
    spread = np.nanstd(values)
    keep = (values > center - n_sigma * spread) & (values < center + n_sigma * spread)
    return center, spread, stamps[keep], values[keep]


# Always filter from the untouched columns of `data` (never from bZ/fP/...,
# which this cell rebinds to the filtered result). That keeps the cell
# idempotent: re-running it no longer builds a mask from an already-shortened
# array and then fails when indexing the full-length `time`.
avg_bZ, std_bZ, timeBz, bZ = _clip_outliers(data[:, 0], time)
avg_fP, std_fP, timeFp, fP = _clip_outliers(data[:, 1], time)
avg_ssCount, std_ssCount, timeSs, ssCount = _clip_outliers(data[:, 2], time)
avg_dst, std_dst, timeDst, dst = _clip_outliers(data[:, 3], time)

Plot Data

In [7]:
# Draw the four filtered series as vertically stacked panels in one figure.
fig, axs = plt.subplots(4, figsize=(10, 10), tight_layout=True, num='OMNI Data')
fig.suptitle('OMNI Data', fontsize=16)

# One entry per panel, top to bottom: (x values, y values, title, y-axis label).
# Each series is paired with its own timestamp array because the outlier
# filter removes a different set of samples from each quantity.
panels = [
    (timeBz, bZ, 'Bz', 'nT'),
    (timeFp, fP, 'Flow Pressure', 'nPa'),
    (timeSs, ssCount, 'Daily Sunspot Count', 'Count'),
    (timeDst, dst, 'Dst Index', 'nT'),
]

for ax, (stamps, series, title, ylabel) in zip(axs, panels):
    ax.plot(stamps, series)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

fig.show()