In [None]:
import numpy as np
import pandas as pd
import datetime as dt
from matplotlib import pyplot as plt

Import Data

In [None]:
# Read 'omni.csv' in two passes, skipping the first 117 lines in both:
#   - column 0 is the timestamp, read as text so it can be parsed below
#   - columns 1-4 are the numeric measurements
time = np.genfromtxt('omni.csv', delimiter=',', skip_header=117, usecols=0, dtype=str)
data = np.genfromtxt('omni.csv', delimiter=',', skip_header=117, usecols=range(1, 5))

# Parse each timestamp string into a datetime object and collect them in an array
time = np.array(
    [dt.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S.%fZ') for stamp in time]
)

Data Clean Up

In [None]:
def delete_row(input_data, input_time, index):
    """Remove one row from the data array and the matching time entry.

    Before deleting, prints the position of ``index`` as a percentage of
    ``len(input_data)`` together with the time stamp and data row that are
    about to be removed.

    Parameters
    ----------
    input_data : array-like
        Array whose entry ``index`` along axis 0 is removed.
    input_time : array-like
        Array whose element ``index`` is removed.
    index : int
        Position of the row to delete.

    Returns
    -------
    tuple
        ``(data_without_row, time_without_row, index - 1)``. The inputs are
        not modified; ``np.delete`` returns new arrays.
    """
    percent = (index / len(input_data)) * 100
    print('remaining data:', percent, '%')
    print('Time:', input_time[index])
    print('Data:', input_data[index])
    print()

    trimmed_data = np.delete(input_data, index, axis=0)
    trimmed_time = np.delete(input_time, index)

    return trimmed_data, trimmed_time, index - 1

In [None]:
# Drop every row in which one of the first three columns holds its sentinel
# value (presumably the data set's "missing value" markers -- TODO confirm):
#   column 0 (bZ)      == 999.9
#   column 1 (fP)      == 99.99
#   column 2 (ssCount) == 999
#
# One boolean mask is built for the whole array and applied to both `data`
# and `time`, so the two arrays stay aligned row-for-row. Deleting rows one
# at a time inside an index loop is avoided on purpose: the array shrinks
# while the loop range does not, which skips the row that slides into the
# freed slot and eventually indexes past the end.
bad_bZ = data[:, 0] == 999.9
bad_fP = data[:, 1] == 99.99
bad_ssCount = data[:, 2] == 999
bad_rows = bad_bZ | bad_fP | bad_ssCount

# number of rows removed by this cell
rowsDeleted = int(np.count_nonzero(bad_rows))

print('bZ = 999.9 in', int(np.count_nonzero(bad_bZ)), 'rows')
print('fP = 99.99 in', int(np.count_nonzero(bad_fP)), 'rows')
print('ssCount = 999 in', int(np.count_nonzero(bad_ssCount)), 'rows')
print('rows deleted:', rowsDeleted, 'of', len(data))

# Boolean indexing returns new arrays; re-running this cell is harmless
# because the cleaned arrays no longer contain any sentinel rows.
data = data[~bad_rows]
time = time[~bad_rows]

In [None]:
# save the cleaned data and time to new csv files
np.savetxt('onmi_cleaned_data.csv', data, delimiter=',')

# `time` holds datetime objects, which np.savetxt's default numeric format
# ('%.18e') cannot write. Convert them to text first and write with fmt='%s'.
# The format string matches the one used to parse the timestamps on import,
# so the saved file can be read back with the same strptime call.
time_strings = [stamp.strftime('%Y-%m-%dT%H:%M:%S.%fZ') for stamp in time]
np.savetxt('onmi_cleaned_time.csv', time_strings, fmt='%s')

In [None]:
# Split `data` into one series per column, in column order:
#   bZ      - magnetic field in the z-direction
#   fP      - flow pressure
#   ssCount - sun spot count
#   dst     - dst index
bZ, fP, ssCount, dst = (data[:, column] for column in range(4))

In [None]:
# clean up the Bz data: replace outliers with NaN

# mean and standard deviation of the Bz component, ignoring NaN entries
avg_bZ = np.nanmean(bZ)
std_bZ = np.nanstd(bZ)

# A sample is an outlier when it lies more than 2 standard deviations from
# the mean. The mask is computed on the whole array because assigning to the
# loop variable of `for x in bZ` would never change the array itself.
# Comparisons against NaN evaluate to False, so existing NaNs are not flagged;
# errstate only silences the "invalid value" warning some NumPy versions emit.
with np.errstate(invalid='ignore'):
    bZ_outliers = (bZ > avg_bZ + 2*std_bZ) | (bZ < avg_bZ - 2*std_bZ)

# np.where builds a new array, so `bZ` is rebound and the array it was
# sliced from is left untouched.
bZ = np.where(bZ_outliers, np.nan, bZ)
print('replaced', int(np.count_nonzero(bZ_outliers)), 'Bz outliers with NaN')



In [None]:
# Flow pressure clean-up.
# TODO: no clean-up is applied to fP yet; this cell only computes its mean.

# Mean of the flow pressure, ignoring NaN entries (np.nanmean)
avg_fP = np.nanmean(fP)


In [None]:
# Sun spot count clean-up.
# TODO: no clean-up is applied to ssCount yet; this cell only computes its mean.

# Mean of the sun spot count, ignoring NaN entries (np.nanmean)
avg_ssCount = np.nanmean(ssCount)


In [None]:
# Dst index clean-up.
# TODO: no clean-up is applied to dst yet; this cell only computes its mean.

# Mean of the dst index, ignoring NaN entries (np.nanmean)
avg_dst = np.nanmean(dst)

Plot Data

In [None]:
# Four stacked panels in a single figure, one per quantity, all plotted against time
fig, axs = plt.subplots(4, figsize=(10, 10), tight_layout=True, num='OMNI Data')
fig.suptitle('OMNI Data', fontsize=16)

# (series, panel title, y-axis label) for each panel, top to bottom
panels = (
    (bZ, 'Bz', 'nT'),
    (fP, 'Flow Pressure', 'nPa'),
    (ssCount, 'Daily Sunspot Count', 'Count'),
    (dst, 'Dst Index', 'nT'),
)

for ax, (series, title, unit) in zip(axs, panels):
    ax.plot(time, series)
    ax.set_title(title)
    ax.set_ylabel(unit)

plt.show()