In [None]:
from datetime import *
from glob import glob
import json
import importlib
import numpy as np
import os
import pylab as plt
import pandas as pd
import seaborn as sns
import pytz
import sys

# Set CDF_LIB before importing the project module below.
# NOTE(review): presumably read by a CDF reader used inside lib_search_dispersion -- confirm.
# NOTE(review): both paths are absolute and user-specific; the notebook will not run elsewhere as-is.
os.environ['CDF_LIB'] = '/home/dedasilv/.local/cdf/lib'
# Make the project checkout importable; this must run before the import on the next line.
sys.path.append('/home/dedasilv/disp')
import lib_search_dispersion
# Reload so that re-running this cell picks up on-disk edits to the module without a kernel restart.
importlib.reload(lib_search_dispersion)

# Render figures inline and use seaborn's white-grid theme for every plot below.
%matplotlib inline
sns.set_style('whitegrid')

# Load Long Term Trend Data

In [None]:
# Load every Long Term Trend CSV that matches the pattern and stack them into one frame.
ltt_pattern = '/home/dedasilv/disp/data/Long_Term_Trend_F*.csv'

# glob() returns paths in arbitrary, filesystem-dependent order; sort them so
# the row order of df_ltt is reproducible from run to run.
ltt_paths = sorted(glob(ltt_pattern))
if not ltt_paths:
    # Fail with the offending pattern instead of pd.concat's opaque
    # "No objects to concatenate".
    raise FileNotFoundError('No Long Term Trend CSV files match %r' % ltt_pattern)

dfs = [
    pd.read_csv(path, parse_dates=['start_time', 'end_time'])
    for path in ltt_paths
]

# reset_index() (without drop=True) is kept on purpose: it yields a fresh
# 0..N-1 index and preserves each file's own row number in an 'index' column,
# exactly as before.
df_ltt = pd.concat(dfs).reset_index()
df_ltt

# Load Kp Index data

In [None]:
# Read the whitespace-separated Kp/ap table; lines starting with '#' are skipped
# and the columns are named explicitly.
df_kp = pd.read_csv('/home/dedasilv/disp/data/Kp_ap_since_1932.txt', comment='#', sep='\\s+',
                    names='year month day hour hour2 days days_m Kp ap D'.split())

# Build one timezone-aware (UTC) timestamp per record from the date columns.
# Zipping the columns gives the same values as iterrows() but avoids building
# a Series object for every row of the file.
df_kp.insert(0, 'timestamp', [
    datetime(int(year), int(month), int(day), int(hour), tzinfo=pytz.utc)
    for (year, month, day, hour)
    in zip(df_kp.year, df_kp.month, df_kp.day, df_kp.hour)
])

# Restrict to the analysis window as a half-open interval [start_time, end_time).
# The lower bound is inclusive so the record stamped exactly at start_time
# (2010-01-01T00:00Z) is not silently dropped.
start_time = datetime(2010, 1, 1, tzinfo=pytz.utc)
end_time = datetime(2015, 1, 1, tzinfo=pytz.utc)
df_kp = df_kp[(df_kp.timestamp >= start_time) & (df_kp.timestamp < end_time)]
df_kp.head()

# Assign Kp index to each detected event

In [None]:
# Attach to every event the Kp value of the interval that contains its start_time.
#
# NOTE(review): assumes df_kp.timestamp marks the START of each Kp interval
# (built from the 'hour' column) and is sorted ascending -- confirm against
# the Kp file's format description.
kp_times = df_kp.timestamp
kp_values = df_kp.Kp
last_kp_pos = len(kp_times) - 1


def _kp_at(when):
    """Return Kp of the latest record whose timestamp is <= `when`."""
    # side='right' gives the position just past every timestamp <= when, so
    # subtracting one lands on the interval that contains `when`. The default
    # side='left' instead selected the first record at/after the event, i.e.
    # the *following* interval.
    pos = kp_times.searchsorted(when, side='right') - 1
    # Clamp so events before the first / after the last record use the
    # nearest record instead of wrapping around or raising IndexError.
    pos = min(max(pos, 0), last_kp_pos)
    return kp_values.iloc[pos]


df_ltt['Kp'] = [_kp_at(event_start) for event_start in df_ltt.start_time]
df_ltt.head()

# Visualize Statistics

In [None]:
# Bin edges 2010..2015 give one bin per calendar year, 2010 through 2014.
year_edges = np.arange(2010, 2016)
event_years = [event_start.year for event_start in df_ltt.start_time]
counts_per_year, _ = np.histogram(event_years, bins=year_edges)

fig, ax = plt.subplots()
# Bars use the default centre alignment, so each one sits on its year tick.
ax.bar(year_edges[:-1], counts_per_year, width=np.diff(year_edges), color='darkslategrey')
ax.set_xticks(year_edges[:-1])
ax.set_ylabel('Bin Counts')
ax.set_title('Event Counts over Five Year Run')

### Visualize Relationship with Kp

In [None]:
# Histogram of the Kp value attached to each detected event, with the mean marked.
event_kp = df_ltt.Kp
event_kp_mean = event_kp.mean()
kp_edges = np.arange(0, 10, .33)

fig, ax = plt.subplots()
ax.hist(event_kp, bins=kp_edges)
ax.set_xticks(range(10))
ax.set_title('Distribution of Event Kp index (2010 - 2014, N = %d)' % len(df_ltt.index))
ax.set_xlabel('Kp Index')
ax.set_ylabel('Bin Count')
# Dashed red line marks the sample mean printed below.
ax.axvline(event_kp_mean, color='red', linestyle='dashed')
print('Mean Kp: %.2f' % event_kp_mean)
None

In [None]:
# Histogram of every Kp record in the analysis window, with the mean marked.
all_kp = df_kp.Kp
all_kp_mean = all_kp.mean()
kp_edges = np.arange(0, 10, .33)

fig, ax = plt.subplots()
ax.hist(all_kp, bins=kp_edges)
ax.set_xticks(range(10))
ax.set_title('Distribution of All Kp index (2010 - 2014, N = %d)' % len(df_kp.index))
ax.set_xlabel('Kp Index')
ax.set_ylabel('Bin Count')
# Dashed red line marks the sample mean printed below.
ax.axvline(all_kp_mean, color='red', linestyle='dashed')
print('Mean Kp: %.2f' % all_kp_mean)
None

In [None]:
# Relative likelihood of a detection as a function of Kp: the fraction of
# events in each Kp bin divided by the fraction of all Kp records in that bin,
# scaled so the largest bin equals 1.

# Bin width of 1.32, i.e. four of the 0.33-wide bins used by the Kp histograms.
wbin = .33*4
bins = np.arange(0, df_kp.Kp.max()+wbin, wbin)
bin_counts_all, _ = np.histogram(df_kp.Kp, bins=bins)
bin_counts_evt, _ = np.histogram(df_ltt.Kp, bins=bins)

# Convert counts to fractions; max(..., 1) keeps an empty sample at all-zero
# instead of producing NaN from 0/0.
bin_counts_all = bin_counts_all.astype(float) / max(bin_counts_all.sum(), 1)
bin_counts_evt = bin_counts_evt.astype(float) / max(bin_counts_evt.sum(), 1)

# Ratio of the two fractions; bins with no Kp records at all stay at zero.
P = np.zeros_like(bin_counts_all)
mask = (bin_counts_all > 0)
P[mask] = bin_counts_evt[mask]/bin_counts_all[mask]
# Normalise to a peak of 1, unless every bin is zero.
if P.max() > 0:
    P /= P.max()

plt.figure(figsize=(8, 8))
plt.bar(bins[:-1], P, width=np.diff(bins), align='edge', color='lightseagreen')
plt.title('Relative Likelihood of Detection (Detection Rate Normalized by Kp frequency)')
plt.xlabel('Kp Index', fontsize=20)
plt.ylabel('Relative Frequency', fontsize=20)

# Place each integer tick at its true Kp value. The previous ticks sat at
# k + 0.5 but were labelled k, which mislabels the axis when the bins are not
# one Kp unit wide. Ticks are limited to the plotted range so they cannot
# widen the x-limits set just below.
kp_ticks = np.arange(0, int(np.floor(bins[-1])) + 1)
plt.xticks(kp_ticks, [str(int(k)) for k in kp_ticks])
plt.xlim(bins[0], bins[-1])
plt.grid(True)
None

# Visualize Relationship With $\vec{B}$

In [None]:
# Read the case file through a context manager so the handle is closed as soon
# as the JSON is parsed (the bare open() left it open until garbage collection).
with open('case_files/Long_Term_Trend.json') as case_fh:
    case_file = json.load(case_fh)

# Load the OMNIWeb files listed in the case file via the project helper.
omniweb_fh = lib_search_dispersion.read_omniweb_files(case_file['OMNIWEB_FILES'], silent=True)

In [None]:
# Look up the OMNIWeb sample for each event once, then reuse the positions for
# both field components.
omni_times = omniweb_fh['t']
omni_positions = [omni_times.searchsorted(event_start) for event_start in df_ltt.start_time]

for component in ('By', 'Bz'):
    omni_series = omniweb_fh[component]
    df_ltt[component] = [omni_series[pos] for pos in omni_positions]

# Values above the library's fill value are treated as missing.
fill_value = lib_search_dispersion.OMNIWEB_FILL_VALUE
for component in ('By', 'Bz'):
    is_fill = df_ltt[component] > fill_value
    df_ltt.loc[is_fill, component] = np.nan

In [None]:

# 1-wide bin edges: Bz from -25 to -4, By from -25 to 25.
Bz_bins = np.arange(-25, -3, 1)
By_bins = np.arange(-25, 26, 1)
n_events = len(df_ltt.index)

# By goes in the left panel and Bz in the right one.
fig, (ax_by, ax_bz) = plt.subplots(1, 2, figsize=(16, 5))

ax_bz.hist(df_ltt.Bz, bins=Bz_bins, color='purple')
ax_bz.set_xlabel('Bz (nT)')
ax_bz.set_ylabel('Bin Count')
ax_bz.set_title('Histogram of Bz over all events (N=%d)'%n_events)

ax_by.hist(df_ltt.By, bins=By_bins, color='orange')
ax_by.set_xlabel('By (nT)')
ax_by.set_ylabel('Bin Count')
ax_by.set_title('Histogram of By over all events (N=%d)'%n_events)
None

In [None]:

# Relative likelihood of detection versus By and Bz: the event density in each
# bin divided by the density of the full OMNIWeb series in that bin, scaled to
# a peak of 1.

# 1-wide bin edges: Bz from -25 to -4, By from -25 to 25.
Bz_bins = np.arange(-25, -3, 1)
By_bins = np.arange(-25, 26, 1)

# np.histogram ignores samples outside the bin range, so out-of-range values
# in either series do not contribute to the densities.
Bz_evt, _ = np.histogram(df_ltt.Bz, bins=Bz_bins, density=True)
Bz_all, _ = np.histogram(omniweb_fh['Bz'], bins=Bz_bins, density=True)

By_evt, _ = np.histogram(df_ltt.By, bins=By_bins, density=True)
By_all, _ = np.histogram(omniweb_fh['By'], bins=By_bins, density=True)


def _relative_likelihood(evt_density, all_density):
    """Return evt_density / all_density per bin, scaled so the largest bin is 1."""
    ratio = np.zeros_like(evt_density)
    populated = all_density > 0
    # Bins with no background samples stay at zero instead of dividing by zero.
    ratio[populated] = evt_density[populated] / all_density[populated]
    peak = ratio.max()
    # Skip the scaling when every bin is zero; dividing would yield NaN.
    if peak > 0:
        ratio /= peak
    return ratio


P_Bz = _relative_likelihood(Bz_evt, Bz_all)
P_By = _relative_likelihood(By_evt, By_all)

plt.figure(figsize=(16, 5))
plt.subplot(122)
# align='edge': the x values are LEFT bin edges. With the default centre
# alignment every bar was drawn half a bin too far to the left.
plt.bar(Bz_bins[:-1], P_Bz, width=np.diff(Bz_bins), align='edge', color='purple')
plt.xlabel('Bz (nT)')
plt.ylabel('Relative Frequency')
plt.title('Relative Likelihood of Detection (Detection Rate\nNormalized by Bz Frequency, N=%d)'%len(df_ltt.index))
plt.subplot(121)
plt.bar(By_bins[:-1], P_By, width=np.diff(By_bins), align='edge', color='orange')
plt.xlabel('By (nT)')
plt.ylabel('Relative Frequency')
plt.title('Relative Likelihood of Detection (Detection Rate\nNormalized by By Frequency, N=%d)'%len(df_ltt.index))
None

### Count number of Events Per Day

In [None]:
# Remove a 'date' column left by an earlier run; insert() raises if the
# column already exists, so this keeps the cell re-runnable.
if 'date' in df_ltt.columns:
    df_ltt.pop('date')

# Calendar date of each event's start, stored as the first column.
event_dates = [event_start.date() for event_start in df_ltt.start_time]
df_ltt.insert(loc=0, column='date', value=event_dates)
df_ltt.head()

In [None]:
# One row per calendar date with the number of events that started on it.
df_event_count = (
    df_ltt[['date', 'start_time']]
    .groupby('date')
    .count()
    .rename(columns={'start_time': 'num_events'})
    .reset_index()
)
df_event_count.head()

The distribution of events per day plotted below looks roughly exponential -- could the events follow a Poisson arrival process?

In [None]:
# Integer bin edges 0..13 for the daily event count, one tick per edge.
daily_count_edges = list(range(14))

fig, ax = plt.subplots()
ax.hist(df_event_count.num_events, bins=daily_count_edges, color='r')
ax.set_xticks(daily_count_edges)
ax.set_ylabel('Bin Count')
ax.set_xlabel('# Events / Day')
ax.set_title('Histogram of # Events / Day (Ndays = %d)' % len(df_event_count.index))
None

In [None]:
# Give every event the total number of events that share its calendar date.
# df_event_count.date comes out of groupby() in sorted order, so searchsorted
# finds the row for each event's date.
daily_dates = df_event_count.date
daily_counts = df_event_count.num_events

events_that_day = []
for event_date in df_ltt['date']:
    day_pos = daily_dates.searchsorted(event_date)
    events_that_day.append(daily_counts.iloc[day_pos])

df_ltt['num_events'] = events_that_day
df_ltt.head()

In [None]:
# 2-D histogram of events-per-day (x) against the event's Kp index (y).
binsx = np.arange(1,25)
binsy = np.arange(0, 10, .33)

fig, ax = plt.subplots()
# hist2d returns (counts, xedges, yedges, image); the image drives the colorbar.
count_image = ax.hist2d(df_ltt.num_events, df_ltt.Kp, bins=[binsx, binsy])[3]
fig.colorbar(count_image, ax=ax).set_label('Bin Count')
ax.set_xlabel('Number Events')
ax.set_ylabel('Kp Index')
ax.set_title('Histogram of Number of Events vs Kp Index')
# Label every other bin edge to keep the axis readable.
ax.set_xticks(binsx[::2])
None