In [1]:
import pandas
import datetime
import numpy
from sklearn import linear_model
import scipy
from scipy.optimize import curve_fit
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.colors import ListedColormap
from matplotlib.colors import Normalize
from matplotlib import ticker
from matplotlib import rcParams

In [2]:
# Thresholds shared by the filtering steps of this analysis.

# SAPS speed window: only rows with vSaps strictly between the lower
# and upper bound are retained.
velCutoffLower = 0.0
velCutoffUpper = 2000.0

# Magnetic-latitude ceiling: only rows with MLAT strictly below this are kept.
mlatCutOffUpper = 70.0

# Minimum relative occurrence a (MLAT, normMLT, dst_bin) cell must exceed
# to be kept in the analysis.
probOccCutoff = 0.2

# Minimum point count per MLT/MLAT cell.
# NOTE(review): not referenced by the cells visible here -- presumably
# consumed further down the notebook; confirm before removing.
numPointsCutoffMLTMLAT = 250

In [9]:
# Load the processed SAPS velocities together with the geomagnetic
# indices (space-separated text file).
velGmagDF = pandas.read_csv("../data/processed-vels-geomag.txt", sep=' ')
# 'Unnamed: 0' is presumably the index column written out when the file
# was saved (TODO confirm); it carries no information we need.
velGmagDF = velGmagDF.drop('Unnamed: 0', axis=1)

# Discard unwanted values.
# We only consider velocities strictly between velCutoffLower and
# velCutoffUpper (m/s) that are located below mlatCutOffUpper MLAT.
velGmagDF = velGmagDF[
    (velGmagDF["vSaps"] > velCutoffLower)
    & (velGmagDF["vSaps"] < velCutoffUpper)
].reset_index(drop=True)
velGmagDF = velGmagDF[ velGmagDF["MLAT"] < mlatCutOffUpper ].reset_index(drop=True)

# Now filter out locations with a low rate of occurrence.
# For every (MLAT, normMLT, dst_bin) cell we count the measurements and
# normalise by the largest cell count found in the same Dst bin; this
# ratio is the probability of occurrence ("probOcc"). Cells whose
# probOcc does not exceed probOccCutoff are removed.
mlatMLTDstCountDF = velGmagDF.groupby( ["MLAT", "normMLT", "dst_bin"] )["vSaps"].count().reset_index()
mlatMLTDstCountDF.columns = [ "MLAT", "normMLT", "dst_bin", "count" ]
dstMaxCntDF = mlatMLTDstCountDF.groupby( ["dst_bin"] )["count"].max().reset_index()
dstMaxCntDF.columns = [ "dst_bin", "maxCntDst" ]
mlatMLTDstCountDF = pandas.merge( mlatMLTDstCountDF, dstMaxCntDF, on=[ "dst_bin" ] )
mlatMLTDstCountDF["probOcc"] = mlatMLTDstCountDF["count"]/mlatMLTDstCountDF["maxCntDst"]
mlatMLTDstCountDF = mlatMLTDstCountDF[ mlatMLTDstCountDF["probOcc"] > probOccCutoff ].reset_index(drop=True)

# Filter out MLATs and MLTs (at the Dst bins) where the number of
# measurements is low. pandas.merge defaults to an inner join, so only
# measurements whose cell survived the probOcc filter are kept; the
# merge also attaches the count / maxCntDst / probOcc columns.
velGmagDF = pandas.merge(
    velGmagDF,
    mlatMLTDstCountDF,
    on=[ "MLAT", "normMLT", "dst_bin" ]
)


# Get a DF with the mean Dst in each bin.
# The column is selected *before* aggregating: averaging the whole frame
# would also try to average the string columns, which raises a TypeError
# on pandas >= 2.0 (and is wasted work on older versions).
# astype(int) truncates the mean towards zero.
dstMeanDF = velGmagDF.groupby( ["dst_bin"] )["dst_index"].mean().astype(int).reset_index()
dstMeanDF.columns = [ "dst_bin", "dst_mean" ]
velGmagDF = pandas.merge( velGmagDF, dstMeanDF, on=["dst_bin"] )
# DataFrame.sort() was removed from pandas; sort_values() is the
# supported equivalent. Quietest Dst bins (largest mean) come first.
velGmagDF = velGmagDF.sort_values( ["dst_mean"], ascending=False ).reset_index(drop=True)

velGmagDF.head()



Unnamed: 0,normMLT,MLAT,vSaps,azim,vMagnErr,azimErr,dtStr,tmStr,date,hour,...,dst_bin,AE,AL,AO,AU,minute,count,maxCntDst,probOcc,dst_mean
0,1.0,58.0,927.34,-12.52,13.71,25.4,20120730,838,2012-07-30 08:38:00,8,...,"(-10, 10]",705,-386,-34,319,38,293,508,0.576772,-3
1,-3.0,61.0,755.23,-17.91,11.19,24.97,20110720,642,2011-07-20 06:42:00,6,...,"(-10, 10]",400,-227,-27,173,42,277,508,0.545276,-3
2,-3.0,61.0,743.78,-17.19,12.03,27.18,20110720,638,2011-07-20 06:38:00,6,...,"(-10, 10]",400,-222,-22,178,38,277,508,0.545276,-3
3,-3.0,61.0,800.52,-14.51,8.77,20.83,20110720,636,2011-07-20 06:36:00,6,...,"(-10, 10]",416,-218,-10,198,36,277,508,0.545276,-3
4,-3.0,61.0,535.88,-19.86,12.42,23.82,20110720,634,2011-07-20 06:34:00,6,...,"(-10, 10]",480,-271,-31,209,34,277,508,0.545276,-3


In [12]:
# Inspect the first 25 measurements that fall in the "(-10, 10]" Dst bin
# yet have a SAPS speed above 1000 (vSaps units, m/s per the filter cell).
velGmagDF.loc[
    lambda frame: (frame["vSaps"] > 1000.0) & (frame["dst_bin"] == "(-10, 10]")
].head(25)

Unnamed: 0,normMLT,MLAT,vSaps,azim,vMagnErr,azimErr,dtStr,tmStr,date,hour,...,dst_bin,AE,AL,AO,AU,minute,count,maxCntDst,probOcc,dst_mean
20,-3.0,61.0,1024.31,-11.57,6.68,18.02,20120420,656,2012-04-20 06:56:00,6,...,"(-10, 10]",382,-185,6,197,56,277,508,0.545276,-3
23,-3.0,61.0,1555.94,-17.91,16.16,26.28,20120420,652,2012-04-20 06:52:00,6,...,"(-10, 10]",367,-203,-20,164,52,277,508,0.545276,-3
44,-3.0,61.0,1078.72,-16.39,22.0,41.34,20120420,654,2012-04-20 06:54:00,6,...,"(-10, 10]",394,-207,-10,187,54,277,508,0.545276,-3
45,-3.0,61.0,1233.64,-19.06,12.76,21.29,20120420,650,2012-04-20 06:50:00,6,...,"(-10, 10]",359,-189,-10,170,50,277,508,0.545276,-3
69,-3.0,61.0,1134.03,-19.63,3.53,5.92,20120420,648,2012-04-20 06:48:00,6,...,"(-10, 10]",351,-186,-11,165,48,277,508,0.545276,-3
71,-3.0,61.0,1372.45,-18.41,10.86,17.02,20120420,646,2012-04-20 06:46:00,6,...,"(-10, 10]",373,-212,-26,161,46,277,508,0.545276,-3
72,-3.0,61.0,1073.23,-19.93,8.31,12.41,20120420,644,2012-04-20 06:44:00,6,...,"(-10, 10]",391,-224,-29,167,44,277,508,0.545276,-3
73,-3.0,61.0,1312.96,-16.04,5.41,10.27,20120420,642,2012-04-20 06:42:00,6,...,"(-10, 10]",395,-224,-27,171,42,277,508,0.545276,-3
74,-3.0,61.0,1110.22,-19.23,8.28,14.81,20120420,640,2012-04-20 06:40:00,6,...,"(-10, 10]",411,-217,-12,194,40,277,508,0.545276,-3
75,-3.0,61.0,1115.47,-19.81,8.45,15.37,20120420,638,2012-04-20 06:38:00,6,...,"(-10, 10]",454,-224,3,230,38,277,508,0.545276,-3
