In [1]:
import pandas
import datetime
import numpy
import scipy.optimize
import seaborn as sns
import matplotlib.pyplot as plt
import os
import time
import bs4
import urllib
from matplotlib.colors import ListedColormap
from matplotlib.colors import Normalize
from matplotlib import ticker
%matplotlib inline

In [2]:
# Thresholds used by the filtering steps in the cells below.
# Velocity window: values are kept only when strictly inside (lower, upper).
velCutoffLower, velCutoffUpper = 0., 2000.
# Upper bound on MLAT; rows at or above this value are discarded.
mlatCutOffUpper = 70.
# Minimum number of points a (normMLT, MLAT) cell needs in order to be kept.
numPointsCutoffMLTMLAT = 100

In [3]:
# READ Dst and AE data
# Both frames get string key columns ("dtStr" = YYYYMMDD, "hour" = HH and,
# for the auroral indices, "minute" = MM) that later cells use as merge keys.
inpDstFile = "../data/dst_out_file.csv"
dstDF = pandas.read_csv(inpDstFile, sep=' ',
                        infer_datetime_format=True,
                        parse_dates=["dst_date"])
# Keep timestamps strictly inside (2010-12-31 00:00, 2015-01-01 00:00).
# NOTE(review): this also keeps the part of 2010-12-31 after midnight --
# confirm that is intended and not meant to start at 2011-01-01.
dstDF = dstDF[ (dstDF["dst_date"] > datetime.datetime(2010,12,31)) &
               (dstDF["dst_date"] < datetime.datetime(2015,1,1)) ].reset_index(drop=True)
# Discard records whose Dst index is above +10.
dstDF = dstDF[ dstDF["dst_index"] <= 10. ].reset_index(drop=True)
# Vectorised formatting through the .dt accessor instead of a per-row lambda.
dstDF["dtStr"] = dstDF["dst_date"].dt.strftime('%Y%m%d')
dstDF["hour"] = dstDF["dst_date"].dt.strftime('%H')
# Aur Inds
aurDF = pandas.read_csv( "../data/aur_processed.txt", sep=' ' )
# "datetimeStr" is formatted as YYYYMMDD-HH-MM.
aurDF["date"] = pandas.to_datetime(aurDF["datetimeStr"], format='%Y%m%d-%H-%M')
aurDF["hour"] = aurDF["date"].dt.strftime('%H')
aurDF["minute"] = aurDF["date"].dt.strftime('%M')
aurDF["dtStr"] = aurDF["date"].dt.strftime('%Y%m%d')

In [4]:
#### In this block we load Velocity data ####
# Read the fitted velocities, apply the velocity / MLAT cutoffs and attach
# the Dst and auroral indices by matching on date / hour / minute strings.
fitVelFile = "../data/fitres.csv"
# The file has no header row, so the column names are supplied here.
inpColNames = ["azim", "azimStd", "delMLT", "endPtMLAT",
               "endPtNormMLT", "goodFit", "MLAT", "normMLT",
               "vSaps", "velSTD", "date"]
velsDataDF = pandas.read_csv(fitVelFile, sep=' ',
                             header=None, names=inpColNames,
                             infer_datetime_format=True,
                             parse_dates=["date"])
# Day key (YYYYMMDD) used by both merges below.
velsDataDF["dtStr"] = velsDataDF["date"].dt.strftime('%Y%m%d')
# Discard unwanted values
# We'll only consider those velocities which lie strictly between
# velCutoffLower and velCutoffUpper and are located strictly below
# mlatCutOffUpper (all three are set in the cutoff cell above).
velsDataDF = velsDataDF[ (velsDataDF["vSaps"] > velCutoffLower)
                         & (velsDataDF["vSaps"] < velCutoffUpper)
                       ].reset_index(drop=True)
velsDataDF = velsDataDF[ velsDataDF["MLAT"] < mlatCutOffUpper ].reset_index(drop=True)
# Hour / minute keys (zero-padded strings) for the merges below.
velsDataDF["hour"] = velsDataDF["date"].dt.strftime('%H')
velsDataDF["minute"] = velsDataDF["date"].dt.strftime('%M')
# Now merge the dst and velocity DFs.
# Inner join: velocity rows without a Dst record for that day/hour are dropped.
velsDataDF = pandas.merge( velsDataDF, dstDF,
                           on=["dtStr", "hour"], how='inner' )
# We generally work with Dst bins, set them up.
# Bins are right-closed, e.g. (-10, 10]; a Dst value at or below -150
# falls outside every bin and gets a missing dst_bin.
dstBins = [ -150, -75, -50, -25, -10, 10 ]
# Assign the bin column by name rather than concatenating and then
# relabelling every column by position, which would silently mislabel
# data if the upstream column order ever changed.
velsDataDF["dst_bin"] = pandas.cut( velsDataDF["dst_index"], bins=dstBins )
# Also merge with aurDF (inner join on day / hour / minute).
# Both frames carry a "date" column, so pandas suffixes them as
# "date_x" (velocity timestamp) and "date_y" (auroral-index timestamp).
velsDataDF = pandas.merge( velsDataDF, aurDF,
                           on=["dtStr", "hour", "minute"], how='inner' )
# Discard some unwanted cols: keep the velocity timestamp, drop "date_y".
selColsVels = ['azim', 'azimStd', 'delMLT', 'endPtMLAT', 'endPtNormMLT',
               'goodFit', 'MLAT', 'normMLT', 'vSaps', 'velSTD', 'date_x',
               'dtStr', 'hour', 'minute', 'dst_date', 'dst_index', 'dst_bin',
               'datetimeStr', 'AE', 'AL', 'AO', 'AU']
# Restore the original name of the velocity timestamp column explicitly.
velsDataDF = velsDataDF[ selColsVels ].rename(columns={"date_x": "date"})

In [5]:
# Filter out (normMLT, MLAT) cells where the number of datapoints is too low.
# The merge below adds a "count" column. Drop any copy left over from an
# earlier run of this cell so that re-running it does not produce
# "count_x" / "count_y" columns.
if "count" in velsDataDF.columns:
    velsDataDF = velsDataDF.drop("count", axis=1)
# Number of rows in each (normMLT, MLAT) cell, named explicitly.
countDF = velsDataDF.groupby([ "normMLT", "MLAT" ]).size().reset_index(name="count")
# Choose only cells which have at least numPointsCutoffMLTMLAT points
countDF = countDF[ countDF["count"] >= numPointsCutoffMLTMLAT ].reset_index(drop=True)
# Inner merge with velsDataDF keeps only rows in the surviving cells
# and attaches the per-cell "count" to each of them.
velsDataDF = pandas.merge( velsDataDF, countDF,
                           on=["normMLT", "MLAT"], how='inner' )
velsDataDF.to_csv("../data/processed-vels-geomag.txt", sep=' ', index=False)
# Parenthesised single-argument form prints the same on Python 2 and 3.
print(velsDataDF.columns.tolist())
velsDataDF.head()

['azim', 'azimStd', 'delMLT', 'endPtMLAT', 'endPtNormMLT', 'goodFit', 'MLAT', 'normMLT', 'vSaps', 'velSTD', 'date', 'dtStr', 'hour', 'minute', 'dst_date', 'dst_index', 'dst_bin', 'datetimeStr', 'AE', 'AL', 'AO', 'AU', 'count']


Unnamed: 0,azim,azimStd,delMLT,endPtMLAT,endPtNormMLT,goodFit,MLAT,normMLT,vSaps,velSTD,...,minute,dst_date,dst_index,dst_bin,datetimeStr,AE,AL,AO,AU,count
0,-3.387108,,,63.05,-0.9,False,63.0,0.0,902.535876,,...,24,2012-10-23 07:00:00,4.0,"(-10, 10]",20121023-07-24,121,-98,-38,23,284
1,-3.516834,,,63.04,-0.66,False,63.0,0.0,659.606637,,...,26,2012-10-23 07:00:00,4.0,"(-10, 10]",20121023-07-26,116,-100,-42,16,284
2,-9.533716,,,63.12,-0.69,False,63.0,0.0,704.274601,,...,28,2012-10-23 07:00:00,4.0,"(-10, 10]",20121023-07-28,106,-87,-34,19,284
3,-18.011539,3.652241,1.0,,,True,63.0,0.0,441.631334,57.540267,...,8,2011-09-20 06:00:00,-24.0,"(-25, -10]",20110920-06-08,110,-52,3,58,284
4,-18.011539,3.652241,1.0,,,True,63.0,0.0,441.631334,57.540267,...,8,2011-09-20 06:00:00,-24.0,"(-25, -10]",20110920-06-08,110,-52,3,58,284
