In [1]:
import pandas
import datetime
import numpy
import scipy.optimize
import seaborn as sns
import matplotlib.pyplot as plt
import os
import time
import bs4
import urllib
from matplotlib.colors import ListedColormap
from matplotlib.colors import Normalize
from matplotlib import ticker
%matplotlib inline

In [2]:
# Some constants
# Minimum number of distinct MLAT values that a (dst_bin, normMLT) group
# must contain; groups below this cutoff are dropped by the filtering
# cell further down.
cutOffMLTMLATUniqCnt = 4

In [3]:
# Load the processed velocity table (space-delimited) and parse the
# "date" column into datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated
# since pandas 2.0 (format inference is the default there) and only
# triggers a warning. Parsed values are unchanged.
velsDataDF = pandas.read_csv( "../data/processed-vels-geomag-fin.txt", sep=' ',
                            parse_dates=["date"] )
# Get actual SAPS azimuths
velsDataDF["azim"] = -90. - velsDataDF["azim"]
velsDataDF.head()

Unnamed: 0,azim,azimStd,delMLT,goodFit,MLAT,normMLT,vSaps,velSTD,endPtMLAT,endPtNormMLT,...,dst_index,dst_bin,datetimeStr,AE,AL,AO,AU,count,maxCount,probOcc
0,-79.412573,1.794202,0.5,True,62.0,-6.0,306.738497,11.198685,62.06,-6.3,...,-44.0,"(-50, -25]",20121014-01-00,500,-343,-93,157,470,2562,0.18345
1,-76.475692,1.486233,1.0,True,62.0,-6.0,301.958739,8.617099,62.07,-6.29,...,-44.0,"(-50, -25]",20121014-01-02,490,-341,-96,149,470,2562,0.18345
2,-78.295769,1.7711,0.5,True,62.0,-6.0,328.832509,13.365574,62.07,-6.32,...,-44.0,"(-50, -25]",20121014-01-04,527,-371,-108,156,470,2562,0.18345
3,-78.597362,1.525368,1.0,True,62.0,-6.0,357.951508,12.264929,62.07,-6.35,...,-44.0,"(-50, -25]",20121014-01-06,515,-366,-109,149,470,2562,0.18345
4,-78.413884,1.070948,1.0,True,62.0,-6.0,406.549966,10.616775,62.08,-6.4,...,-44.0,"(-50, -25]",20121014-01-08,527,-373,-110,154,470,2562,0.18345


In [4]:
# Discard sparsely sampled groups: a (dst_bin, normMLT) group is kept only
# when it has measurements at enough distinct latitudes.
# Step 1: number of unique MLAT values per (dst_bin, normMLT) group.
cntMLTPntsDF = (
    velsDataDF.groupby( ["dst_bin", "normMLT"] )["MLAT"]
    .nunique()
    .rename( "MLATCntUnq" )
    .reset_index()
)
# Step 2: keep just the keys of the groups that reach the cutoff.
cntMLTPntsDF = cntMLTPntsDF.loc[
    cntMLTPntsDF["MLATCntUnq"] >= cutOffMLTMLATUniqCnt,
    ["dst_bin", "normMLT"]
].reset_index( drop=True )
# Step 3: an inner merge on those keys removes every row of a rejected group.
velsDataDF = velsDataDF.merge( cntMLTPntsDF,
                    on=["dst_bin", "normMLT"], how="inner" )

In [5]:
# Work with AE bins
# Bin edges for the AE index. pandas.cut builds right-closed intervals by
# default, i.e. (0, 500], (500, 1000] and (1000, 2000].
aeBins = [ 0, 500, 1000, 2000 ]
# Attach the bin label as a new "AE_bin" column, assigned by name.
# The previous version concatenated a second column called "AE" and then
# overwrote all 26 column labels positionally; that mislabels columns if the
# input file's column order/count changes and raises a length-mismatch error
# when the cell is run a second time. Assigning by name is re-runnable.
# NOTE(review): this assumes the on-disk column names already equal the labels
# the old positional rename assigned ('dtStr', 'hour', 'minute', 'dst_date'
# are hidden in the displayed head) -- confirm.
# NOTE(review): AE values outside (0, 2000], including AE == 0, get a NaN bin,
# exactly as before -- confirm that is intended.
velsDataDF = velsDataDF.assign(
    AE_bin=pandas.cut( velsDataDF["AE"], bins=aeBins ) )
velsDataDF.head()

Unnamed: 0,azim,azimStd,delMLT,goodFit,MLAT,normMLT,vSaps,velSTD,endPtMLAT,endPtNormMLT,...,dst_bin,datetimeStr,AE,AL,AO,AU,count,maxCount,probOcc,AE_bin
0,-79.412573,1.794202,0.5,True,62.0,-6.0,306.738497,11.198685,62.06,-6.3,...,"(-50, -25]",20121014-01-00,500,-343,-93,157,470,2562,0.18345,"(0, 500]"
1,-76.475692,1.486233,1.0,True,62.0,-6.0,301.958739,8.617099,62.07,-6.29,...,"(-50, -25]",20121014-01-02,490,-341,-96,149,470,2562,0.18345,"(0, 500]"
2,-78.295769,1.7711,0.5,True,62.0,-6.0,328.832509,13.365574,62.07,-6.32,...,"(-50, -25]",20121014-01-04,527,-371,-108,156,470,2562,0.18345,"(500, 1000]"
3,-78.597362,1.525368,1.0,True,62.0,-6.0,357.951508,12.264929,62.07,-6.35,...,"(-50, -25]",20121014-01-06,515,-366,-109,149,470,2562,0.18345,"(500, 1000]"
4,-78.413884,1.070948,1.0,True,62.0,-6.0,406.549966,10.616775,62.08,-6.4,...,"(-50, -25]",20121014-01-08,527,-373,-110,154,470,2562,0.18345,"(500, 1000]"


In [6]:
# Get season data
def get_season(row):
    """Return the season label for one row, based on the month of row["date"].

    Months 11, 12, 1 and 2 map to "winter", months 5 through 8 map to
    "summer", and every other month maps to "equinox".
    """
    month = row["date"].month
    # Winter wraps around the end of the year, so test both ends first.
    if month <= 2 or month >= 11:
        return "winter"
    return "summer" if 5 <= month <= 8 else "equinox"
        
# Tag every row with its season by calling get_season once per row.
velsDataDF["season"] = velsDataDF.apply( func=get_season, axis="columns" )
velsDataDF.head()

Unnamed: 0,azim,azimStd,delMLT,goodFit,MLAT,normMLT,vSaps,velSTD,endPtMLAT,endPtNormMLT,...,datetimeStr,AE,AL,AO,AU,count,maxCount,probOcc,AE_bin,season
0,-79.412573,1.794202,0.5,True,62.0,-6.0,306.738497,11.198685,62.06,-6.3,...,20121014-01-00,500,-343,-93,157,470,2562,0.18345,"(0, 500]",equinox
1,-76.475692,1.486233,1.0,True,62.0,-6.0,301.958739,8.617099,62.07,-6.29,...,20121014-01-02,490,-341,-96,149,470,2562,0.18345,"(0, 500]",equinox
2,-78.295769,1.7711,0.5,True,62.0,-6.0,328.832509,13.365574,62.07,-6.32,...,20121014-01-04,527,-371,-108,156,470,2562,0.18345,"(500, 1000]",equinox
3,-78.597362,1.525368,1.0,True,62.0,-6.0,357.951508,12.264929,62.07,-6.35,...,20121014-01-06,515,-366,-109,149,470,2562,0.18345,"(500, 1000]",equinox
4,-78.413884,1.070948,1.0,True,62.0,-6.0,406.549966,10.616775,62.08,-6.4,...,20121014-01-08,527,-373,-110,154,470,2562,0.18345,"(500, 1000]",equinox


In [12]:
# Median dst_index, AE and vSaps within every (season, AE_bin, normMLT) group,
# flattened back into ordinary columns.
seasonDF = (
    velsDataDF.loc[ :, ["season", "dst_index", "AE", "vSaps", "AE_bin", "normMLT"] ]
    .groupby( ["season", "AE_bin", "normMLT"] )
    .median()
    .reset_index()
)
# selAEBinDF = seasonDF[ seasonDF["AE_bin"] == "(0, 500]" ]
# sns.barplot(x="season", y="vSaps", data=selAEBinDF)
seasonDF.head()

Unnamed: 0,season,AE_bin,normMLT,dst_index,AE,vSaps
0,equinox,"(0, 500]",-7.0,-86.0,365.0,556.148107
1,equinox,"(0, 500]",-6.0,-70.0,346.0,482.526133
2,equinox,"(0, 500]",-5.0,-49.0,328.0,519.281734
3,equinox,"(0, 500]",-4.0,-42.0,312.0,524.141189
4,equinox,"(0, 500]",-3.0,-31.0,269.0,476.6192


In [18]:
# sns.factorplot(x="normMLT", y="vSaps", hue="season", data=seasonMLTGrpDF, hue_order=season_order)
# Inspect the first 50 rows with AE below 500 and dst_index above -25.
velsDataDF.loc[
    (velsDataDF["AE"] < 500.) & (velsDataDF["dst_index"] > -25.),
    ["datetimeStr", "AE", "dst_index"]
].head(50)

Unnamed: 0,datetimeStr,AE,dst_index
100894,20121023-07-10,78,4.0
100895,20121023-07-14,73,4.0
100896,20121023-07-16,73,4.0
100897,20121023-07-18,80,4.0
100898,20121023-07-24,121,4.0
100899,20120327-07-00,80,-9.0
100900,20140907-06-02,261,-6.0
100901,20140907-06-02,261,-6.0
100902,20140907-06-04,261,-6.0
100903,20140907-06-04,261,-6.0
