In [1]:
import pandas
import datetime
import numpy
from scipy.optimize import curve_fit
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import ticker
%matplotlib inline
from matplotlib import rcParams

In [2]:
datFileName = "../data/processedSaps.txt"
sapsDataDF = pandas.read_csv(datFileName, sep=' ')
# # add dst_bins
dstBins = [ -150, -75, -50, -25, -10, 10 ]
sapsDataDF = pandas.concat( [ sapsDataDF, \
                    pandas.cut( sapsDataDF["dst_index"], \
                               bins=dstBins ) ], axis=1 )
sapsDataDF.columns = [ "dateStr", "sapsLat", "sapsMLT", \
                      "sapsVel", "radId", "poesLat", "poesMLT", \
                      "dst_date", "dst_index", "time", "dst_bin" ]
sapsDataDF = sapsDataDF.drop(["poesLat", "poesMLT"], 1)
sapsDataDF["sapsMLTRounded"] = sapsDataDF["sapsMLT"].map(lambda x: round(x) )
sapsDataDF.head()

Unnamed: 0,dateStr,sapsLat,sapsMLT,sapsVel,radId,dst_date,dst_index,time,dst_bin,sapsMLTRounded
0,20110107,56.5,17.7543,308.2077,33.0,2011-01-07 00:00:00,-18.0,0,"(-25, -10]",18.0
1,20110107,55.5,18.0147,224.1588,33.0,2011-01-07 00:00:00,-18.0,0,"(-25, -10]",18.0
2,20110107,56.5,17.8749,307.4328,33.0,2011-01-07 00:00:00,-18.0,0,"(-25, -10]",18.0
3,20110107,55.5,18.1324,222.4787,33.0,2011-01-07 00:00:00,-18.0,0,"(-25, -10]",18.0
4,20110107,56.5,17.9955,305.4201,33.0,2011-01-07 00:00:00,-18.0,0,"(-25, -10]",18.0


In [19]:
sapsNumRadsSer = sapsDataDF.groupby(["dateStr"]).agg( {"radId": pandas.Series.nunique} )

In [68]:
# get times, dates when the number of data points observed
# is greater than 50 points at a given time.
sapsDateTimeCount = sapsDataDF.groupby(["dateStr", "time"]).count()
sapsDateTimeCount = sapsDateTimeCount[ sapsDateTimeCount["sapsLat"] > 50 ].reset_index()
sapsDateTimeCount = sapsDateTimeCount[['dateStr','time', 'sapsLat']]
sapsDateTimeCount.columns = [ "dateStr", "time", "nPoints" ]
# Now get data points when atleast 4 radars were making observations
sapsUniqRadsCnt = sapsDataDF.groupby(["dateStr","time"]).agg( {"radId": pandas.Series.nunique} )
sapsUniqRadsCnt = sapsUniqRadsCnt[ sapsUniqRadsCnt >= 4 ].dropna().reset_index()
sapsUniqRadsCnt.columns = [ "dateStr", "time", "nRads" ]
# Merge both to get good dates where we can get velocity measurements
selVelDatesDF = pandas.merge( sapsUniqRadsCnt, \
                             sapsDateTimeCount, \
                             on=[ "dateStr", "time" ], how="inner" )
selVelDatesDF.head()
# save as csv file
# selVelDatesDF.to_csv("../data/sapsVelDatesTimes.txt", sep=' ', index=False)

Unnamed: 0,dateStr,time,nRads,nPoints
0,20110205,230,4.0,98
1,20110205,300,4.0,88
2,20110205,330,5.0,93
3,20110302,100,4.0,75
4,20110302,200,4.0,81


In [None]:
# Merge the selected date time DF with sapsDataDF
# to get the radar ids as well
selVelDatesDF = pandas.merge( selVelDatesDF, sapsDataDF, on=["dateStr","time"] )
selVelDatesDF = selVelDatesDF[ [ 'dateStr','time', 'radId', 'nRads', 'nPoints' ] ]
selVelDatesDF.head()