In [1]:
import netCDF4
import pandas
import datetime
import numpy

In [2]:
minCutoffFitLat = 45.
delTimeCutOffNrstSat = 45 # min
mlonDiffOtrEndCutoff = 20.

In [3]:
fileList = [ "../poes_n15_20130302_proc.nc",\
                "../poes_n18_20130302_proc.nc",\
                "../poes_m01_20130302_proc.nc",\
                "../poes_n19_20130302_proc.nc",\
                "../poes_m01_20130302_proc.nc",\
                "../poes_n16_20130302_proc.nc",\
                "../poes_n17_20130302_proc.nc" ]
poesAllEleDataDF = pandas.DataFrame( columns =  ["timestamp", "date", "aacgm_lat_foot",\
                         "aacgm_lon_foot", "MLT", "log_ele_flux", "sat"] )
poesAllProDataDF = pandas.DataFrame( columns =  ["timestamp", "date", "aacgm_lat_foot",\
                         "aacgm_lon_foot", "MLT", "log_pro_flux", "sat"] )
for f in fileList:
    print "reading file-->", f
    # read variable from the netCDF files
    poesRawData = netCDF4.Dataset(f)
    poesDF = pandas.DataFrame( poesRawData.variables['time'][:], columns=[ "timestamp" ] )
    poesDF['date'] = pandas.to_datetime(poesDF['timestamp'], unit='ms')
    poesDF["alt"] = poesRawData.variables['alt'][:]
    poesDF["aacgm_lat_foot"] = poesRawData.variables['aacgm_lat_foot'][:]

    poesDF["aacgm_lon_foot"] = poesRawData.variables['aacgm_lon_foot'][:]
    poesDF["MLT"] = poesRawData.variables['MLT'][:]
    # round of to 2 decimals
    poesDF['alt'] = [ round( x, 2 ) for x in poesDF['alt']]
    poesDF['aacgm_lat_foot'] = [ round( x, 2 ) for x in poesDF['aacgm_lat_foot']]
    poesDF['aacgm_lon_foot'] = [ round( x, 2 ) for x in poesDF['aacgm_lon_foot']]
    poesDF['MLT'] = [ round( x, 2 ) for x in poesDF['MLT']]
    # Add up the fluxes
    poesDF["ted_ele_total_flux"] = poesRawData.variables['ted_ele_tel0_flux_4'][:] +\
            poesRawData.variables['ted_ele_tel0_flux_8'][:] + \
            poesRawData.variables['ted_ele_tel0_flux_11'][:] + \
            poesRawData.variables['ted_ele_tel0_flux_14'][:] + \
            poesRawData.variables['ted_ele_tel30_flux_4'][:] +\
            poesRawData.variables['ted_ele_tel30_flux_8'][:] + \
            poesRawData.variables['ted_ele_tel30_flux_11'][:] + \
            poesRawData.variables['ted_ele_tel30_flux_14'][:]
    poesDF["ted_pro_total_flux"] = poesRawData.variables['ted_pro_tel0_flux_4'][:] +\
            poesRawData.variables['ted_pro_tel0_flux_8'][:] + \
            poesRawData.variables['ted_pro_tel0_flux_11'][:] + \
            poesRawData.variables['ted_pro_tel0_flux_14'][:] + \
            poesRawData.variables['ted_pro_tel30_flux_4'][:] +\
            poesRawData.variables['ted_pro_tel30_flux_8'][:] + \
            poesRawData.variables['ted_pro_tel30_flux_11'][:] + \
            poesRawData.variables['ted_pro_tel30_flux_14'][:]
    poesDF['log_ele_flux'] = [0. if x <= 0. else round( numpy.log10(x), 2 )\
                 for x in poesDF['ted_ele_total_flux']]
    poesDF['log_pro_flux'] = [0. if x <= 0. else round( numpy.log10(x), 2 )\
                 for x in poesDF['ted_pro_total_flux']]
    # the current satellite number
    poesDF["sat"] = f[-19:-17]
    # seperate out electron and proton flux and discard all zeros
    currPoesEleFluxDF = poesDF[poesDF["log_ele_flux"] > 0.][ ["timestamp",\
                     "date", "aacgm_lat_foot", "aacgm_lon_foot", "MLT",\
                     "log_ele_flux", "sat"] ].reset_index(drop=True)
    currPoesProFluxDF = poesDF[poesDF["log_pro_flux"] > 0.][ ["timestamp",\
                     "date", "aacgm_lat_foot", "aacgm_lon_foot", "MLT",\
                     "log_pro_flux", "sat"] ].reset_index(drop=True)
    poesAllEleDataDF = poesAllEleDataDF.append( currPoesEleFluxDF )
    poesAllProDataDF = poesAllProDataDF.append( currPoesProFluxDF )
    # now delete all the rows for prev DFs
    # we don't want to duplicate data
    poesDF = poesDF.drop( poesDF.index )
    currPoesEleFluxDF = currPoesEleFluxDF.drop( currPoesEleFluxDF.index )
    currPoesProFluxDF = currPoesProFluxDF.drop( currPoesProFluxDF.index )
# create a date and time columns
poesAllEleDataDF["dateStr"] = poesAllEleDataDF["date"].map(lambda x: x.strftime('%Y%m%d'))
poesAllEleDataDF["time"] = poesAllEleDataDF["date"].map(lambda x: x.strftime('%H%M'))
poesAllProDataDF["dateStr"] = poesAllProDataDF["date"].map(lambda x: x.strftime('%Y%m%d'))
poesAllProDataDF["time"] = poesAllProDataDF["date"].map(lambda x: x.strftime('%H%M'))

reading file--> ../poes_n15_20130302_proc.nc
reading file--> ../poes_n18_20130302_proc.nc
reading file--> ../poes_m01_20130302_proc.nc
reading file--> ../poes_n19_20130302_proc.nc
reading file--> ../poes_m01_20130302_proc.nc
reading file--> ../poes_n16_20130302_proc.nc
reading file--> ../poes_n17_20130302_proc.nc


In [4]:
timeRange = [ poesAllEleDataDF["date"].min(), poesAllEleDataDF["date"].max() ]
ctime = timeRange[0]
timeInterval=datetime.timedelta(minutes=30)
while ctime <= timeRange[1]:
    ctime += timeInterval
    if abs( ctime - datetime.datetime(2013,3,2,5) ) < datetime.timedelta(minutes=1):
        break
print ctime

2013-03-02 05:00:00.201000


In [5]:
# We only need those times when POES was above minCutoffFitLat(45) MLAT
poesAllEleDataDF = poesAllEleDataDF[ \
                ( abs( poesAllEleDataDF["aacgm_lat_foot"] ) >= minCutoffFitLat )\
                ].reset_index(drop=True)
# We only need a few columns, discard the rest
poesAllEleDataDF = poesAllEleDataDF[ [ 'sat', 'date',\
                        'aacgm_lat_foot', 'aacgm_lon_foot',\
                            'MLT', 'log_ele_flux' ] ]
poesAllEleDataDF["delCtime"] = abs(poesAllEleDataDF["date"] - ctime)
poesAllEleDataDF["delLatFit"] = abs( poesAllEleDataDF["aacgm_lat_foot"] ) -\
                                    abs( minCutoffFitLat )
# We are sorting by sats, dates and lats to pick the nearest time
# when the satellite is between two 45 MLATs
poesAllEleDataDFNth = poesAllEleDataDF[ poesAllEleDataDF["aacgm_lat_foot"]\
                        >= 0. ].sort_values( ['sat', 'date', 'aacgm_lat_foot'],\
                                ascending=True ).reset_index(drop=True).drop_duplicates()
poesAllEleDataDFSth = poesAllEleDataDF[ poesAllEleDataDF["aacgm_lat_foot"]\
                        < 0. ].sort_values( ['sat', 'date', 'aacgm_lat_foot'],\
                                ascending=True ).reset_index(drop=True).drop_duplicates()

In [6]:
# Now we need to pick the satellite path (between two 45 MLATs)
# which is closest to the selected time.!
currNthEleDF = poesAllEleDataDFNth[\
                    ( poesAllEleDataDFNth["delLatFit"] <= 1. )\
                    ].sort_values( ["sat", "delCtime"], ascending=[True, True] )
# Now if the time difference is too large, discard the satellite data
dscrdSatList = currNthEleDF[ ["sat", "delCtime"] ].groupby( "sat" ).min()
dscrdSatList = dscrdSatList[ \
                    dscrdSatList["delCtime"] <= \
                    datetime.timedelta(minutes=delTimeCutOffNrstSat)\
                    ].reset_index()
# only choose the satellites which are nearby
currNthEleDF = currNthEleDF[ \
                currNthEleDF["sat"].isin( \
                    dscrdSatList["sat"].values ) ]
# Now we need to identify the 45 to 45 MLAT path
# Basically the satelllite should start at 45 MLAT
# at one MLT/MLON and reach 45 MLAT at the other 
# extreme end ( different MLON )
# Get the nearest 45 MLAT instance for each satellite,
# substract its MLT with the rest and sort by date asc
# and MLT desc!
nrstSatInstance = currNthEleDF[ ["sat", "delCtime"]\
                    ].groupby( "sat" ).min().reset_index()
# These are the starting times of 45-45 pass
satSelTimes = nrstSatInstance[ ["sat", "delCtime"] ]
satSelTimes.columns = [ ["sat", "delCtime"] ]
satSelTimes = pandas.merge( satSelTimes, \
                currNthEleDF, on=["sat", "delCtime"] )
satSelTimes = satSelTimes[ ["sat", "date"] ]
satSelTimes.columns = [ "sat", "start_time" ]
nrstSatInstance =  pandas.merge( currNthEleDF, \
                        nrstSatInstance, \
                        on=["sat", "delCtime"] )[ ["sat", "aacgm_lon_foot"] ]
nrstSatInstance.columns = [ "sat", "nrstTimeLon" ]
currNthEleDF = pandas.merge( currNthEleDF, nrstSatInstance, on="sat" )
currNthEleDF["delLon"] = abs(currNthEleDF["aacgm_lon_foot"] \
                            - currNthEleDF["nrstTimeLon"]).astype(int)
# Now when the satellite moves to other 45 MLAT
# ( at the opposite end of the Earth). We'll set a
# cutoff to discard the other values
currNthEleDF = currNthEleDF[ currNthEleDF["delLon"] \
                            > mlonDiffOtrEndCutoff \
                           ].reset_index(drop=True)

currNthEleDF = currNthEleDF.sort_values( ["sat", "delCtime", "delLon"],\
                                ascending=[True, True, False] )
# Now get the first row of each SAT group
currNthEleDF = currNthEleDF.groupby("sat").first().reset_index()
satSelTimes = pandas.merge( satSelTimes, currNthEleDF[ ["sat", "date"] ],\
                         on="sat")
satSelTimes.columns = [ "sat", "start_time", "end_time" ]
satSelTimes.head(20)

Unnamed: 0,sat,start_time,end_time
0,1,2013-03-02 05:09:26.746,2013-03-02 05:36:16.744
1,16,2013-03-02 04:55:20.987,2013-03-02 04:28:24.986
2,17,2013-03-02 04:21:50.193,2013-03-02 05:38:46.194
3,18,2013-03-02 04:50:17.132,2013-03-02 03:31:43.132
4,19,2013-03-02 05:08:44.993,2013-03-02 04:42:54.993
