In [1]:
import pandas
import datetime
import numpy
import bs4
import urllib
import os

In [2]:
### Below are some useful urls to download data (ephemeris) files
### These may not be directly used in the code but can be used to
### download data that will be used in the code. Basically, noting
### them down here so that I wouldn't lose them!
### NOTE: none of these three names is referenced by the cells
### visible in this part of the notebook.
# Info link
infoLink = "http://rbspgway.jhuapl.edu/analysis_models"
# TEST RBSP download link (going by the file name in the url, this
# points at a single rbspb TS04D MagEphem text file for 20131231)
testUrl = "https://www.rbsp-ect.lanl.gov/data_pub/rbspb/MagEphem/def/2013/rbspb_def_MagEphem_TS04D_20131231_v1.0.0.txt"
# TS07D Link:
ts07DUrl = "http://rbspgway.jhuapl.edu/SGMagEphem"

In [3]:
# THE TS07D Ephemeris files are in a folder by date
# Loop through the files in the folder, read the files,
# process the data (remove unwanted cols) and output
# the data into a new file. That can be used by IDL to
# calculate AACGM coords.
# set the base directory
baseDir = "/home/bharat/Documents/code/rbsp-locs/"
# Loop through the base directory and sub directories
# and get a list of ephemeris files that we need!
fileList = []
for root, dirs, files in os.walk(baseDir):
    for fName in files:
        # Get only those files which start with
        # the words "ts07d". These contain rbsp info!
        if fName.startswith("ts07d"):
            # os.path.join (rather than root + "/" + fName) avoids a
            # doubled "//" when root already ends with a separator,
            # which is the case for baseDir itself.
            fileList.append( os.path.join(root, fName) )
# os.walk returns entries in whatever order the filesystem gives
# them; sort so that the files are always processed in the same order.
fileList.sort()
# A single string argument prints the same text whether print is the
# python 2 statement or the python 3 function.
print("num files found---> " + str(len(fileList)))

num files found---> 1687


In [5]:
# Now loop through the files and work on getting the data
# setup a o/p file to write just the required params!
# os.path.join is used (rather than "+") so that a trailing
# separator on baseDir does not produce a doubled "/" in the path.
opFile = os.path.join(baseDir, "rbspOp.txt")
# NOTE(review): cnt is never read or updated in this cell; it is
# kept only in case a later cell relies on it -- confirm and drop.
cnt = 0
# The only columns that get written to the o/p file, in this order.
# Defined once here since the list is the same for every input file.
opCols = [ 'drvd_date', 'drvd_time',
           'drvd_year', 'drvd_mon',
           'drvd_day', 'Xgeo', 'Ygeo',
           'Zgeo', 'GeodLat', 'GeodLon',
           'GeodHeight', 'sat' ]
# Open in 'w' mode so that an o/p file left over from an earlier
# run is truncated. We dont want to append to old data!
with open(opFile, 'w') as opf:
    for fn in fileList:
        # sep=r"\s+" splits on any run of whitespace, same as the
        # delim_whitespace=True option that newer pandas deprecates.
        currData = pandas.read_csv(fn, sep=r"\s+")
        # Also need to check if curr file belongs
        # to rbspa or rbspb. This can then be used
        # to identify which spacecraft Fps belong to!
        # NOTE: the test is a substring match on fn, and any fn
        # without "rbspa" in it is labelled "rbspb".
        if "rbspa" in fn:
            currData["sat"] = "rbspa"
        else:
            currData["sat"] = "rbspb"
        currData["dtObj"] = pandas.to_datetime(currData["Time"])
        # Add a few additional datetime related columns. The .dt
        # accessor works on the whole column at once instead of
        # calling strftime() on each timestamp in a python loop.
        dtAcc = currData["dtObj"].dt
        currData["drvd_date"] = dtAcc.strftime("%Y%m%d")
        currData["drvd_time"] = dtAcc.strftime("%H%M")
        currData["drvd_year"] = dtAcc.year
        currData["drvd_mon"] = dtAcc.month
        currData["drvd_day"] = dtAcc.day
        # now we'll need only a few columns, just choose them
        currData = currData[opCols]
        # Write the DF data to the op file (no header/index, space
        # separated), one file's rows after another.
        currData.to_csv(opf, header=False, index=False, sep=' ')