This is the first tutorial on using Pandas data frames for professional astronomy.  In this example, we import common types of astronomical data into Pandas DataFrames.  -jrigby, 10/2016

In [22]:
import pandas
from astropy.io import ascii
from astropy.table import Table
from astropy.utils.data import download_file
import subprocess

In [19]:
# Read a simple ascii text file into Pandas
!head -n 5 Sample_data/zody_and_ism.txt
df1 = pandas.read_table("Sample_data/zody_and_ism.txt", delim_whitespace=True, comment="#")
df1.head(2)

# Simple text file of zodiacal emission and ISM (Galactic) emission for 
# the so-called "1.2 min zody field", used for JWST background calculations
# Wave is wavelength in Micron. Zody and ISM are in MegaJanskies per sterradian (MJy/SR)
wave zody   ISM
0.5 0.130794 0.000470


Unnamed: 0,wave,zody,ISM
0,0.5,0.130794,0.00047
1,0.6,0.167639,0.000725


Pandas normally expects the row of column names to be uncommented (csv format).  For some reason, astronomers often preface the column names with a "#".  I can't figure out how to make pandas.read_table ignore the "#", so here's a stupid workaround. (Thanks to C. Pacifici for pointing out the problem.)

In [37]:
myfile = "Sample_data/weirdheader.txt"
# Peek at the first three lines in pure Python: no shell commands and no
# Python-2-only print statement, so the cell does not depend on head/sed.
with open(myfile) as infile:
    preview = [infile.readline() for line_number in range(3)]
print("".join(preview))
# The first line holds the column names behind a "#".  Recover the names by
# hand (also tolerating whitespace around the "#")...
colnames = preview[0].lstrip().lstrip("#").split()
# ...then let comment="#" make pandas skip that line, and pass the names in
# explicitly.  Nothing is written to a scratch file, and the data rows are
# read straight from the original file.
df = pandas.read_table(myfile, sep=r"\s+", comment="#", names=colnames)
# Jane has coded a version of this workaround into jrr.util.strip_pound_before_colnames()
df.head(2)

#wave zody   ISM
0.5 0.130794 0.000470
0.6 0.167639 0.000725



Unnamed: 0,wave,zody,ISM
0,0.5,0.130794,0.00047
1,0.6,0.167639,0.000725


In [3]:
# Read a simple .csv (comma-separated variable) file into Pandas
# Load a plain .csv (comma-separated values) file into Pandas.
# Column names are supplied by hand via names=, and "#" marks comments to skip.
thermal_csv = "Sample_data/thermal_curve_jwst_jrigby_1.1.csv"
df2 = pandas.read_csv(
    thermal_csv,
    names=("wave", "bkg"),
    comment="#",
)
df2.head()

Unnamed: 0,wave,bkg
0,5.0,4.9e-05
1,5.1,6.9e-05
2,5.2,9.6e-05
3,5.3,0.000131
4,5.4,0.000178


In [4]:
# Read a machine-readable table from an ApJ paper into Pandas, via astropy.Table
# Load a machine-readable table published with an ApJ paper into Pandas,
# going through an astropy Table on the way.
file2 = "http://iopscience.iop.org/2041-8205/814/1/L6/suppdata/apjl521409t1_mrt.txt"
# No format argument is given: ascii.read automagically gets the format right.
temp_table = ascii.read(file2)
# Astropy Table -> Pandas DataFrame.  to_pandas() needs astropy 1.2
df3 = temp_table.to_pandas()
df3.head(1)

Downloading http://iopscience.iop.org/2041-8205/814/1/L6/suppdata/apjl521409t1_mrt.txt [Done]


Unnamed: 0,Name,z,r_z,f_[O/H],[O/H],E_[O/H],e_[O/H],r_[O/H],f_WrLya,WrLya,e_WrLya,r_WrLya,f_WCIII,WCIII,e_WCIII,r_WCIII,Sample,Filter,MagEmag,r_MagEmag
0,RCSGA 032727-13260 Knot E,1.703745,2.0,,8.34,0.02,0.02,W14,>,-1.2,-99.0,R14,,-2.0,0.14,R14,MagE,g,19.15,W10


In [5]:
# Read a binary .fits table into Pandas, via astropy.Table
# Load a binary .fits table into Pandas by way of astropy.Table
stsci_file = "Sample_data/example_bkgs.fits"
# Two explicit steps: read into an astropy Table, then convert to a DataFrame.
tab = Table.read(stsci_file)
stsci_df = tab.to_pandas()
# The same thing, chained into a single expression:
stsci_df1 = (
    Table.read(stsci_file)
    .to_pandas()
)
stsci_df1.tail()

Unnamed: 0,wavelength,background,thermal,straylight,infield
83,26.5,1427.874634,1401.599976,2.51152,23.763126
84,27.5,1834.911255,1809.349976,2.450313,23.110891
85,28.5,2328.179688,2303.350098,2.387528,22.442274
86,29.5,2918.189453,2894.100098,2.32399,21.765467
87,30.5,184.821243,161.739136,1.994993,21.087128


In [6]:
# Note, an endian-ness mismatch between FITS and numpy can cause
# gruesome errors if you import this the WRONG way:
#   (mcat, mcat_hdr) = fits.getdata(mastercat_file, header=True) #WRONG
#   pmcat = pandas.DataFrame.from_records(mcat)  # WRONG
# USE .to_pandas() to avoid this.  See https://github.com/astropy/astropy/issues/1156

In [7]:
# You can read really big binary fits tables.  May take a while to download.
# Here, let's read the 3D-HST master catalog.
mastercat_url = "http://monoceros.astro.yale.edu/RELEASE_V4.1.5/3dhst.v4.1.5.master.fits.gz"
# download_file returns the local path of the downloaded copy, so keep it:
# the file is NOT saved under its remote name in the working directory.
# cache=True reuses a previous download instead of fetching the file again,
# and the explicit timeout (seconds) gives a slow server more time to respond.
mastercat_file = download_file(mastercat_url, cache=True, timeout=120)
#pmcat = Table.read(mastercat_file).to_pandas()
# Show where the local copy lives.
mastercat_file

 [Failed]


URLError: <urlopen error timed out>