# Open Exoplanet Catalogue for WebGL in JSON format
See: http://openexoplanetcatalogue.com/

Mark SubbaRao wrote the initial Python Notebook to create the raw file.  Aaron Geller converted this to output a JSON file.

The file contains, for each planet:
x, y, z (Galactic, in pc), semi-major axis factor, distance (ly), year discovered, discovery method, size class, period, ring info, star name

In [None]:
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.coordinates import Distance, Angle
from astropy.table import Table, Column
from astropy.io import ascii
import math

import pandas as pd
import os
import urllib2
import requests
from bs4 import BeautifulSoup

In [None]:
import xml.etree.ElementTree as ET, urllib2, gzip, io
# Download the gzipped catalogue XML.
url = "https://github.com/OpenExoplanetCatalogue/oec_gzip/raw/master/systems.xml.gz"
response = urllib2.urlopen(url)
try:
    compressed = response.read()
finally:
    # Release the HTTP connection even if the download fails part-way.
    response.close()
# Decompress in memory and parse; the later cells query the tree through `oec`.
oec = ET.parse(gzip.GzipFile(fileobj=io.BytesIO(compressed)))

In [None]:
# This will either return a tag's value or, if the tag has no value, its upper limit (the 'upperlimit' attribute); used for mass and radius
def getValorUL(tag):
    """Return the numeric value of an XML element, or its upper limit.

    tag -- an ElementTree element, or None when the element was not found.

    The element's text is converted with float() when it is present.  When
    the text is None, the 'upperlimit' attribute is converted instead.  If
    the element itself is None, or it has neither text nor an 'upperlimit'
    attribute, the sentinel -999 is returned.
    """
    missing = -999
    if tag is None:
        return missing
    if tag.text is not None:
        return float(tag.text)
    upperLimit = tag.get('upperlimit')
    return missing if upperLimit is None else float(upperLimit)

# Fallback stellar properties keyed by the first letter of the spectral type.
# Each value is a 2-tuple: isHabitable() reads element [0] as the temperature
# when the star has no <temperature> tag, and element [1] as the stellar radius
# when <radius> is missing or below 0.01.
# NOTE(review): units are presumably kelvin and solar radii -- confirm.
spectraltypes_temp_radii = {'O' : (40000,10.) , 'B': (20000,3.0), 'A' : (8500, 1.5), 'F' : (6500, 1.3), 'G' : (5500, 1.0), 'K': (4000, 0.8) , 'M' : (3000, 0.5) }

def isHabitable(xmlPair):
    """Return True if the planet's orbit lies inside its star's habitable zone.

    xmlPair -- a (planet, star) tuple of ElementTree elements; either entry
               may be None.

    Returns False when the planet or the star is missing, or when the first
    letter of the star's spectral type is not a key of
    spectraltypes_temp_radii.  Otherwise the planet's semi-major axis (taken
    from <semimajoraxis>, or estimated from <period> and the star's <mass>)
    is compared with inner and outer limits computed from the stellar
    temperature and a mass-based luminosity estimate; the result is True only
    when the axis lies strictly between the two limits.
    """
    planet, star = xmlPair
    if planet is None or star is None:
        return False # nothing to test / no binary systems (yet)

    # A <spectraltype> tag can exist but be empty (text None or blank); treat
    # that like a missing tag and fall back to "G" instead of indexing None.
    spectralType = (getText(star,"./spectraltype","G") or "").strip() or "G"
    spectralTypeMain = spectralType[0]
    if spectralTypeMain not in spectraltypes_temp_radii:
        return False # unsupported spectral type

    semimajoraxis = getFloat(planet,"./semimajoraxis")
    if semimajoraxis is None:
        # Kepler's-third-law style estimate, a^3 = M * P^2: 6.283 ~ 2*pi and
        # 39.49 ~ 4*pi^2 cancel, and /365.25 rescales the period
        # (presumably days -> years, with the mass in solar masses -- confirm).
        hostmass = getFloat(star,"./mass",1.)
        period = getFloat(planet,"./period",365.25)
        semimajoraxis = pow(pow(period/6.283/365.25,2)*39.49*hostmass,1.0/3.0)

    temperature = getFloat(star,"./temperature")
    if temperature is None:
        # No measured temperature: use the table value for the spectral class.
        temperature = spectraltypes_temp_radii[spectralTypeMain][0]
    rel_temp = temperature - 5700.

    stellarMass = getFloat(star,"./mass")
    if stellarMass is None:
        stellarMass = 1.

    # Piecewise power-law luminosity estimate from the stellar mass.
    if stellarMass>2.:
        luminosity = 1.5 * pow(stellarMass,3.5)
    elif stellarMass>0.43:
        luminosity = pow(stellarMass,4.)
    else:
        luminosity = 0.23 * stellarMass**2.3

    # Ref: http://adsabs.harvard.edu/abs/2007A%26A...476.1373S
    #using the Venus and Mars values
    HZinner2 = (0.72-2.7619e-5*rel_temp-3.8095e-9*rel_temp*rel_temp) *math.sqrt(luminosity)
    HZouter2 = (1.77-1.3786e-4*rel_temp-1.4286e-9*rel_temp*rel_temp) *math.sqrt(luminosity)

    return HZinner2 < semimajoraxis < HZouter2

def getText(obj,tag,default=None):
    """Return the text of the first sub-element of obj that matches tag.

    obj     -- an ElementTree element to search.
    tag     -- tag name or path handed to obj.find().
    default -- value returned when no sub-element matches.

    When a matching element exists its text is returned unchanged, which is
    None for an empty element (the default is not substituted in that case).
    """
    match = obj.find(tag)
    return default if match is None else match.text
def getFloat(obj,tag,default=None):
    """Return the text of the first sub-element matching tag as a float.

    obj     -- an ElementTree element to search.
    tag     -- tag name or path handed to obj.find().
    default -- value returned when no sub-element matches, or when its text
               cannot be converted to a float.
    """
    v = obj.find(tag)
    if v is None:
        return default
    try:
        return float(v.text)
    except (TypeError, ValueError):
        # TypeError: the element is empty (text is None).
        # ValueError: the text is not a number.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return default

#attempt to use the name to create a link to the open exoplanet catalogue
def searchURL(name):
    """Guess a catalogue link for `name` from the first Google search hit.

    name -- the star/system name (a string) to search for.

    Searches Google for "Open Exoplanet Catalogue <name>" and builds a link
    from the text of the first <cite> element on the result page, replacing
    "..." with "planet" and prefixing "http://".  When the page has no
    <cite> element, the generic catalogue systems page is returned instead
    of raising AttributeError.
    """
    # Approach taken from
    # https://stackoverflow.com/questions/33427504/how-can-i-scrape-the-first-link-of-a-google-search-with-beautiful-soup
    searchFor = "Open Exoplanet Catalogue " + name
    goog_search = "https://www.google.co.uk/search?sclient=psy-ab&client=ubuntu&hs=k5b&channel=fs&biw=1366&bih=648&noj=1"
    # Let requests append and percent-encode the query, so names containing
    # spaces, '+', '&' or '#' cannot corrupt the URL.
    r = requests.get(goog_search, params={"q": searchFor})

    soup = BeautifulSoup(r.text, "html.parser")
    cite = soup.find('cite')
    if cite is None:
        # The result page does not always contain a <cite> element.
        return "http://www.openexoplanetcatalogue.com/systems/"

    return "http://"+cite.text.replace("...","planet")
    
def _urlOpens(name, link):
    """Return True if `link` can be opened; otherwise print why and return False."""
    #https://stackoverflow.com/questions/16778435/python-check-if-website-exists
    try:
        # Close the response straight away; only reachability matters here.
        urllib2.urlopen(link).close()
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be caught first.
        print(name, link, e.code)
        return False
    except urllib2.URLError as e:
        print(name, link, e.args)
        return False
    return True


def makeURL(name):
    """Return a working Open Exoplanet Catalogue link for the system `name`.

    name -- the star/system name; None or an empty name yields the generic
            systems page.

    The direct page of the system's "b" planet is tried first.  If it cannot
    be opened, the link guessed by searchURL() is tried.  If that fails too,
    or if the resulting link contains "xml" or is the bare catalogue home
    page, the generic systems page is returned.  Diagnostics are printed for
    every failed attempt and every replacement link.
    """
    fallback = "http://www.openexoplanetcatalogue.com/systems/"
    if not name:
        return fallback

    link = "http://www.openexoplanetcatalogue.com/planet/" + name.replace(" ", "%20") + "%20b/"

    if not _urlOpens(name, link):
        link = searchURL(name)
        print(link + "\n")
        # Second attempt: verify the link that came back from the search.
        if not _urlOpens(name, link):
            link = fallback
            print(link + "\n")

    if (link.find("xml") >= 0 or link == "http://www.openexoplanetcatalogue.com/"):
        print("bad", link)
        link = fallback
        print(link + "\n")

    return link


In [None]:
#create the JSON file, and then add the name of the variable (parts) that we want
def createJSON(data, fname, vname):
    """Write `data` to the file `fname` as JSON, using pandas.

    data  -- mapping of column name -> list of values; it is wrapped in a
             pandas Series and written with orient='index', i.e. as one JSON
             object keyed by column name.
    fname -- output path; a missing parent directory is created first.
    vname -- currently unused.  It was intended as the name of a JavaScript
             variable to prefix to the output ("var <vname> = ..."), a step
             that is disabled.

    Prints a progress message before and after writing.
    """
    print("writing JSON file : " + fname)
    parent = os.path.dirname(fname)
    if parent and not os.path.isdir(parent):
        # to_json() does not create directories; without this a missing
        # output directory makes the write fail.
        os.makedirs(parent)
    pd.Series(data).to_json(fname, orient='index')
    print("done")

In [None]:
# One empty list per output column; every column gets its own list object.
exoplanetData = {
    column: []
    for column in (
        "x", "y", "z", "afac", "yrDiscovered", "method", "class",
        "period", "ringInfo", "name", "URL",
    )
}


# Walk every <system> in the catalogue and append one row to each
# exoplanetData column for every planet listed under 'Confirmed planets'.
u.imperial.enable() # to enable light years
# Integer code stored in the "method" column for each discovery-method string.
methodDict = {'N/A':0,'RV':5,'imaging':1,'microlensing':2,'timing':3,'transit':4}
for system in oec.findall(".//system"):
    planets = system.findall(".//planet")
    # Only the first <star> found in the system is used (for the name and for isHabitable).
    star = system.find(".//star")
    nPlanets=len(planets)
    raStr=system.findtext("rightascension")
    decStr=system.findtext("declination")
    dist=system.findtext("distance")
    # Systems lacking either coordinate are skipped entirely.
    if not(raStr is None or decStr is None ):
        # The catalogue strings are parsed as hours (RA) and degrees (Dec).
        ra=Angle(raStr+' hours')
        dec=Angle(decStr+'degrees')
        # Systems without a distance are placed at 1000 pc; they are flagged
        # further down by a negative ringInfo.
        d2 = 1000.0 if dist is None else dist
        coord=SkyCoord(ra,dec,distance=float(d2)*u.pc, frame='icrs')
        # Largest semi-major axis and longest period among the system's planets.
        # NOTE(review): amax is computed but not read anywhere else in this cell.
        amax=0
        Pmax=0
        # '_' is the placeholder name for systems without a <star> element.
        # NOTE(review): findtext returns None when the star has no <name>;
        # makeURL(name) below would then fail on name.replace -- confirm.
        name = '_'
        if (star is not None):
            name = star.findtext("name")#.replace(" ", "_")
        #if (name == "TRAPPIST-1"):
        if (True):
            # First pass: find amax and Pmax.  Values that are missing or not
            # numeric make float() raise and are silently skipped.
            for planet in planets:
                a=planet.findtext("semimajoraxis")
                P=planet.findtext("period")
                try:
                    float(a)
                    if (float(a)>amax):
                        amax=float(a)
                except:
                    pass
                try:
                    float(P)
                    if (float(P)>Pmax):
                        Pmax=float(P)
                except:
                    pass

            # Second pass: compute the per-planet columns.
            for i,planet in enumerate(planets):
                period=planet.findtext("period")
                if (period is None or period==''):
                    # Missing period: sentinel value and a neutral axis factor.
                    period = -999
                    afac =1
                else:
                    # (P/Pmax)^(2/3): the planet's period relative to the
                    # longest period in the system, raised to the 2/3 power.
                    # NOTE(review): `period` stays the raw XML string here
                    # while the missing case stores the int -999 -- confirm
                    # the consumer accepts both.
                    afac = pow(float(period)/Pmax,2./3.)
                discovery=planet.findtext("discoveryyear")
                discoverymethod=planet.findtext("discoverymethod")
                # NOTE(review): `a` is not used after this assignment.
                a=planet.findtext("semimajoraxis")
                massTag=planet.find("mass")
                mass=getValorUL(massTag)
                sizeTag=planet.find("radius")
                size = getValorUL(sizeTag)
                # Integer part: number of planets in the system; fractional
                # part: this planet's index / 100.
                ringInfo =nPlanets+float(i)/100
                if (dist is None):
                    # A negative ringInfo marks a system with no catalogued distance.
                    ringInfo=-1.*ringInfo
                # Size class: taken from the radius when one is available,
                # otherwise from the mass, otherwise -1.
                if (size >0):
                    rEarth = float(size)*11.2 #convert from Jupiter to Earth radii
                    if (rEarth >= 0 and rEarth<0.75):
                        sClass=0 #smaller
                    elif (rEarth >= 0.75 and rEarth<1.25):
                        sClass =1 #Earth sized
                    elif (rEarth >= 1.25 and rEarth<2.0):
                        sClass=2 #Super Earth sized
                    elif (rEarth >= 2.0 and rEarth<6.0):
                        sClass=3 #Neptune sized
                    elif (rEarth >= 6.0 and rEarth<15.0):
                        sClass=4 #JupiterSized
                    else:
                        sClass=5 #larger
                elif (mass>0):
                    # presumably Jupiter masses -> Earth masses -- confirm
                    m=mass*318.8
                    if (m >= 0 and m<0.6):
                        sClass=0 #smaller
                    elif (m >= 0.6 and m<1.5):
                        sClass =1 #Earth sized
                    elif (m >= 1.5 and m<5.0):
                        sClass=2 #Super Earth sized
                    elif (m >= 5.0 and m<60.0):
                        sClass=3 #Neptune sized
                    elif (m >=60.0 and m<1000.0):
                        sClass=4 #JupiterSized
                    else:
                        sClass=5 #larger
                else:
                    sClass=-1
                # A missing or unparsable discovery year defaults to 2017.
                try:
                    discYear=float(discovery)
                except:
                    discYear=2017
                # The sign of the year doubles as a flag: negative means
                # isHabitable() returned True for this planet.
                if (isHabitable((planet,star))):
                    discYear*=-1
                if (discoverymethod is None):
                    discoverymethod='N/A'


                # only write if confirmed planet
                # One row is appended per <list> child whose text is
                # 'Confirmed planets'.
                for plist in planet.findall('list'):
                    #if (name == "TRAPPIST-1"): print(name, plist.text)
                    if (plist.text == 'Confirmed planets'):
                        exoplanetData["x"].append(coord.galactic.cartesian.x.value)
                        exoplanetData["y"].append(coord.galactic.cartesian.y.value)
                        exoplanetData["z"].append(coord.galactic.cartesian.z.value)
                        exoplanetData["afac"].append(afac)
                        exoplanetData["yrDiscovered"].append(discYear)
                        # Raises KeyError for a method string not in methodDict.
                        exoplanetData["method"].append(methodDict[discoverymethod])
                        exoplanetData["class"].append(sClass)
                        exoplanetData["period"].append(period)
                        exoplanetData["ringInfo"].append(ringInfo)
                        exoplanetData["name"].append(name)
                        # makeURL is called once per row, although `name` is
                        # the same for every planet of a system.
                        exoplanetData["URL"].append(makeURL(name))

                    


In [None]:
# Write the collected columns to disk.
# NOTE(review): in file order this cell comes before the "second check for the
# URLs" cell, so links repaired there are only saved if this cell is run again.
createJSON(exoplanetData, 'data/OpenExoplanetCatalog_Nov2017.json', 'exoplanets')


In [None]:
#second check for the URLs: retry every entry that fell back to the generic systems page
# The loop variable is deliberately not called `u`: that name is the astropy
# units module imported at the top, and overwriting it breaks a re-run of the
# main cell (u.imperial, u.pc).
for i, storedURL in enumerate(exoplanetData["URL"]):
    if (storedURL == "http://www.openexoplanetcatalogue.com/systems/"):
        exoplanetData["URL"][i] = makeURL(exoplanetData["name"][i])


## Some information about how to access the exoplanet data stored in oec

In [None]:
# Count the systems whose first planet / first star pair passes isHabitable.
i=0
for system in oec.findall(".//system"):
    planet= system.find(".//planet")
    star = system.find(".//star")
    # A system may contain no <planet> element; skip it instead of handing
    # None to isHabitable.
    if (planet is not None and isHabitable((planet,star))):
        i=i+1
print(i)

# Keep the first system, with its first planet and star, for the inspection
# cells that follow.
system=oec.find(".//system")
planet = system.find(".//planet")
star = system.find(".//star")
# isHabitable unpacks exactly a (planet, star) pair; any other tuple length
# raises ValueError.
isHabitable((planet,star))
    

In [None]:
# List the direct children of the system element.  The loop variable is not
# called `planet`, so the `planet` element selected earlier stays bound for
# the cells below (the last child of a system need not be a planet).
for child in system:
    print(child)

In [None]:
# Show tag, text and attributes of every child of the planet, each alongside
# the planet's name.
planetName = planet.findtext("name")
for field in planet:
    print(field.tag, field.text, field.attrib, planetName)

In [None]:
# Show tag and text of every child of the star, then the star's name.
for child in star:
    print(child.tag, child.text)
# Call-style print, like every other print in this notebook; with a single
# argument the output is the same on Python 2, and it also parses on Python 3.
print(star.findtext('name'))

In [None]:
# Show the 'errorminus' attribute of each <mass> child of the planet.
# findall() selects the direct children by tag; it replaces
# Element.getchildren(), which is deprecated and was removed in Python 3.9.
for e in planet.findall("mass"):
    print (e.get("errorminus"))

In [None]:
# Look up the 'errorplus' attribute of the planet's <period> element.
t =planet.find("period")
# find() returns None when the planet has no <period> child; show nothing
# in that case instead of raising AttributeError.
t.get('errorplus') if t is not None else None