# Add an object to the interactive

*Given an object's name, this notebook uses information from Wikipedia to generate a JSON entry for the interactive.*

In [1]:
import pandas as pd
import numpy as np
import os
import re
import json
import wikipedia
import wptools #https://github.com/siznax/wptools

*Info for WWT*

In [2]:
def getWWTcmd(RA,DEC,view):
    """Build the layer-API URL that is stored as an object's 'WWTurl'.

    RA, DEC and view are converted with str() and placed, comma separated and
    followed by ',0,0', in the 'flyto' query parameter of the tile-wall URL.
    Returns the complete URL as a string.
    """
    base = 'http://tilewall.adlerplanetarium.org:5050/layerApi.aspx?cmd=mode&lookat=Sky&flyto='
    target = ','.join(str(value) for value in (RA, DEC, view))
    return base + target + ',0,0'

*Info from Wikipedia*

In [3]:
def splitString(st):
    """Split a string into its runs of numeric characters.

    A run is a maximal sequence of consecutive characters that are digits
    (str.isdigit) or '.'; everything else acts as a separator.  Signs are not
    part of a run, so '-69' yields '69'.

    Parameters
    ----------
    st : str
        Text to scan, e.g. '23<sup>h</sup> 23<sup>m</sup> 24<sup>s</sup>'.

    Returns
    -------
    list of str
        The runs in order of appearance, e.g. ['23', '23', '24'].  The list is
        empty when the string contains no digit or '.' at all.
    """
    svals = []
    current = []
    for c in st:
        if (c.isdigit() or c == '.'):
            current.append(c)
        elif (current):
            #a separator closes the run collected so far
            svals.append(''.join(current))
            current = []
    if (current):
        #the string ended while still inside a run
        svals.append(''.join(current))
    return svals

def getCaption(parsetree,img, maxLen = 1000, minLen = 10):
    """Pull the caption that follows an image's file name in a page's parse tree.

    The file name is the last path component of img with underscores turned
    into spaces; it is searched for case-insensitively in parsetree.  The text
    after the match is cut at the ']]' that balances two assumed opening
    brackets (the enclosing '[[File:...' link), then cleaned:

    * '{{...}}' spans are removed,
    * '[[target|label]]' becomes 'label' and '[[target]]' becomes 'target',
    * '<ext>...</ext>' spans are removed,
    * only the text after the last remaining '|' is kept.

    Progress is reported with print().

    Parameters
    ----------
    parsetree : str
        Parse tree text of the page.
    img : str
        Image URL or file name.
    maxLen, minLen : int
        Accepted range for the length of the cleaned caption.

    Returns
    -------
    str or None
        '' when the file name does not occur in parsetree; None when it does
        occur but the cleaned caption is longer than maxLen or shorter than
        minLen; otherwise the cleaned caption.
    """
    #rfind gives -1 when there is no '/', so the slice then starts at 0
    imgSearch = img[img.rfind('/')+1:].replace('_',' ')
    print("\nSEARCHING FOR : ", img, imgSearch)

    caption = ''
    p1 = parsetree.lower().find(imgSearch.lower())
    if (p1 >= 0):
        caption = parsetree[p1+len(imgSearch):]

        #get the full caption, with all the markup:
        #start two brackets deep and stop at the ']]' that brings the depth to zero
        depth = 2
        end = len(caption) #no balancing ']]' found : keep everything
        for i,c in enumerate(caption):
            if (c == '['):
                depth += 1
            elif (c == ']'):
                depth -= 1
                if (depth == 0):
                    end = i-1 #leave out both characters of the closing ']]'
                    break
        caption = caption[:end]

        #remove the markup
        #for citations : drop '{{...}}' including the braces
        while True:
            p1 = caption.find('{{')
            p2 = caption.find('}}', p1+2) if (p1 >= 0) else -1
            if (p2 < 0):
                break #nothing (more) to remove, or an opener without its closer
            caption = caption[:p1] + caption[p2+2:]

        #links to other wikipedia pages : keep only the visible label
        while True:
            p1 = caption.find('[[')
            p2 = caption.find(']]', p1+2) if (p1 >= 0) else -1
            if (p2 < 0):
                break
            label = caption[p1+2:p2]
            p3 = label.find('|')
            if (p3 >= 0):
                label = label[p3+1:]
            caption = caption[:p1] + label + caption[p2+2:]

        #other html markup : drop '<ext>...</ext>' including the tags
        while True:
            p1 = caption.find('<ext>')
            p2 = caption.find('</ext>', p1+5) if (p1 >= 0) else -1
            if (p2 < 0):
                break
            caption = caption[:p1] + caption[p2+6:]

        #remove any bits at the beginning (e.g. '|thumb|'), but only if a '|' is present
        p = caption.rfind('|')
        if (p >= 0):
            caption = caption[p+1:]

        if (len(caption) > maxLen or len(caption) < minLen):
            print("bad caption : ", len(caption))
            caption = None

    print("CAPTION : ", caption)

    return caption


def _sexagesimalToFloat(coordString):
    """Combine the numeric fields of a coordinate string into one signed float.

    The first three numeric runs found by splitString are combined as
    first + second/60 + third/3600.  A minus sign (ASCII '-' or U+2212)
    directly in front of the first number makes the result negative.
    Returns None (after printing a message) when no usable number is found.
    """
    try:
        fields = [float(s) for s in splitString(coordString)[:3]]
    except (IndexError, ValueError):
        #a string without any number, or with a malformed one, is unparseable
        fields = []
    if (not fields):
        print("could not parse coordinate : ", coordString)
        return None
    value = sum(f/60.**k for k,f in enumerate(fields))
    firstNum = next(i for i,c in enumerate(coordString) if (c.isdigit() or c == '.'))
    if (coordString[:firstNum].rstrip().endswith(('-', '\u2212'))):
        value = -value
    return value


def _parseDistance(DistString):
    """Return '<number> <unit>' from an infobox distance string, or None.

    The number is the first numeric run; the unit is the text after the
    single character that follows the number, up to the next '|' (or to the
    end of the string when there is no '|').
    """
    try:
        st = splitString(DistString)
    except IndexError:
        st = []
    if (not st):
        return None
    d = st[0]
    rest = DistString[DistString.find(d) + len(d) + 1:]
    p1 = rest.find('|')
    u = rest if (p1 < 0) else rest[:p1]
    return d + ' ' + u


def getWikiInfo(search, view=2):
    """Collect images, captions, summary and infobox data for a wikipedia page.

    The first hit of wikipedia.search(search) is loaded both with the
    wikipedia package (image list, url, summary) and with wptools (parse
    tree, infobox, page image).  Only jpg/png images whose name does not
    contain 'Celestia' and for which getCaption finds a caption are kept; the
    wptools 'query-pageimage', if any, is moved to the front of the list.

    Parameters
    ----------
    search : str
        Search term.
    view : number
        Passed to getWWTcmd together with RA and Dec; when it is falsy no WWT
        url is built.

    Returns
    -------
    tuple of 9 items
        (images, captions, page.url, page.summary, RA, Dec, Distance, Size, wwt).
        RA and Dec are floats built from the infobox 'ra'/'dec' strings as
        first + second/60 + third/3600 (no unit conversion is applied), or
        None.  Distance is a '<number> <unit>' string or None, Size is the raw
        infobox 'size' value or None, wwt is the url from getWWTcmd or None.
        When the search has no hit or the page cannot be loaded, all nine
        items are None.
    """
    #https://wikipedia.readthedocs.io/en/latest/quickstart.html
    noResult = (None,)*9 #same length as the successful return value

    site = wikipedia.search(search)
    print(search)
    if (not site):
        return noResult

    print("wiki page: ", site[0])
    try:
        page = wikipedia.page(site[0])
    except wikipedia.exceptions.DisambiguationError as e:
        print(e.options)
        return noResult
    except wikipedia.exceptions.PageError as e:
        print(e)
        return noResult
    if (not page):
        return noResult

    images = []
    captions = []

    #get more info with wptools
    #https://github.com/siznax/wptools/wiki/
    wpage = wptools.page(site[0])
    wpage.get_more()
    wpage.get_query()
    wpage.get_parse()
    parsetree = wpage.data.get('parsetree') or ''

    #get all the images (works better with wikipedia instead of wptools, but need wptools for captions)
    for i in page.images:
        if (("jpg" in i or "png" in i) and "Celestia" not in i):
            cap = getCaption(parsetree, i)
            if (cap): #only keep images with captions.  I think this will cut down on the images we don't want.
                captions.append(cap)
                images.append(i)

    #get the main image (only possible with wptools)
    mainImg = None
    mainCap = None
    for i in (wpage.images() or []):
        print('CHECKING', i['kind'])
        if (i['kind'] == 'query-pageimage'):
            mainImg = i['url']
            print('HAVE mainImg : ', mainImg)
            if ('metadata' in i):
                if ('ImageDescription' in i['metadata']):
                    mainCap = i['metadata']['ImageDescription']['value']

            if (mainCap is None):
                mainCap = getCaption(parsetree, i['url'])
            print('HAVE mainCap :', mainCap)

    if (mainImg):
        #put the main image first and drop its duplicate from the rest
        others = [(i,c) for i,c in zip(images,captions) if (i != mainImg)]
        images = [mainImg] + [i for i,c in others]
        captions = [mainCap] + [c for i,c in others]

    #get the infobox for RA, Dec, etc.
    RA = None
    Dec = None
    Distance = None
    Size = None
    wwt = None

    info = wpage.data.get('infobox') or {} #tolerate a page without an infobox
    if ('ra' in info):
        RAstring = info['ra']
        print('RAstring', RAstring)
        RA = _sexagesimalToFloat(RAstring)

    if ('dec' in info):
        DecString = info['dec']
        print('DecString', DecString)
        Dec = _sexagesimalToFloat(DecString)

    if ('distance' in info):
        DistString = info['distance']
        print('DistString', DistString)
        Distance = _parseDistance(DistString)
        print('Distance ', Distance)

    if ('size' in info):
        Size = info['size']

    #compare against None so that a coordinate of exactly 0 is still usable
    if (RA is not None and Dec is not None and view):
        wwt = getWWTcmd(RA,Dec,view)

    return images, captions, page.url, page.summary, RA, Dec, Distance, Size, wwt

*Create the json file*

In [4]:
def makeOutput(fname, nme, 
               WWTurl=None, Distance=None, Size=None, Notes=None, Category=None, 
               images=None, captions=None, wikipedia=None):
    """Write a single-object JSON file.

    The file fname is (over)written with one top-level key, nme, whose value
    is a dictionary holding the eight keyword arguments under the keys
    'WWTurl', 'Distance', 'Size', 'Notes', 'Category', 'images', 'captions'
    and 'wikipedia', in that order.  Nothing is returned.
    """
    entry = {
        'WWTurl': WWTurl,
        'Distance': Distance,
        'Size': Size,
        'Notes': Notes,
        'Category': Category,
        'images': images,
        'captions': captions,
        'wikipedia': wikipedia,
    }

    with open(fname, 'w') as fp:
        json.dump({nme: entry}, fp)

### A method to create the object in one line

In [5]:
def createObject(nme, cat, fname):
    """Look an object up on wikipedia and write its JSON entry in one call.

    Parameters
    ----------
    nme : str
        Search term handed to getWikiInfo; also the key of the JSON entry.
    cat : str
        Stored as the entry's 'Category'.
    fname : str
        Path of the JSON file written by makeOutput.

    Nothing is written (only a message is printed) when the lookup fails.
    """
    info = getWikiInfo(nme)
    #a failed lookup comes back as a tuple of Nones whose length may not match
    #the nine fields unpacked below, so check before unpacking
    if (info is None or len(info) != 9 or info[2] is None):
        print('No wikipedia entry found for : ', nme)
        return

    images, captions, url, summary, RA, Dec, Distance, Size, wwt = info
    print('IMAGES : ', images)
    makeOutput(fname, 
               nme,
               WWTurl=wwt, 
               Distance=Distance, 
               Size=Size, 
               Notes=summary,
               Category=cat, 
               images=images,
               captions=captions,
               wikipedia=url)

## Create a couple of new objects

In [6]:
# Build the entry for SN 1987a and write it to its own JSON file.
nme = "SN 1987a"  # search term for the lookup; also the key of the JSON entry
cat = "Nebulae"  # stored as the entry's 'Category'
fname = 'userObjects/SN1987a.json'  # output path, relative to the notebook's working directory
createObject(nme, cat, fname)

SN 1987a
wiki page:  SN 1987A


en.wikipedia.org (querymore) SN 1987A
SN 1987A (en) data
{
  backlinks: <list(362)> {'pageid': 1962, 'ns': 0, 'title': 'Appar...
  categories: <list(4)> Category:Astronomical objects discovered i...
  contributors: 361
  files: <list(16)> File:Blue pencil.svg, File:Commons-logo.svg, F...
  languages: <list(32)> {'lang': 'af', 'title': 'SN 1987A'}, {'lan...
  pageid: 28930
  requests: <list(1)> querymore
  title: SN 1987A
  views: 234
}
en.wikipedia.org (query) SN 1987A
en.wikipedia.org (imageinfo) File:Eso0708a.jpg
SN 1987A (en) data
{
  assessments: <dict(2)> Astronomy, Physics
  backlinks: <list(362)> {'pageid': 1962, 'ns': 0, 'title': 'Appar...
  categories: <list(4)> Category:Astronomical objects discovered i...
  contributors: 361
  description: supernova
  extext: <str(969)> **SN 1987A** was a peculiar type II supernova...
  extract: <str(1033)> <p class="mw-empty-elt"></p><p><b>SN 1987A<...
  files: <list(16)> File:Blue pencil.svg, File:Commons-logo.svg, F...
  image: <list(2)> 


SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/a/af/Composite_image_of_Supernova_1987A.jpg Composite image of Supernova 1987A.jpg
CAPTION :  Remnant of SN 1987A seen in light overlays of different spectra. ALMA data (radio, in red) shows newly formed dust in the center of the remnant. Hubble (visible, in green) and Chandra (X-ray, in blue) data show the expanding shock wave.

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/0/0a/Eso0708a.jpg Eso0708a.jpg
bad caption :  6279
CAPTION :  None

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/b/bd/G299-Remnants-SuperNova-Type1a-20150218.jpg G299-Remnants-SuperNova-Type1a-20150218.jpg
CAPTION :  

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/5/5f/He1523a.jpg He1523a.jpg
CAPTION :  

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/9/94/Large.mc.arp.750pix.jpg Large.mc.arp.750pix.jpg
CAPTION :  SN 1987A within the Large Magellanic Cloud

SEARCHING FOR :  https:

In [7]:
# Build the entry for Cassiopeia A and write it to its own JSON file.
nme = "Cassiopeia A"  # search term for the lookup; also the key of the JSON entry
cat = "Nebulae"  # stored as the entry's 'Category'
fname = 'userObjects/CassiopeiaA.json'  # output path, relative to the notebook's working directory
createObject(nme, cat, fname)



Cassiopeia A
wiki page:  Cassiopeia A


en.wikipedia.org (querymore) Cassiopeia A
Cassiopeia A (en) data
{
  backlinks: <list(254)> {'pageid': 6361, 'ns': 0, 'title': 'Cassi...
  categories: <list(7)> Category:1947 in science, Category:3C obje...
  contributors: 152
  files: <list(8)> File:Blue pencil.svg, File:Cassiopeia A Spitzer...
  languages: <list(22)> {'lang': 'ar', 'title': 'ذات الكرسي A'}, {...
  pageid: 1996903
  requests: <list(1)> querymore
  title: Cassiopeia A
  views: 81
}
en.wikipedia.org (query) Cassiopeia A
en.wikipedia.org (imageinfo) File:Cassiopeia A Spitzer Crop.jpg
Cassiopeia A (en) data
{
  assessments: <dict(1)> Astronomy
  backlinks: <list(254)> {'pageid': 6361, 'ns': 0, 'title': 'Cassi...
  categories: <list(7)> Category:1947 in science, Category:3C obje...
  contributors: 152
  description: supernova remnant in the constellation Cassiopeia
  extext: <str(1658)> **Cassiopeia A** ( **Cas A** ) is a supernov...
  extract: <str(1714)> <p><b>Cassiopeia A</b> (<b>Cas A</b>) is a ...
  files: <list(8)> F


SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/c/cb/Cassiopeia_A_Spitzer_Crop.jpg Cassiopeia A Spitzer Crop.jpg
bad caption :  5
CAPTION :  None

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/b/bd/G299-Remnants-SuperNova-Type1a-20150218.jpg G299-Remnants-SuperNova-Type1a-20150218.jpg
CAPTION :  

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/5/56/Heic0609.jpg Heic0609.jpg
CAPTION :   Cassiopeia A observed by the Hubble Space Telescope

SEARCHING FOR :  https://upload.wikimedia.org/wikipedia/commons/c/c5/Supernova%26galaxia.png Supernova%26galaxia.png
CAPTION :  
CHECKING query-pageimage
HAVE mainImg :  https://upload.wikimedia.org/wikipedia/commons/c/cb/Cassiopeia_A_Spitzer_Crop.jpg
HAVE mainCap : A false color image of Cassiopeia A (Cas A) using observations from both the Hubble and Spitzer telescopes as well as the Chandra X-ray Observatory (cropped).
CHECKING query-thumbnail
RAstring 23<sup>h</sup> 23<sup>m</sup> 24<sup>s</sup>
D

### Compile all the objects for the interactive

In [1]:
# NOTE(review): the directory and file listing in this cell's output appears to be
# printed while the module is imported, so the compilation presumably runs at
# import time -- confirm in compileObjects.py.  If so, re-running this cell in the
# same kernel will not repeat the work, because Python caches imported modules.
import compileObjects
# Bare expression: displays the module's repr (including its file path) as the cell result.
compileObjects

TileWallData/objectFiles
userObjects
Abell1689.json
AlphaCentauri.json
AndromedaM31.json
AntennaeGalaxies.json
Aquarius.json
Aries.json
Betelgeuse.json
BlackEyeGalaxyM64.json
BodesM81.json
BulletCluster.json
ButterflyClusterM6.json
Cancer.json
Canopus.json
Capricornus.json
CarinaNebula.json
Cartwheel.json
Cassiopeia.json
CassiopeiaA.json
CatsEyeNebula.json
CentaurusA.json
CigarGalaxyM82.json
CL002417.json
ComaCluster.json
CrabNebulaM1.json
EagleNebulaM16.json
EggNebula.json
ElephantsTrunkNebula.json
ElGordo.json
ESO510G13.json
EtaCarinae.json
EyesGalaxies.json
Fomalhaut.json
Gemini.json
HelixNebula.json
Hercules.json
HLTau.json
HLTauriHLTau.json
HoagsObject.json
HomunculusNebula.json
HorseheadNebula.json
HubbleDeepField.json
HubbleUltraDeepField.json
Hydra.json
LagoonNebulaM8.json
LargeMagellanicCloud.json
Leo.json
LeoTripletM66Group.json
Libra.json
Lyra.json
M10.json
M15.json
M2.json
M3.json
M4.json
M5.json
M60.json
M87akaVirgoA.json
M9.json
MayallsObject.json
MiceGalaxies.json
MilkyW

<module 'compileObjects' from '/Volumes/highnoon2go/highnoon/Visualizations/ml5js/onGitHub/SVLml5js/data/compileObjects.py'>