In [1]:
# Laura Chapman
# Script for developing statistical tools for Glue
# Computes statistics for subsets as well as entire data using compute_statistic
# Changed from astropy tables to pandas dataframe

# Format data well in a popup using qt
# Color code by subset

In [2]:
# Basic code that imports glue and loads in and links the data

import sys
from glue.core.data_factories import load_data
from glue.core import DataCollection
from glue.core.link_helpers import LinkSame
from glue.app.qt.application import GlueApplication
from glue.viewers.image.qt import ImageViewer
from glue_vispy_viewers.volume.volume_viewer import VispyVolumeViewer

image_filename='w5.fits'
catalog_filename='w5_psc.vot'

#load 2 datasets from files
catalog = load_data(catalog_filename)
image = load_data(image_filename)

dc = DataCollection([catalog,image])

# link positional information
dc.add_link(LinkSame(catalog.id['RAJ2000'], image.id['Right Ascension']))
dc.add_link(LinkSame(catalog.id['DEJ2000'], image.id['Declination']))

#Create subset based on filament mask
ra_state=(image.id['Right Ascension'] > 44) & (image.id['Right Ascension'] < 46)
subset_group=dc.new_subset_group('RA_Selection',ra_state)
subset_group.style.color = '#0000FF'

#start Glue
# app = GlueApplication(dc)

# imageviewer = app.new_data_viewer(ImageViewer)
# imageviewer.add_data(image)

# app.start()





In [3]:
import glue.utils.array as gua
import glue.core.data as gcd
from astropy.table import Table

In [4]:
# Use compute_statistic to avoid accessing arrays of data directly
# Correctly creates a table using compute_statistic

mean_array = []
median_array = []
min_array = []
max_array = []
sum_array = []
name_array = []
tables = []
headings = ('component','mean', 'median', 'minimum', 'maximum', 'sum')

for i in range (0, len(dc)):
    print(dc[i].label)
    print()   
    for j in range (0, len(dc[i].components)):
        name = dc[i].components[j].label # Get the name of each component
        name_array.append(name) # add to the name array to build the table
        mean_array.append(dc[i].compute_statistic('mean', dc[i].components[j]))
        median_array.append(dc[i].compute_statistic('median', dc[i].components[j]))       
        min_array.append(dc[i].compute_statistic('minimum', dc[i].components[j]))       
        max_array.append(dc[i].compute_statistic('maximum', dc[i].components[j]))      
        sum_array.append(dc[i].compute_statistic('sum', dc[i].components[j]))         
        t = Table([name_array, mean_array, median_array, min_array, max_array, sum_array], names = headings)
   
    # Save all the tables in an array for display in the popup
    tables.append(t)
    mean_array = [] # Clear the arrays
    median_array = []
    min_array = []
    max_array = []
    name_array = []
    sum_array = []
    print(t)

w5_psc

   component          mean           median     ...    maximum         sum     
---------------- -------------- --------------- ... ------------- -------------
Pixel Axis 0 [x]         8885.0          8885.0 ...       17770.0   157895335.0
         World 0         8885.0          8885.0 ...       17770.0   157895335.0
              ID         8886.0          8886.0 ...       17771.0   157913106.0
         RAJ2000     43.5318018       43.468829 ...     46.039649 773603.649788
         DEJ2000  60.3405947875       60.382988 ...     61.493241 1072312.70997
            Jmag   13.563952446   13.7600002289 ... 17.4599990845    231740.125
            Hmag  12.9239377975   13.1700000763 ... 16.4500007629   222214.1875
           Ksmag    12.66751194   12.9399995804 ... 16.0499992371  216766.46875
          __3.6_  12.5111446381   12.8000001907 ... 16.6200008392 222335.546875
          __4.5_  12.4496717453   12.7399997711 ... 16.1299991608    221243.125
          __5.8_  12.3555679321 

In [5]:
# Constructs a pandas DataFrame instead of an astropy table

import pandas as pd
from pandas import DataFrame
import numpy as np

In [6]:
# Only calculates statistics for one of the data sets
# Displays using a pandas DataFrame

data = dc[0]

mean_array = []
median_array = []
min_array = []
max_array = []
sum_array = []
name_array = []
tables = []
headings = ('mean', 'median', 'minimum', 'maximum', 'sum')

print(data.label)

for j in range (0, len(data.components)):
    name = data.components[j].label # Get the name of each component
    name_array.append(name) # add to the name array to build the table
    mean_array.append(data.compute_statistic('mean', data.components[j]))
    median_array.append(data.compute_statistic('median', data.components[j]))       
    min_array.append(data.compute_statistic('minimum', data.components[j]))       
    max_array.append(data.compute_statistic('maximum', data.components[j]))      
    sum_array.append(data.compute_statistic('sum', data.components[j]))         

    # At this point we have several column arrays to put into a pandas data frame
    
column_data = np.asarray([mean_array, median_array, min_array, max_array, sum_array]).transpose()

pd.DataFrame(column_data, index=name_array, columns=headings)

w5_psc


Unnamed: 0,mean,median,minimum,maximum,sum
Pixel Axis 0 [x],8885.0,8885.0,0.0,17770.0,157895300.0
World 0,8885.0,8885.0,0.0,17770.0,157895300.0
ID,8886.0,8886.0,1.0,17771.0,157913100.0
RAJ2000,43.531802,43.468829,41.081526,46.039649,773603.6
DEJ2000,60.340595,60.382988,59.246093,61.493241,1072313.0
Jmag,13.563952,13.76,5.12,17.459999,231740.1
Hmag,12.923938,13.17,3.89,16.450001,222214.2
Ksmag,12.667512,12.94,3.51,16.049999,216766.5
__3.6_,12.511145,12.8,4.86,16.620001,222335.5
__4.5_,12.449672,12.74,3.98,16.129999,221243.1


In [7]:
# Defines a subset and runs statistics using compute_statistic for the subset
# Displays data using a pandas dataframe

# Data that subset is pulled from
data = dc[0]

# Define state and subset
state1 = data.id['Jmag'] > 14
subset1 = data.new_subset(state1, label='Jmag > 14')

# Same arrays as with full data
mean_array = []
median_array = []
min_array = []
max_array = []
sum_array = []
name_array = []
tables = []

headings = ('mean', 'median', 'minimum', 'maximum', 'sum')

print(data.label, 'subset1')
print() 
for j in range (0, len(data.components)):
    name = data.components[j].label # Get the name of each component
    name_array.append(name) # add to the name array to build the table
    mean_array.append(data.compute_statistic('mean', subset1.components[j], subset_state=subset1.subset_state))
    median_array.append(data.compute_statistic('median', subset1.components[j], subset_state=subset1.subset_state))       
    min_array.append(data.compute_statistic('minimum', subset1.components[j], subset_state=subset1.subset_state))       
    max_array.append(data.compute_statistic('maximum', subset1.components[j], subset_state=subset1.subset_state))      
    sum_array.append(data.compute_statistic('sum', subset1.components[j], subset_state=subset1.subset_state))        
   
column_data = np.asarray([mean_array, median_array, min_array, max_array, sum_array]).transpose()

pd.DataFrame(column_data, index=name_array, columns=headings)

w5_psc subset1



Unnamed: 0,mean,median,minimum,maximum,sum
Pixel Axis 0 [x],8972.511287,8775.5,0.0,17770.0,63597160.0
World 0,8972.511287,8775.5,0.0,17770.0,63597160.0
ID,8973.511287,8776.5,1.0,17771.0,63604250.0
RAJ2000,43.553914,43.448784,41.081526,46.039649,308710.1
DEJ2000,60.36616,60.414345,59.246093,61.480995,427875.3
Jmag,14.830504,14.64,14.01,17.459999,105118.6
Hmag,14.083164,14.0,10.66,16.379999,98469.48
Ksmag,13.770971,13.76,8.38,16.049999,95350.2
__3.6_,13.48763,13.58,6.21,15.34,95600.32
__4.5_,13.389089,13.52,5.39,15.39,94901.86


In [8]:
# # Pop up window setup

# LARGE_FONT= ("Verdana", 12)
# NORM_FONT = ("Helvetica", 10)
# SMALL_FONT = ("Helvetica", 8)

# import tkinter as tk
# from tkinter import ttk

In [9]:
# # Creates a popup window with the statistical information
# # Does not preserve the lined-up formatting of the astropy tables- to do

# import tkinter as tk

# num_cols = len(headings)
# width = num_cols * 150

# class Application(tk.Frame):
#     def __init__(self, master=None):
#         super().__init__(master)
#         self.pack()
#         self.create_widgets()

#     def create_widgets(self):
#         self.hi_there = tk.Button(self)
#         w = tk.Message(self, text=tables, width=width)
#         w.pack()
# #         self.hi_there["text"] = "Two houses both alike in dignity in fair verona where we lay our scene"
# #         self.hi_there.pack(side="top")

#         self.quit = tk.Button(self, text="Done", fg="red",
#                               command=root.destroy)
#         self.quit.pack(side="bottom")

# root = tk.Tk()
# root.geometry("400x400")
# # root.geometry("500x100+300+300")
# app = Application(master=root)
# app.mainloop()

In [10]:
from qtpy.QtWidgets import (QApplication, QLabel, QWidget,
                            QVBoxLayout, QPushButton, QGridLayout)

In [15]:
# Use qtpy to make a popup with statistics
# Unsure how to feed qt the table directly- this code attempts to do it by indexing through the statistical array
# Probably not the most efficient solution

# Initialize application
app = QApplication([])

# Create layout and add widgets
layout = QGridLayout()

for i in range (0, len(column_data)):
    for j in range (0, len(headings)):
        layout.addWidget(QLabel(str(column_data[i][j]), i, j)

# Commenting out the above for loop and uncommenting the below makes the code work
# But as it is now, it claims that widget = QWidget is 'invalid syntax'
                         
# layout.addWidget(QLabel('Label 1'), 0, 0)
# layout.addWidget(QLabel('Label 2'), 1, 0)
# layout.addWidget(QLabel('Label 3'), 0, 1)
# layout.addWidget(QLabel(str(column_data[0][3])), 1, 1)

# Apply layout to widget
widget = QWidget()
widget.setLayout(layout)

# Show widget
widget.show()

# Start event loop
app.exec_()

SyntaxError: invalid syntax (<ipython-input-15-21e6d4827b05>, line 17)

In [None]:
# Look into QTableView, QAbstractTableModel
# Possible solution to displaying a table in Qt here 
# https://stackoverflow.com/questions/44603119/how-to-display-a-pandas-data-frame-with-pyqt5