Import the libraries, as always, and read in the data

In [37]:
from galyleo.galyleo_table import GalyleoTable
from galyleo.galyleo_constants import GALYLEO_STRING, GALYLEO_NUMBER
from galyleo.galyleo_jupyterlab_client import GalyleoClient
import csv

def state_code(current_code):
    """
    Turn a lowercase two-letter state/province code into a country-prefixed code.

    Parameters:
        current_code: the two-letter code as read from the data, or '' when the
            row has no state.
    Returns:
        '' when current_code is ''; otherwise 'CA-XX' when the code is one of
        the Canadian province/territory codes listed below, and 'US-XX' for
        every other code, where XX is current_code upper-cased.
    """
    if current_code == '':
        return ''
    # Manitoba is 'mb'; 'mn' is Minnesota and must fall through to the US branch
    canada_codes = {'ab', 'bc', 'sk', 'mb', 'on', 'qc', 'pe', 'nb', 'ns', 'nl', 'nt', 'nu', 'yt'}
    country = 'CA' if current_code in canada_codes else 'US'
    return f'{country}-{current_code.upper()}'

def cleanse_row(row):
    """
    Convert one raw CSV row (a sequence of strings) into a typed row.

    Every entry is stripped of surrounding whitespace first.  The first three
    entries are converted to int, entries 3 and 4 are kept as strings, entry 5
    is passed through state_code, entry 6 is kept as a string and entry 7 is
    converted to float.  Returns the eight resulting values as a list.
    """
    stripped = [field.strip() for field in row]
    leading_numbers = [int(stripped[position]) for position in (0, 1, 2)]
    text_fields = stripped[3:5]
    trailing_fields = [state_code(stripped[5]), stripped[6], float(stripped[7])]
    return leading_numbers + text_fields + trailing_fields

# Read the file: the first row supplies the column names and every remaining
# row is passed through cleanse_row.  newline='' is what the csv module asks
# for when opening a file for csv.reader; the with-block closes the file, so
# no explicit close() is needed afterwards.
with open('ufos.csv', 'r', newline='') as ufo_file:
    reader = csv.reader(ufo_file)
    column_names = next(reader)
    data = [cleanse_row(row) for row in reader]

In [38]:
# Display the first ten cleansed rows as a quick sanity check
data[:10]

[[588, 10, 1949, 'night', 'us', 'US-TX', 'cylinder', 2700.0],
 [588, 10, 1949, 'night', 'us', 'US-TX', 'light', 7200.0],
 [2779, 10, 1955, 'afternoon', 'gb', '', 'circle', 20.0],
 [3145, 10, 1956, 'night', 'us', 'US-TX', 'circle', 20.0],
 [4606, 10, 1960, 'night', 'us', 'US-HI', 'light', 900.0],
 [4971, 10, 1961, 'night', 'us', 'US-TN', 'sphere', 300.0],
 [6432, 10, 1965, 'night', 'gb', '', 'circle', 180.0],
 [6432, 10, 1965, 'night', 'us', 'US-CT', 'disk', 1200.0],
 [6797, 10, 1966, 'night', 'us', 'US-AL', 'disk', 180.0],
 [6797, 10, 1966, 'night', 'us', 'US-FL', 'disk', 120.0]]

In [39]:
# The header names come straight from the CSV, so trim surrounding whitespace
column_names = [name.strip() for name in column_names]
# Column types must mirror what cleanse_row produces: columns 0-2 are ints,
# columns 3-6 are strings and column 7 is a float, so it is a number as well
schema = (
    [(column_names[i], GALYLEO_NUMBER) for i in range(3)]
    + [(column_names[i], GALYLEO_STRING) for i in range(3, 7)]
    + [(column_names[7], GALYLEO_NUMBER)]
)
table = GalyleoTable('ufos')
table.load_from_schema_and_data(schema, data)

Aggregating by a set of columns is something we will do a lot, so let's write a function to do that. The function takes a table, a list of column names, a name for the new table, and a name for the new count column. It counts how many rows share each unique combination of values in the chosen columns, and returns a table whose columns are the chosen columns plus the count column, and whose rows are the unique combinations that occur together with the count of each combination.

In [40]:
from itertools import product
def aggregate_by(galyleo_table, column_list, new_table_name, count_column_name):
    """
    Count the rows of a table for each combination of values in the chosen columns.

    Parameters:
        galyleo_table: the table to aggregate.  Its schema is read as a list of
            entries indexed by "name", and its data as a list of rows.
        column_list: names of the columns to group by.  Names that are not in
            the table's schema are ignored.  The grouping columns appear in the
            result in the table's schema order, not in column_list order.
        new_table_name: name given to the returned table.
        count_column_name: name of the extra column that holds the counts.
    Returns:
        A new GalyleoTable whose columns are the grouping columns followed by
        the count column, with one row per combination that actually occurs in
        the data.  Rows are ordered by first appearance of each combination.
    """
    names = set(column_list)
    indices = [i for i, entry in enumerate(galyleo_table.schema) if entry["name"] in names]
    # Count only the combinations that occur, in a single pass over the rows.
    # Enumerating the full Cartesian product of the unique values first grows
    # multiplicatively with every added column and is never needed.
    counts = {}
    for row in galyleo_table.data:
        key = tuple(row[i] for i in indices)
        counts[key] = counts.get(key, 0) + 1
    schema = [galyleo_table.schema[i] for i in indices] + [{"name": count_column_name, "type": GALYLEO_NUMBER}]
    rows = [list(key) + [total] for key, total in counts.items()]
    result = GalyleoTable(new_table_name)
    result.load_from_dictionary({"columns": schema, "rows": rows})
    return result

Aggregate by year, month, country

In [41]:
# Count sightings for every (country, year, month) combination in the full table
sightings_by_country_year_month = aggregate_by(
    galyleo_table=table,
    column_list=['country', 'year', 'month'],
    new_table_name='aggregate_cym',
    count_column_name='count',
)

Create a Dashboard using the Launcher or the File menu, then execute the next cell to send the data to it

In [42]:
# Create the client once; the later cells reuse it for every table they send
client = GalyleoClient()
# Send the country/year/month aggregate to the dashboard
client.send_data_to_dashboard(sightings_by_country_year_month)

Aggregate by year and country

In [43]:
# Count sightings for every (country, year) combination, then send the result to the dashboard
sightings_by_country_year = aggregate_by(
    galyleo_table=table,
    column_list=['country', 'year'],
    new_table_name='aggregate_cy',
    count_column_name='count',
)
client.send_data_to_dashboard(sightings_by_country_year)

A function to filter data by column name. This is something we do interactively in the dashboard, but sometimes it's useful to do it statically, here. It is very simple: apply the filter function to the chosen column in each row, then return a table with the same columns and only those rows which match the filter.

In [44]:
def filter_by_column(galyleo_table, column_name, filter_function, new_table_name):
    """
    Build a new table holding only the rows whose value in one column passes a test.

    Parameters:
        galyleo_table: the table to filter.
        column_name: name of the column whose values are tested.  An IndexError
            is raised when no schema entry has this name.
        filter_function: called with the column's value for each row; the row
            is kept when the result is truthy.
        new_table_name: name given to the returned table.
    Returns:
        A new GalyleoTable loaded with a copy of the schema list and copies of
        the rows that were kept.
    """
    matching_positions = [
        position
        for position, column in enumerate(galyleo_table.schema)
        if column["name"] == column_name
    ]
    column_index = matching_positions[0]
    kept_rows = []
    for row in galyleo_table.data:
        if filter_function(row[column_index]):
            kept_rows.append(row[:])
    filtered_table = GalyleoTable(new_table_name)
    table_contents = {"columns": galyleo_table.schema[:], "rows": kept_rows}
    filtered_table.load_from_dictionary(table_contents)
    return filtered_table
    

Use this to get North American sightings

In [45]:
# Keep only the rows whose country is Canada ('ca') or the United States ('us')
north_american_sightings = filter_by_column(
    galyleo_table=table,
    column_name="country",
    filter_function=lambda country_code: country_code in {'ca', 'us'},
    new_table_name='north_america_table',
)

Aggregate by state, year, and month, and by state and year

In [48]:
# North American sightings counted per (state, year, month)
sightings_by_state_year_month = aggregate_by(
    galyleo_table=north_american_sightings,
    column_list=['state', 'year', 'month'],
    new_table_name='aggregate_sym',
    count_column_name='count',
)
# North American sightings counted per (state, year)
sightings_by_state_year = aggregate_by(
    galyleo_table=north_american_sightings,
    column_list=['state', 'year'],
    new_table_name='aggregate_sy',
    count_column_name='count',
)
# Send both aggregates to the dashboard, the yearly one first
client.send_data_to_dashboard(sightings_by_state_year)
client.send_data_to_dashboard(sightings_by_state_year_month)

Aggregate by state, year, and type, and by country, year, and type

In [51]:
# North American sightings counted per (state, year, type)
sightings_by_state_year_type = aggregate_by(
    galyleo_table=north_american_sightings,
    column_list=['state', 'year', 'type'],
    new_table_name='aggregate_syt',
    count_column_name='count',
)
# All sightings counted per (country, year, type)
sightings_by_country_year_type = aggregate_by(
    galyleo_table=table,
    column_list=['country', 'year', 'type'],
    new_table_name='aggregate_cyt',
    count_column_name='count',
)
# Send both aggregates to the dashboard, the state-level one first
client.send_data_to_dashboard(sightings_by_state_year_type)
client.send_data_to_dashboard(sightings_by_country_year_type)