Import the libraries, as always, and read in the data

In [7]:
from galyleo.galyleo_table import GalyleoTable
from galyleo.galyleo_constants import GALYLEO_STRING, GALYLEO_NUMBER
from galyleo.galyleo_jupyterlab_client import GalyleoClient
import csv
def cleanse_row(row):
    """Strip whitespace from each field of a CSV row and convert the numeric ones.

    The first three fields are converted with int(), the next four are kept
    as stripped strings, and the eighth is converted with float().  Fields
    beyond the eighth are dropped.

    Raises ValueError if a numeric field cannot be parsed and IndexError if
    the row has fewer than eight fields.
    """
    stripped = [field.strip() for field in row]
    leading_ints = [int(field) for field in stripped[:3]]
    text_fields = stripped[3:7]
    trailing_float = float(stripped[7])
    return leading_ints + text_fields + [trailing_float]

# Read the CSV file: the first row holds the column names and every later row
# is converted to typed values by cleanse_row.
# newline='' is what the csv module asks for, so that newlines embedded in
# quoted fields are parsed correctly.
with open('../ufos.csv', 'r', newline='') as ufo_file:
    reader = csv.reader(ufo_file)
    column_names = next(reader)
    # csv.reader yields [] for a blank line; skip those so cleanse_row is
    # never handed a row with no fields.
    data = [cleanse_row(row) for row in reader if row]
# No explicit close() is needed: the with statement has already closed the file.


In [8]:
# Header cells can carry stray whitespace, so normalise the names first.
column_names = [name.strip() for name in column_names]
# The column types mirror what cleanse_row produces for each row: three ints,
# four strings, and a final float.  The last column was previously declared
# as a string even though its values are floats.
column_types = [GALYLEO_NUMBER] * 3 + [GALYLEO_STRING] * 4 + [GALYLEO_NUMBER]
# One (name, type) pair per column, in file order.
schema = list(zip(column_names, column_types))
table = GalyleoTable('ufos')
table.load_from_schema_and_data(schema, data)

Aggregating by a set of columns is something we will do a lot, so let's write a function for it. The function takes a list of column names, a table, a name for the new table, and a name for the new count column. It finds the unique combinations of values in the given columns and returns a table whose columns are the given column names plus count_column_name, and whose rows are the unique combinations together with the count of each combination.

In [9]:
from itertools import product
def aggregate_by(column_list, galyleo_table, new_table_name, count_column_name):
    """Count how often each combination of values occurs in the given columns.

    Parameters:
        column_list: names of the columns to group by.  Names that are not in
            the table's schema are ignored.
        galyleo_table: source table; its ``schema`` (a list of entries with a
            "name" key) and ``data`` (a list of rows) are read.
        new_table_name: name given to the resulting GalyleoTable.
        count_column_name: name of the column that holds the counts.

    Returns:
        A new GalyleoTable whose columns are the grouping columns, in the
        order they appear in the source schema, followed by the count column.
        There is one row per combination that actually occurs in the data,
        in the order each combination is first seen.
    """
    wanted = set(column_list)
    # Keep each matching schema entry together with its position in a row.
    key_columns = [
        (position, entry)
        for position, entry in enumerate(galyleo_table.schema)
        if entry["name"] in wanted
    ]
    positions = [position for position, _ in key_columns]

    # Count in a single pass over the rows.  Only combinations that occur are
    # ever stored, so there is no need to enumerate the full Cartesian product
    # of the per-column values and then discard the zero-count entries.
    counts = {}
    for row in galyleo_table.data:
        key = tuple(row[position] for position in positions)
        counts[key] = counts.get(key, 0) + 1

    # Use the caller-supplied name for the count column rather than a
    # hard-coded "count".
    schema = [entry for _, entry in key_columns] + [
        {"name": count_column_name, "type": GALYLEO_NUMBER}
    ]
    rows = [list(key) + [total] for key, total in counts.items()]

    result = GalyleoTable(new_table_name)
    result.load_from_dictionary({"columns": schema, "rows": rows})
    return result

Aggregate by year, month, and country.

In [10]:
# Count the sightings for each country/year/month combination in the table.
sightings_by_country_year_month = aggregate_by(
    ['country', 'year', 'month'],
    table,
    'aggregate_cmy',
    'count',
)

Create a Dashboard using the Launcher or the File menu, then execute the next cell to send the data to it

In [11]:
# Create the Galyleo client, then send the country/year/month aggregate to the
# dashboard.  The client is reused by later cells.
client = GalyleoClient()
client.send_data_to_dashboard(sightings_by_country_year_month)

Aggregate by year and country.

In [12]:
# Count the sightings for each country/year combination, then send that
# aggregate to the dashboard through the existing client.
sightings_by_country_year = aggregate_by(
    ['country', 'year'],
    table,
    'aggregate_cy',
    'count',
)
client.send_data_to_dashboard(sightings_by_country_year)