In [1]:
import pandas as pd
import math
from statistics import mean 
import itertools

Read the raw observation data from the CSV file.

In [2]:
# Load the sea ice observations from seaice.csv. index_col=None keeps pandas'
# default integer row index rather than using one of the CSV columns as the index.
sea_ice_data = pd.read_csv('seaice.csv', index_col=None)

The pair (Year, Month) is not a convenient index, so we associate each (year, month) pair with a single month number counted from the start of 1978. The calculation is simple: the month number is 12 * (year - 1978) + month, so January 1978 is month 1 and October 1978 is month 10. Once the calculation is in place, apply it to every row of the sea ice dataframe and take a look at the result.

In [3]:
def month_from_start(month, year, start_year=1978):
    """Return the month number counted from January of ``start_year``.

    January of ``start_year`` is month 1, so with the default start year
    October 1978 maps to 10 and May 2019 maps to 497.

    The computation is plain arithmetic, so it works on scalars as well as
    on any objects that support ``+``, ``-`` and ``*`` element-wise.

    Parameters
    ----------
    month : int
        Calendar month number (1 for January).
    year : int
        Calendar year of the observation.
    start_year : int, optional
        Year whose January is counted as month 1. Defaults to 1978, which
        keeps the original behaviour for existing two-argument callers.

    Returns
    -------
    int
        ``month + 12 * (year - start_year)``.
    """
    return month + 12 * (year - start_year)

In [4]:
def month_from_row(row):
    """Compute the month number for a single observation row.

    ``row`` must support lookup by the keys 'Month' and 'Year' (for example
    a DataFrame row); the two values are forwarded to month_from_start and
    its result is returned unchanged.
    """
    month, year = row['Month'], row['Year']
    return month_from_start(month, year)

In [5]:
# Compute the month number for the whole frame in one vectorized call.
# month_from_start is plain arithmetic, so it can be applied directly to the
# 'Month' and 'Year' columns instead of invoking a Python function once per
# row through apply(axis=1).
sea_ice_data['MonthNum'] = month_from_start(sea_ice_data['Month'], sea_ice_data['Year'])

In [6]:
# Display the observations, which now include the MonthNum column, as the cell output.
sea_ice_data

Unnamed: 0,Year,Month,Day,Extent,Missing,hemisphere,MonthNum
0,1978,10,26,10.231,0.0,north,10
1,1978,10,28,10.420,0.0,north,10
2,1978,10,30,10.557,0.0,north,10
3,1978,11,1,10.670,0.0,north,11
4,1978,11,3,10.777,0.0,north,11
...,...,...,...,...,...,...,...
26349,2019,5,27,10.085,0.0,south,497
26350,2019,5,28,10.078,0.0,south,497
26351,2019,5,29,10.219,0.0,south,497
26352,2019,5,30,10.363,0.0,south,497


We're more interested in the aggregate data by month, so form a class to collect all the observations from a month in a hemisphere, then generate a row with the year, month, month number, hemisphere, and the min, max, and average sea ice extent.

In [23]:
class MonthAggregator:
    """Collect one month's observations for a single hemisphere and summarise them.

    The constructor selects the rows of ``dataframe`` whose 'MonthNum' and
    'hemisphere' columns equal the requested values. ``row()`` then reports
    the minimum, maximum and mean of the selection's 'Extent' values.

    Attributes:
        df: the selected rows of the input dataframe.
        series: the 'Extent' column of the selection.
        year, month: 'Year' and 'Month' of the first selected row, or None
            when no row matched.
        monthnum, hemisphere: the keys used for the selection.
    """

    def __init__(self, dataframe, monthnum,  hemisphere):
        """Select the rows matching ``monthnum`` and ``hemisphere``.

        ``dataframe`` must provide the columns 'MonthNum', 'hemisphere',
        'Extent', 'Year' and 'Month'.
        """
        selected = dataframe[(dataframe['MonthNum'] == monthnum) & (dataframe['hemisphere'] == hemisphere)]
        self.df = selected
        self.series = selected['Extent']
        # Always define year/month so row() cannot fail with AttributeError
        # when the selection is empty.
        self.year = None
        self.month = None
        if len(self.series) > 0:
            self.year = selected['Year'].tolist()[0]
            self.month = selected['Month'].tolist()[0]
        self.monthnum = monthnum
        self.hemisphere = hemisphere

    def _is_empty(self):
        """Return True when no observation matched the selection."""
        return len(self.series) == 0

    def _average_(self):
        """Return the mean extent, or NaN when the selection is empty."""
        if self._is_empty():
            return float('nan')
        return mean(self.series)

    def _max_(self):
        """Return the largest extent, or NaN when the selection is empty."""
        if self._is_empty():
            return float('nan')
        return max(self.series)

    def _min_(self):
        """Return the smallest extent, or NaN when the selection is empty."""
        if self._is_empty():
            return float('nan')
        return min(self.series)

    def row(self):
        """Return [year, month, monthnum, hemisphere, min, max, average].

        For an empty selection year and month are None and the three
        statistics are NaN.
        """
        return [self.year, self.month, self.monthnum, self.hemisphere, self._min_(), self._max_(), self._average_()]

Find the aggregate data for each individual pair (month, hemisphere) and extract the data rows

In [24]:

# Pair every observation's month number with its hemisphere, then reduce the
# pairs to the distinct (monthnum, hemisphere) keys.
month_list = sea_ice_data['MonthNum'].tolist()
hemisphere = sea_ice_data['hemisphere'].tolist()
unique_keys = set(zip(month_list, hemisphere))
# Build one aggregator per key, then collect each aggregator's summary row.
month_dict = {
    (monthnum, hemi): MonthAggregator(sea_ice_data, monthnum, hemi)
    for monthnum, hemi in unique_keys
}
rows = [month_dict[key].row() for key in month_dict]

Form the aggregate dataframe from the rows just found, and then sort the rows in increasing order by month number

In [33]:
# Build the per-month summary frame from the collected rows and order it by
# month number, then hemisphere. sort_values returns the sorted frame, so it
# is bound directly instead of sorting in place.
aggregate = (
    pd.DataFrame(
        data=rows,
        columns=['Year', 'Month', 'MonthNum', 'hemisphere', 'Min', 'Max', 'Average'],
    )
    .sort_values(['MonthNum', 'hemisphere'])
)
aggregate

Unnamed: 0,Year,Month,MonthNum,hemisphere,Min,Max,Average
881,1978,10,10,north,10.231,10.557,10.402667
441,1978,10,10,south,17.624,17.803,17.699000
555,1978,11,11,north,10.670,12.684,11.645133
774,1978,11,11,south,13.619,17.527,15.896400
952,1978,12,12,north,12.660,14.585,13.667063
...,...,...,...,...,...,...,...
482,2019,3,495,south,2.474,4.163,3.164161
347,2019,4,496,north,13.009,14.038,13.454600
880,2019,4,496,south,4.240,6.992,5.708633
685,2019,5,497,north,11.486,12.820,12.163161


In [None]:
Check to make sure the types are right: it should be (int64, int64, int64, object, float64, float64, float64)

In [28]:
# Show the dtype of every column in the aggregate frame for comparison with the expected types.
aggregate.dtypes

Year            int64
Month           int64
MonthNum        int64
hemisphere     object
Min           float64
Max           float64
Average       float64
dtype: object

Import the Galyleo libraries and use a convenience routine that takes a dataframe, converts it to a table, and sends the table to the dashboard

In [29]:
from galyleo.galyleo_table import GalyleoTable
from galyleo.galyleo_constants import GALYLEO_STRING, GALYLEO_NUMBER
from galyleo.galyleo_jupyterlab_client import GalyleoClient

In [30]:
def send_dataframe_to_dashboard(dataframe, table_name, client):
    """Send ``dataframe`` to the dashboard as a table called ``table_name``.

    A GalyleoTable named ``table_name`` is created, loaded from
    ``dataframe``, and passed to ``client.send_data_to_dashboard``.
    Nothing is returned.
    """
    dashboard_table = GalyleoTable(table_name)
    dashboard_table.load_from_dataframe(dataframe)
    client.send_data_to_dashboard(dashboard_table)

In [None]:
Create a client and send the observations and aggregate dataframes to the dashboard

In [34]:
# Create one dashboard client and publish both frames through it, the raw
# observations first and the monthly aggregates second.
client = GalyleoClient()
for table_name, frame in (('observations', sea_ice_data), ('month_data', aggregate)):
    send_dataframe_to_dashboard(frame, table_name, client)