In [None]:
# Export from a gcloud shell.
# The query selects only a subset of the table's columns: a dump of all columns
# yielded a malformed CSV, probably because of this known issue with exporting NULL values:
# https://cloud.google.com/sql/docs/mysql/known-issues#import-export

# gcloud sql export csv --escape="5C" software-usage-stats gs://logging-natcap/model_log_table-2021-12-10.csv --query="SELECT 'model_name', 'invest_release', 'invest_interface', 'system_full_platform_string', 'time' UNION SELECT model_name, invest_release, invest_interface, system_full_platform_string, time FROM model_log_table" --database=invest_model_usage

# From a local shell:
# gsutil cp gs://logging-natcap/model_log_table-2021-12-10.csv .

In [1]:
%load_ext autoreload
%autoreload 2

In [9]:
import pandas
import altair

from load_and_clean import load_and_clean_csv

# Let pandas display up to 100 rows of a DataFrame/Series before truncating.
pandas.set_option('display.max_rows', 100)

In [10]:
# Local copy of the usage-log CSV; this is the file fetched by the `gsutil cp`
# command documented in the first cell of this notebook.
table_path = 'model_log_table-2021-12-10.csv'

In [11]:
# load_and_clean_csv is imported from the local load_and_clean module.
# Judging by what it prints, it drops rows for models that are not of interest
# and reports the remaining per-model run counts.
# NOTE(review): later cells rely on `df` having 'datetime' (tz-aware) and
# 'model' columns -- presumably derived by this helper; confirm in load_and_clean.
df = load_and_clean_csv(table_path)

dropping 22547 rows for models we dont care about
remaining model counts:
sdr                                 50835
annual_water_yield                  41403
carbon                              32977
habitat_quality                     27752
ndr                                 24999
recmodel_client                     16427
seasonal_water_yield                16222
fisheries                            8556
hra                                  7596
pollination                          7476
scenario_generator                   6437
coastal_vulnerability                6234
urban_cooling_model                  5989
coastal_blue_carbon                  5363
delineateit                          3353
scenario_gen_proximity               3079
urban_flood_risk_mitigation          2368
scenic_quality                       2205
forest_carbon_edge_effect            1842
routedem                             1780
crop_production_percentile           1601
wind_energy                          1361
ro

# Workbench stats

In [28]:
# Compare overall InVEST usage with Workbench usage since the Workbench beta
# release date.
beta_release_date = pandas.to_datetime('2021-11-19', utc=True)
workbench_version = 'Workbench 0.1.0-beta'

post_wb_runs = df[df['datetime'] >= beta_release_date]
print(f'{post_wb_runs.shape[0]} invest runs since workbench release')

# Match the version as a literal substring: with the default regex=True the
# dots in the version would match any character. na=False counts a missing
# interface value as "not a Workbench run" instead of putting NaN in the mask,
# which pandas refuses to index with.
is_wb_run = post_wb_runs['invest_interface'].str.contains(
    workbench_version, regex=False, na=False)
wb_runs = post_wb_runs[is_wb_run]
print(f'including {wb_runs.shape[0]} {workbench_version} runs')
wb_runs

2697 invest runs since workbench release
including 14 Workbench 0.1.0-beta runs


Unnamed: 0,model_name,invest_release,invest_interface,system_full_platform_string,time,model,datetime
302398,natcap.invest.pollination,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19043-SP0,2021-11-19T19:13:22.531Z,pollination,2021-11-19 19:13:22.531000+00:00
302399,natcap.invest.delineateit.delineateit,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,macOS-10.13.6-x86_64-i386-64bit,2021-11-19T19:24:50.201Z,delineateit,2021-11-19 19:24:50.201000+00:00
302501,natcap.invest.urban_cooling_model,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19043-SP0,2021-11-20T22:08:38.257Z,urban_cooling_model,2021-11-20 22:08:38.257000+00:00
303115,natcap.invest.carbon,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19042-SP0,2021-11-25T04:57:48.557Z,carbon,2021-11-25 04:57:48.557000+00:00
303680,natcap.invest.habitat_quality,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19042-SP0,2021-11-29T13:16:16.251Z,habitat_quality,2021-11-29 13:16:16.251000+00:00
303681,natcap.invest.habitat_quality,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19042-SP0,2021-11-29T13:29:55.741Z,habitat_quality,2021-11-29 13:29:55.741000+00:00
303730,natcap.invest.ndr.ndr,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19042-SP0,2021-11-29T20:08:53.224Z,ndr,2021-11-29 20:08:53.224000+00:00
303731,natcap.invest.seasonal_water_yield.seasonal_wa...,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19042-SP0,2021-11-29T20:40:23.888Z,seasonal_water_yield,2021-11-29 20:40:23.888000+00:00
304452,natcap.invest.carbon,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19043-SP0,2021-12-06T14:02:49.367Z,carbon,2021-12-06 14:02:49.367000+00:00
304455,natcap.invest.recreation.recmodel_client,3.9.2.post480+gc06f40d7,Workbench 0.1.0-beta,Windows-10-10.0.19043-SP0,2021-12-06T14:30:09.645Z,recmodel_client,2021-12-06 14:30:09.645000+00:00


# Monthly counts by model

In [None]:
# Bucket the runs by time period and by model, then count the rows in each
# bucket to get the number of runs per model per period.
frequency = 'M'  # pandas offset alias for month-end frequency
by_period = pandas.Grouper(key='datetime', freq=frequency)
by_model = pandas.Grouper(key='model')
run_counts = df.groupby([by_period, by_model]).size()
data = run_counts.reset_index(name='counts')
data.head()

In [None]:
# Spread the counts into one column per model, one row per month. A model with
# no runs in a given month has no row in `data`, so the gap it leaves in the
# pivoted table is filled with 0 for the benefit of the plots.
wide = data.pivot(index='datetime', columns='model', values='counts').fillna(0)
months_with_counts = len(wide)

# Assume the first and last months in the log are incomplete and trim them off:
# move forward from the earliest label to the next month start, and back from
# the latest label to the previous month end. Reindexing over every period in
# that span also adds all-zero rows for months in which no model was run.
first_label, last_label = wide.index.min(), wide.index.max()
begin = first_label + pandas.offsets.MonthBegin()
end = last_label - pandas.offsets.MonthEnd()
date_range = pandas.date_range(start=begin, end=end, freq=frequency)
wide = wide.reindex(date_range, fill_value=0)
print(f'complete data from {begin} to {end}')

In [None]:
# Reshape for altair.
# altair cannot plot a DataFrame index, so move the dates into an ordinary
# column; it is called 'index' because the reindexed date index has no name.
wide = wide.reset_index()
# One row per (date, model) pair: the model names land in the 'model' column
# and the monthly counts in the 'value' column.
long = wide.melt(id_vars='index')

In [None]:
def plot_model_counts_over_time(model_list, title=None):
    """Build an interactive line chart of runs per month for selected models.

    Reads the notebook-level long-format table ``long`` (columns 'index',
    'model' and 'value') and draws one line per model. Clicking a model in the
    legend keeps it fully opaque and fades the other series.

    Args:
        model_list: model names to include; rows of ``long`` whose 'model'
            value is not in this collection are left out.
        title: optional chart title.

    Returns:
        The configured altair chart.
    """
    # Turn off altair's maximum-row check for the data passed to charts.
    altair.data_transformers.disable_max_rows()
    legend_selection = altair.selection_multi(fields=['model'], bind='legend')

    subset = long[long['model'].isin(model_list)]

    x_dates = altair.X(
        'index:T', axis=altair.Axis(format='%Y-%m'), title=None)
    y_runs = altair.Y('value:Q', title='runs per month')
    line_color = altair.Color(
        'model', scale=altair.Scale(scheme='category10'))
    # Selected series are fully opaque; everything else is faded.
    line_opacity = altair.condition(
        legend_selection, altair.value(1), altair.value(0.2))

    chart = altair.Chart(subset).mark_line().encode(
        x_dates,
        y_runs,
        color=line_color,
        opacity=line_opacity,
        size=altair.value(1),
    )
    chart = chart.properties(width=800, height=300, title=title)
    chart = chart.add_selection(legend_selection)
    chart = chart.configure_axis(
        grid=False, labelFontSize=12, titleFontSize=14, titlePadding=15)
    return chart.configure_legend(labelFontSize=12, title=None)

In [None]:
# Total runs per month across every model.
# Only 'value' is summed: aggregating the whole frame would also try to "sum"
# the string-valued 'model' column, which pandas >= 2.0 no longer drops by
# default (it concatenates the model names instead).
all_models_counts = long.groupby('index')['value'].sum().reset_index()
altair.Chart(all_models_counts).mark_line().encode(
    altair.X('index:T', axis=altair.Axis(format='%Y-%m'), title=None),
    altair.Y('value:Q', title='runs per month'),
    size=altair.value(1)
).properties(
    width=800,
    height=300,
    title='all models'
).configure_axis(
    grid=False
)

## These plots are interactive - click a series in the legend to highlight it

In [None]:
# The most frequently run models (see the per-model counts printed when the
# data was loaded).
high_use_models = [
    'sdr',
    # The cleaned 'model' column reports this model as 'annual_water_yield'
    # (per the counts printed at load time). Listing only the old name left
    # the second most-run model out of the plot.
    'annual_water_yield',
    # Former name of the same model, kept so that any rows still carrying it
    # are included; names absent from the data are simply ignored.
    'hydropower_water_yield',
    'carbon',
    'habitat_quality',
    'ndr',
    'seasonal_water_yield',
    'recmodel_client',
    'fisheries',
]
plot_model_counts_over_time(high_use_models, 'high-use models')

In [None]:
# Models with a moderate number of runs.
mid_use_models = [
    'pollination',
    'hra',
    'scenario_generator',
    'coastal_vulnerability',
    'urban_cooling_model',
    'coastal_blue_carbon',
]
plot_model_counts_over_time(mid_use_models, title='mid-use models')

In [None]:
# Models with fewer runs than the mid-use group.
mid_low_use_models = [
    'delineateit', 'scenario_gen_proximity',
    'urban_flood_risk_mitigation', 'scenic_quality',
    'routedem', 'crop_production_percentile',
    'wind_energy', 'crop_production_regression',
]
plot_model_counts_over_time(
    model_list=mid_low_use_models,
    title='mid-low-use models',
)

In [None]:
# The least-run models; per the plot title, some of them are already deprecated.
low_use_models = [
    'overlap_analysis', 'wave_energy',
    'finfish_aquaculture', 'fisheries_hst',
    'forest_carbon_edge_effect', 'globio',
    'marine_water_quality_biophysical', 'timber',
]
plot_model_counts_over_time(
    model_list=low_use_models,
    title='low-use models (including some that are already deprecated)',
)

In [None]:
# Inspect the month-by-month counts of a single model.
is_wave_energy = long['model'] == 'wave_energy'
long.loc[is_wave_energy]