Skip to content

Commit

Permalink
Update HTML reports using Github Actions (#63)
Browse files Browse the repository at this point in the history
* Github Actions for building examples as artifacts
  • Loading branch information
sbrugman committed Oct 28, 2020
1 parent 1f6c45e commit 5b1a6af
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 53 deletions.
32 changes: 31 additions & 1 deletion .github/workflows/build.yml
Expand Up @@ -11,7 +11,6 @@ on:

jobs:
build:

runs-on: ubuntu-latest

steps:
Expand All @@ -35,3 +34,34 @@ jobs:
run: |
pip install pytest
pytest
examples:
  # Generates the example HTML reports and publishes them as workflow
  # artifacts. Declared with `needs: build`, so it waits for the build job.
  runs-on: ubuntu-latest
  needs: build
  steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.6
      uses: actions/setup-python@v1
      with:
        python-version: 3.6
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e .
    # The step previously had an empty `name:` key; give it a label so it is
    # identifiable in the workflow run overview and logs.
    - name: Generate example reports
      run: |
        cd examples
        python synthetic_data.py
        python flight_delays.py
    # `if-no-files-found: error` makes the job fail when an expected report
    # file is missing instead of silently uploading nothing.
    - uses: actions/upload-artifact@v2
      with:
        name: synthetic-report
        path: examples/test_data_report.html
        if-no-files-found: error

    - uses: actions/upload-artifact@v2
      with:
        name: flight-delays-report
        path: examples/flight_delays_report.html
        if-no-files-found: error
22 changes: 22 additions & 0 deletions examples/flight_delays.py
@@ -0,0 +1,22 @@
import pandas as pd

import popmon
from popmon import resources

# Load the flight delays sample dataset that ships with popmon.
flight_delays_path = resources.data("flight_delays.csv.gz")
flights = pd.read_csv(flight_delays_path, index_col=0, parse_dates=["DATE"])

# Options for the stability report; features are binned automatically.
# (pm_stability_report is available on the dataframe because popmon was
# imported at the top of this script.)
report_options = {
    "time_axis": "DATE",
    "time_width": "1w",
    "time_offset": "2015-07-02",
    "extended_report": False,
    "pull_rules": {"*_pull": [10, 7, -7, -10]},
}
stability_report = flights.pm_stability_report(**report_options)

# Write the report to an HTML file in the current working directory.
stability_report.to_file("flight_delays_report.html")
14 changes: 14 additions & 0 deletions examples/synthetic_data.py
@@ -0,0 +1,14 @@
import pandas as pd

import popmon
from popmon import resources

# Load the synthetic sample dataset that ships with popmon.
synthetic_path = resources.data("test.csv.gz")
synthetic = pd.read_csv(synthetic_path, parse_dates=["date"])

# Build the stability report for the explicitly listed features only.
# (pm_stability_report is available on the dataframe because popmon was
# imported at the top of this script.)
monitored_features = ["date:age", "date:gender"]
stability_report = synthetic.pm_stability_report(
    time_axis="date", features=monitored_features
)

# Write the report to an HTML file in the current working directory.
stability_report.to_file("test_data_report.html")
131 changes: 100 additions & 31 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Expand Up @@ -14,7 +14,8 @@
},
"outputs": [],
"source": [
"from IPython.core.display import display, HTML\n",
"from IPython.core.display import HTML, display\n",
"\n",
"display(HTML(\"<style>.container { width:80% !important; }</style>\"))\n",
"display(HTML(\"<style>div.output_scroll { height: 44em; }</style>\"))"
]
Expand All @@ -27,6 +28,7 @@
"source": [
"# install popmon (if not installed yet)\n",
"import sys\n",
"\n",
"!{sys.executable} -m pip install popmon"
]
},
Expand All @@ -37,6 +39,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import popmon\n",
"from popmon import resources"
]
Expand All @@ -55,7 +58,9 @@
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])"
"df = pd.read_csv(\n",
" resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
")"
]
},
{
Expand All @@ -72,7 +77,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.pm_stability_report(time_axis='DATE')"
"df.pm_stability_report(time_axis=\"DATE\")"
]
},
{
Expand All @@ -92,7 +97,9 @@
"metadata": {},
"outputs": [],
"source": [
"df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False)"
"df.pm_stability_report(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\", extended_report=False\n",
")"
]
},
{
Expand All @@ -109,7 +116,13 @@
"metadata": {},
"outputs": [],
"source": [
"df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False, pull_rules={\"*_pull\": [10, 7, -7, -10]})"
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
" time_width=\"1w\",\n",
" time_offset=\"2015-07-02\",\n",
" extended_report=False,\n",
" pull_rules={\"*_pull\": [10, 7, -7, -10]},\n",
")"
]
},
{
Expand All @@ -133,7 +146,10 @@
"source": [
"# download histogrammar jar files if not already installed, used for histogramming of spark dataframe\n",
"from pyspark.sql import SparkSession\n",
"spark = SparkSession.builder.config('spark.jars.packages','org.diana-hep:histogrammar-sparksql_2.11:1.0.4').getOrCreate()"
"\n",
"spark = SparkSession.builder.config(\n",
" \"spark.jars.packages\", \"org.diana-hep:histogrammar-sparksql_2.11:1.0.4\"\n",
").getOrCreate()"
]
},
{
Expand All @@ -151,7 +167,9 @@
"metadata": {},
"outputs": [],
"source": [
"sdf.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False)"
"sdf.pm_stability_report(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\", extended_report=False\n",
")"
]
},
{
Expand All @@ -172,8 +190,17 @@
"metadata": {},
"outputs": [],
"source": [
"df_ref = pd.read_csv(resources.data(\"flight_delays_reference.csv.gz\"), index_col=0, parse_dates=['DATE'])\n",
"df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False, reference_type='external', reference=df_ref)"
"df_ref = pd.read_csv(\n",
" resources.data(\"flight_delays_reference.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
")\n",
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
" time_width=\"1w\",\n",
" time_offset=\"2015-07-02\",\n",
" extended_report=False,\n",
" reference_type=\"external\",\n",
" reference=df_ref,\n",
")"
]
},
{
Expand All @@ -190,7 +217,13 @@
"metadata": {},
"outputs": [],
"source": [
"df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False, reference_type=\"expanding\")"
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
" time_width=\"1w\",\n",
" time_offset=\"2015-07-02\",\n",
" extended_report=False,\n",
" reference_type=\"expanding\",\n",
")"
]
},
{
Expand All @@ -208,7 +241,14 @@
"metadata": {},
"outputs": [],
"source": [
"df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02', extended_report=False, reference_type=\"rolling\", window=5)"
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
" time_width=\"1w\",\n",
" time_offset=\"2015-07-02\",\n",
" extended_report=False,\n",
" reference_type=\"rolling\",\n",
" window=5,\n",
")"
]
},
{
Expand All @@ -226,8 +266,10 @@
"metadata": {},
"outputs": [],
"source": [
"report = df.pm_stability_report(time_axis='DATE', time_width='1w', time_offset='2015-07-02')\n",
"split_hists = report.datastore['split_hists']['DEPARTURE_DELAY']\n",
"report = df.pm_stability_report(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
")\n",
"split_hists = report.datastore[\"split_hists\"][\"DEPARTURE_DELAY\"]\n",
"split_hists"
]
},
Expand Down Expand Up @@ -279,9 +321,10 @@
"outputs": [],
"source": [
"import pickle\n",
"with open('report.pkl', 'wb') as f: \n",
"\n",
"with open(\"report.pkl\", \"wb\") as f:\n",
" pickle.dump(report, f)\n",
"report.to_file('report.html')"
"report.to_file(\"report.html\")"
]
},
{
Expand All @@ -298,8 +341,16 @@
"metadata": {},
"outputs": [],
"source": [
"report.regenerate(last_n=0, skip_first_n=0, skip_last_n=0, plot_hist_n=2, skip_empty_plots=True,\n",
" report_filepath=None, store_key='html_report', sections_key='report_sections')"
"report.regenerate(\n",
" last_n=0,\n",
" skip_first_n=0,\n",
" skip_last_n=0,\n",
" plot_hist_n=2,\n",
" skip_empty_plots=True,\n",
" report_filepath=None,\n",
" store_key=\"html_report\",\n",
" sections_key=\"report_sections\",\n",
")"
]
},
{
Expand All @@ -322,15 +373,23 @@
"from popmon.base import Pipeline\n",
"from popmon.visualization import SectionGenerator, ReportGenerator\n",
"\n",
"monitoring_rules = {\"*_pull\": [7, 4, -4, -7], \"*_zscore\": [7, 4, -4, -7], \"[!p]*_unknown_labels\": [0.5, 0.5, 0, 0]}\n",
"monitoring_rules = {\n",
" \"*_pull\": [7, 4, -4, -7],\n",
" \"*_zscore\": [7, 4, -4, -7],\n",
" \"[!p]*_unknown_labels\": [0.5, 0.5, 0, 0],\n",
"}\n",
"datastore = dict()\n",
"datastore['hists'] = df.pm_make_histograms(time_axis='DATE', time_width='1w', time_offset='2015-07-02')\n",
"datastore[\"hists\"] = df.pm_make_histograms(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
")\n",
"\n",
"modules = [\n",
" HistSplitter(read_key='hists', store_key='split_hists', feature_begins_with='DATE'),\n",
" HistProfiler(read_key='split_hists', store_key='profiles'),\n",
" SectionGenerator(section_name='Profiles', read_key=\"profiles\", store_key=\"report_sections\"),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\")\n",
" HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" SectionGenerator(\n",
" section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\"\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
"]\n",
"\n",
"pipeline = Pipeline(modules)\n",
Expand All @@ -356,15 +415,25 @@
"from popmon.analysis.comparison.hist_comparer import ReferenceHistComparer\n",
"\n",
"datastore = dict()\n",
"datastore['hists'] = df.pm_make_histograms(time_axis='DATE', time_width='1w', time_offset='2015-07-02')\n",
"datastore[\"hists\"] = df.pm_make_histograms(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
")\n",
"\n",
"modules = [\n",
" HistSplitter(read_key='hists', store_key='split_hists', feature_begins_with='DATE'),\n",
" HistProfiler(read_key='split_hists', store_key='profiles'),\n",
" ReferenceHistComparer(reference_key='split_hists', assign_to_key='split_hists', store_key='comparisons'),\n",
" SectionGenerator(section_name='Profiles', read_key=\"profiles\", store_key=\"report_sections\"),\n",
" SectionGenerator(section_name=\"Comparisons\", read_key=\"comparisons\", store_key=\"report_sections\"),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\")\n",
" HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" ReferenceHistComparer(\n",
" reference_key=\"split_hists\",\n",
" assign_to_key=\"split_hists\",\n",
" store_key=\"comparisons\",\n",
" ),\n",
" SectionGenerator(\n",
" section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\"\n",
" ),\n",
" SectionGenerator(\n",
" section_name=\"Comparisons\", read_key=\"comparisons\", store_key=\"report_sections\"\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
"]\n",
"\n",
"pipeline = Pipeline(modules)\n",
Expand Down Expand Up @@ -421,4 +490,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
16 changes: 13 additions & 3 deletions popmon/notebooks/popmon_tutorial_basic.ipynb
Expand Up @@ -16,6 +16,7 @@
"source": [
"# (optional) Adjust the jupyter notebook style for easier navigation of the reports\n",
"from IPython.core.display import display, HTML\n",
"\n",
"# Wider notebook\n",
"display(HTML(\"<style>.container { width:80% !important; }</style>\"))\n",
"# Cells are higher by default\n",
Expand All @@ -37,6 +38,7 @@
"source": [
"# install popmon (if not installed yet)\n",
"import sys\n",
"\n",
"!{sys.executable} -m pip install popmon"
]
},
Expand Down Expand Up @@ -79,7 +81,11 @@
"source": [
"# first we generate histograms,\n",
"# but we could load pre-generated histograms from a pickle or json file as well.\n",
"hists = df.pm_make_histograms(time_axis=\"date\", time_width='2w', features=['date:age', 'date:gender', 'date:isActive'])"
"hists = df.pm_make_histograms(\n",
" time_axis=\"date\",\n",
" time_width=\"2w\",\n",
" features=[\"date:age\", \"date:gender\", \"date:isActive\"],\n",
")"
]
},
{
Expand Down Expand Up @@ -109,7 +115,7 @@
},
"outputs": [],
"source": [
"report # or report_.to_notebook_iframe()"
"report # or report_.to_notebook_iframe()"
]
},
{
Expand Down Expand Up @@ -145,7 +151,11 @@
"metadata": {},
"outputs": [],
"source": [
"report_ = df.pm_stability_report(time_axis=\"date\", time_width='2w', features=['date:age', 'date:isActive', 'date:eyeColor'])"
"report_ = df.pm_stability_report(\n",
" time_axis=\"date\",\n",
" time_width=\"2w\",\n",
" features=[\"date:age\", \"date:isActive\", \"date:eyeColor\"],\n",
")"
]
},
{
Expand Down

0 comments on commit 5b1a6af

Please sign in to comment.