Notebook to demo the functionality of the Excel Report writer using my parkrun data

Note: this notebook also includes an example of retrieving the data dictionary with a query and then filtering it manually.

imports

In [1]:
import sys
import os
import pandas as pd
import sqlite3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add the 'src' directory to the import path so the wrapper class and helper
# modules imported below can be found.
# Assumes this notebook is run from the 'notebooks/' folder and the source is
# in the sibling 'src/' folder (the path is built relative to os.getcwd()).
vSrcPath = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))

# Only append when missing, so re-running this cell does not pile up duplicate
# entries on sys.path.
if vSrcPath not in sys.path:
    sys.path.append(vSrcPath)

from enterprise_writer import EnterpriseExcelWriter
from query_library import fGetDataDictionary, fGetparkrunByYear, fGetparkrunKpis
from config_provider import fGetReportConfig

In [2]:
# Report profile whose settings should drive this workbook
vProfileName = 'GaryTest'

# Look up the configuration for that profile (fetched from the DB, per the
# original note); vConfig is handed to the report writer further down
vConfig = fGetReportConfig(
    vProfileName
)


Get the data using the query functions imported from `query_library`

In [3]:
# Fetch both result sets used by the report in one statement; the calls still
# run left to right, so the KPI query executes before the per-year query.
dfKpi, dfRuns = fGetparkrunKpis(), fGetparkrunByYear()

# To inspect the results while developing, display dfKpi / dfRuns here.

## Objective: Data Dictionary Filtering
**Action:** Retrieve the data dictionary and filter it specifically for the entries relevant to this notebook.

### Context & Current Constraints
There is an automated process to filter these entries; however, the current layout requires the Data Dictionary (DD) to be located on the **same tab and above** the actual data. This creates a procedural conflict:

* **Option A (Manual):** Add the DD entries manually.
* **Option B (Gap Method):**
    1.  Leave a gap at the top of the sheet.
    2.  Move down to add the actual data first.
    3.  Go back up to populate the DD.

In [4]:
# Load the complete data dictionary
dfDict = fGetDataDictionary()

# Column names whose dictionary entries are needed for this report
vTargetColumns = ["RunYear", "TotalRuns", "LowestPos", "FastestTime"]

# Keep only the rows whose "column_name" is one of the targets.
# Boolean-mask selection returns a new dataframe; dfDict itself is unchanged.
dfFilteredDict = dfDict.loc[dfDict["column_name"].isin(vTargetColumns)]

# To check the result while developing, display dfFilteredDict here.

In [None]:
# Create the report writer and bind it to the config loaded earlier
vReport = EnterpriseExcelWriter(
    vFilename="A500760.xlsx",
    vConfig=vConfig,
    vDefaultSheetName='Sheet1',
)

# Register the pre-filtered data dictionary as the column mapping
vReport.fSetColumnMapping(dfFilteredDict)

# Report title, then a spacer row
vReport.fAddTitle("A500760 Report")
vReport.fSkipRows(1)

# Note built from fragments: plain strings mixed with a dict that gives one
# fragment its own bold/colour settings
vReport.fAddText(
    [
        "Gary ",
        {'text': 'Performance ', 'bold': True, 'colour': 'red'},
        "Report",
    ],
    vFontSize=12,
    vFontColour='FF0000',
    vBgColour='FFFF11',
)
vReport.fSkipRows(1)

# Data dictionary section: heading first, then a definition list holding only
# the display name and description columns of the filtered dictionary
vReport.fAddText("Data Dictionary")
vColumnsToKeep = ["display_name", "column_description"]
dfDDOut = dfFilteredDict[vColumnsToKeep]
vReport.fAddDefinitionList(dfDDOut, vStartCol=0)
vReport.fSkipRows(1)

# Banner making clear this is only my own data
vReport.fAddBanner("Just my Data from parkrun", vStyleProfile='Warning')

# KPI row: take the first row of dfKpi (the original note says it is the only
# row) and pass it as a plain dict
vKpiData = dfKpi.iloc[0].to_dict()
vReport.fAddKpiRow(vKpiData)
vReport.fSkipRows(1)

# Main data table
vReport.fWriteDataframe(
    dfRuns,
    vAddTotals=False,
    vAutoFilter=True,
    vColAlignments={'FastestTime': 'center'},
)

# Conditional formats on TotalRuns, applied in the original order:
# values below 40 in Red, values above 39 in Green
for vCriteria, vThreshold, vColour in (('<', 40, 'Red'), ('>', 39, 'Green')):
    vReport.fAddConditionalFormat(
        'TotalRuns',
        'cell',
        {'criteria': vCriteria, 'value': vThreshold},
        vColour=vColour,
    )

###########################################################################
### Second tab: graphs
###########################################################################

vReport.fNewSheet("Graphs", "Graphs for this data")
vReport.fAddTitle("Graphs of my parkrun data")

# Freeze panes at (row 3, col 0); per the original note the header is at row 3
# and freezing keeps it in view
vReport.fFreezePanes(3, 0)

# Seaborn charts added as static images, both plotted against RunYear:
# total runs per year as bars, then best finish position per year as a line
for vYCol, vTitle, vChartType in (
    ('TotalRuns', 'Total Runs by Year', 'bar'),
    ('LowestPos', 'Best Finish by Year', 'line'),
):
    vReport.fAddSeabornChart(
        dfRuns,
        vXCol='RunYear',
        vYCol=vYCol,
        vTitle=vTitle,
        vChartType=vChartType,
    )

# Not done here, but a data dictionary tab and a table-of-contents tab could
# easily be added as well

# Finish the report; the saved notebook output shows
# "File saved: A500760.xlsx" after this cell
vReport.fClose()

File saved: A500760.xlsx
