# KPI Exploration & Aggregation

- Loads cleaned data, prints date range.
- Rolls up `country × operator × RAT`: p50/p95/p99 PLT, p95 buffering/startup, and n.
- Hotspot targeting: sorts by p95 PLT and n.
- Drilldown: contributors by cell, PCI, TAC, band, channel.
- Diagnostics: PLT vs. radio KPIs (RSRP, RSRQ, SINR) and observables (RTT, app size).
- Trends: daily PLT by RAT, overall and by hotspot.


In [None]:
# Step 1: Imports + load
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling applied to the figures drawn after this point.
sns.set_theme(style="whitegrid")

# Prefer the parquet export when it is present; otherwise read the CSV export
# and parse `timestamp` into datetimes while reading.
df = (
    pd.read_parquet("synthetic_qoe_sessions_clean.parquet")
    if os.path.exists("synthetic_qoe_sessions_clean.parquet")
    else pd.read_csv("synthetic_qoe_sessions_clean.csv", parse_dates=['timestamp'])
)

print(f"Dataset shape: {df.shape}")
display(df.head())

# Column groups reused by the later steps.
kpis = [
    'page_load_time_ms',
    'buffering_ratio',
    'startup_delay_ms',
]
radio = [
    'rsrp_dbm',
    'rsrq_db',
    'sinr_db',
]
observables = [
    'app_size_kb',
    'rtt_ms',
]
cats = [
    'country',
    'device',
    'network_type',
    'operator',
    'band',
]


In [None]:
# Step 2: Hotspot scan: period range + country × operator × RAT percentiles

# Period range for the loaded dataset (used to contextualize the Step 5 time series).
period_start = pd.to_datetime(df['timestamp'].min())
period_end   = pd.to_datetime(df['timestamp'].max())
# Inclusive count of calendar days, so it always agrees with the two dates printed
# below. Flooring the elapsed seconds by 86400 under-counts whenever the span
# crosses midnight without covering a full 24 hours (e.g. 23:00 → 01:00 next day).
num_days = (period_end.date() - period_start.date()).days + 1
print(f"Data period: {period_start:%Y-%m-%d} → {period_end:%Y-%m-%d}  ({num_days} days)")

# Aggregates at country × operator × RAT: PLT percentiles, tail buffering/startup,
# and the session count per group. Worst p95 PLT first; ties broken by larger n.
g = (
    df.groupby(['country','operator','network_type'], observed=True)
      .agg(
          p50_plt=('page_load_time_ms','median'),
          p95_plt=('page_load_time_ms', lambda x: x.quantile(0.95)),
          p99_plt=('page_load_time_ms', lambda x: x.quantile(0.99)),
          p95_buf=('buffering_ratio',    lambda x: x.quantile(0.95)),
          p95_sdl=('startup_delay_ms',   lambda x: x.quantile(0.95)),
          n=('session_id','count')
      )
      .reset_index()
      .sort_values(['p95_plt','n'], ascending=[False, False])
)

# ---- Print remarks for interpretation ----
# NOTE(review): the figures quoted in these remarks are fixed text, not derived
# from `g`; re-check them against the table whenever the dataset changes.
print("\n[Operator-Level KPI Summary]")
print("- Scope: Aggregated QoE metrics per operator (p50, p95, p99 PLT, buffering, session size), focusing on PLT.")
print("- Median Stability: Typical PLT ~420–435 ms across all operators.")
print("- Tail Variability:")
print("    * T-Mobile (US): very high p99 PLT (1536 ms) → severe delays in worst sessions.")
print("    * Airtel (IN): relatively lower tail (1317 ms).")
print("\nInterpretation (for page_load_time_ms):")
print("- p95 ≈ 1200 ms → 95% of sessions finish within 1200 ms; 5% are slower (the 'tail').")
print("- Why percentiles:")
print("    * p50 shows the typical user experience.")
print("    * p95 / p99 capture the worst-case experience that drives complaints and churn.")

display(g.head(15))

# ---- Prioritization lens & hotspot target ----
# The hotspot is chosen by hand after reading the table above; it is not taken
# from `g` automatically.
print("\n[Hotspot Target Selection]")
print("- Prioritize top hotspot row with both high p95_plt and large n (big user impact).")
SELECT_COUNTRY  = 'VN'          # e.g., 'VN'
SELECT_OPERATOR = 'Viettel'     # e.g., 'Viettel'
SELECT_RAT      = '4G'          # e.g., '4G'
print(f"- Selected hotspot → country={SELECT_COUNTRY} | operator={SELECT_OPERATOR} | RAT={SELECT_RAT}")


In [None]:
# Step 3: Drill-down contributors inside the worst operator hotspot

# Hotspot to drill into, chosen by hand: prioritize a row with both high p95_plt
# and large n (big user impact). Set here so this cell can run on its own.
SELECT_COUNTRY  = 'VN'          # e.g., 'VN'
SELECT_OPERATOR = 'Viettel'     # e.g., 'Viettel'
SELECT_RAT      = '4G'          # e.g., '4G'

print(f"Hotspot target → country={SELECT_COUNTRY} | operator={SELECT_OPERATOR} | RAT={SELECT_RAT}")

# Sessions belonging to the selected country / operator / RAT.
hot = df[
    (df['country'] == SELECT_COUNTRY) &
    (df['operator'] == SELECT_OPERATOR) &
    (df['network_type'] == SELECT_RAT)
].copy()

print(f"Subset size: {len(hot):,} sessions")


def _top_contributors(subset, key, top_n=20, **extra_aggs):
    """Rank the values of `key` inside `subset` by tail page-load time.

    Rows with a missing `key` are dropped. Each group gets its session count
    (`n`) and the 95th percentile of `page_load_time_ms` (`p95_plt`), followed
    by any additional named aggregations passed as keyword arguments. The
    result is sorted by `p95_plt` then `n` (both descending) and cut to the
    first `top_n` rows.
    """
    return (
        subset.dropna(subset=[key])
              .groupby(key, observed=True)
              .agg(
                  n=('session_id', 'count'),
                  p95_plt=('page_load_time_ms', lambda x: x.quantile(0.95)),
                  **extra_aggs,
              )
              .sort_values(['p95_plt', 'n'], ascending=[False, False])
              .head(top_n)
    )


# 1) cell_id contributors
cell_tbl = _top_contributors(
    hot, 'cell_id',
    p99_plt=('page_load_time_ms', lambda x: x.quantile(0.99)),
    rsrp_mean=('rsrp_dbm', 'mean'),
    sinr_mean=('sinr_db', 'mean'),
)
print("\nTop cells by p95_plt (and n):")
display(cell_tbl)

# 2) PCI contributors
pci_tbl = _top_contributors(
    hot, 'pci',
    rsrq_mean=('rsrq_db', 'mean'),
    sinr_mean=('sinr_db', 'mean'),
)
print("\nTop PCI by p95_plt (and n):")
display(pci_tbl)

# 3) TAC contributors
tac_tbl = _top_contributors(hot, 'tac')
print("\nTop TAC by p95_plt (and n):")
display(tac_tbl)

# ---- Print remarks for interpretation ----
# The header follows the SELECT_* values so it stays correct when the target changes.
print(f"\n[Hotspot Analysis — {SELECT_OPERATOR} {SELECT_RAT} ({SELECT_COUNTRY})]")
print(f"- Scope: Subset filtered to country={SELECT_COUNTRY} | operator={SELECT_OPERATOR} | RAT={SELECT_RAT} ({len(hot):,} sessions).")
print("- Top Cells, PCI, and TAC by QoE: Prioritize rows with both high p95_plt and large n (big user impact).")
# NOTE(review): the band names in the next remark are fixed text; confirm them
# against the heatmap when the hotspot selection changes.
print("- Band × Channel View: Traffic is concentrated across B3, B7, and B8 carriers. This helps link QoE outliers to spectrum usage.")

# 4) Band × Channel heatmap (volume)
import seaborn as sns
import matplotlib.pyplot as plt

# Session counts per (band, channel_number); combinations with no sessions are 0.
bch = (
    hot.dropna(subset=['band','channel_number'])
       .groupby(['band','channel_number'], observed=True)
       .size()
       .unstack(fill_value=0)
)
if bch.empty:
    # Nothing to draw for an empty table, so skip the plot instead of failing.
    print("No band/channel rows for the selected hotspot; skipping heatmap.")
else:
    plt.figure(figsize=(12,4))
    sns.heatmap(bch, cmap='Blues')
    plt.title(f"Traffic Concentration — {SELECT_OPERATOR} {SELECT_RAT} in {SELECT_COUNTRY} (Band × Channel)")
    plt.xlabel("Channel Number"); plt.ylabel("Band"); plt.tight_layout(); plt.show()


In [None]:
# Step 4: Relationship diagnostics — PLT-focused (sampled)
def dense_limits(s, lo=1, hi=99):
    """Return the `lo`-th and `hi`-th percentile of `s` as a pair of floats.

    `lo` and `hi` are given on a 0–100 scale and converted to fractions
    before being passed to `s.quantile`.
    """
    bounds = s.quantile([lo / 100.0, hi / 100.0])
    lower = float(bounds.iloc[0])
    upper = float(bounds.iloc[1])
    return lower, upper

# Upper bound on the number of points drawn per set of panels.
SAMPLE_N = 60000

# Prefer the hotspot subset; when `hot` or a SELECT_* name is undefined, fall
# back to the whole dataset.
try:
    SCOPE = hot.copy()
    scope_label = f"{SELECT_OPERATOR} {SELECT_RAT} in {SELECT_COUNTRY}"
    rat_filter = [SELECT_RAT]
except NameError:
    SCOPE = df
    scope_label = "All operators / all countries"
    rat_filter = ['4G', '5G']          # original behavior

# ---- Remarks shown ahead of the radio panels ----
for remark in (
    f"\n[Relationship Diagnostics — {scope_label}]",
    f"Scope: Same hotspot subset ({scope_label}). Goal: validate whether poor PLT tails align with radio or transport factors.",
    "\n- Radio ↔ PLT:",
    "  * RSRP: Flat → coverage is not the main driver.",
    "  * RSRQ: Flat → load is not strongly reflected in PLT for this subset.",
    "  * SINR: Strong negative slope → as interference reduces (higher SINR), PLT improves.",
):
    print(remark)

# 4A: Radio ↔ PLT — rows of the scope on the selected RAT(s) with all four columns present
radio_cols = ['rsrp_dbm', 'rsrq_db', 'sinr_db', 'page_load_time_ms']
radio_df = SCOPE[SCOPE['network_type'].isin(rat_filter)][radio_cols].dropna(subset=radio_cols)
if len(radio_df) > SAMPLE_N:
    sample_radio = radio_df.sample(SAMPLE_N, random_state=7)
else:
    sample_radio = radio_df
# Shared y-range (1st–99th percentile of PLT) so the three panels are comparable.
y_lo, y_hi = dense_limits(sample_radio['page_load_time_ms'])

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
rat_text = ", ".join(rat_filter)
for ax, (x_col, x_label) in zip(
    axes,
    [('rsrp_dbm', 'RSRP'), ('rsrq_db', 'RSRQ'), ('sinr_db', 'SINR')],
):
    sns.regplot(
        data=sample_radio, x=x_col, y='page_load_time_ms',
        scatter_kws={'alpha':0.12, 's':10}, line_kws={'linewidth':2}, ax=ax
    )
    ax.set_title(f'PLT vs {x_label} ({rat_text}) — {scope_label}')
    ax.set_ylim(y_lo, y_hi * 1.05)

plt.tight_layout(); plt.show()

# ---- Remarks shown ahead of the transport / application panels ----
for remark in (
    "\n- Transport / Application ↔ PLT:",
    "  * RTT: Clear positive correlation → higher latency directly increases PLT.",
    "  * App Size: Exponential effect (log-x) → large page sizes dominate PLT tails.",
):
    print(remark)

# 4B: Observables ↔ PLT — every row of the scope with all three columns present
obs_df = SCOPE[['rtt_ms', 'app_size_kb', 'page_load_time_ms']].dropna()
if len(obs_df) > SAMPLE_N:
    sample_obs = obs_df.sample(SAMPLE_N, random_state=7)
else:
    sample_obs = obs_df
y_lo2, y_hi2 = dense_limits(sample_obs['page_load_time_ms'])

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (x_col, x_label, log_x) in zip(
    axes,
    [('rtt_ms', 'RTT', False), ('app_size_kb', 'App Size (log-x)', True)],
):
    sns.regplot(
        data=sample_obs, x=x_col, y='page_load_time_ms',
        scatter_kws={'alpha':0.12, 's':10}, line_kws={'linewidth':2}, ax=ax
    )
    if log_x:
        # Only the app-size panel uses a logarithmic x-axis.
        ax.set_xscale('log')
    ax.set_title(f'PLT vs {x_label} — {scope_label}')
    ax.set_ylim(y_lo2, y_hi2 * 1.05)

plt.tight_layout(); plt.show()

for remark in (
    "\nInterpretation:",
    "- Tail PLT events are a mix of interference (SINR) and transport/application factors (RTT, app size).",
    "- Neither coverage (RSRP) nor load (RSRQ) are primary bottlenecks here.",
):
    print(remark)


In [None]:
# Step 5: Daily trends — overall vs selected hotspot (country+operator)


def _daily_mean_plt_by_rat(frame):
    """Return the daily mean `page_load_time_ms` of `frame`, one column per network type.

    `frame` is indexed by `timestamp`, grouped by `network_type`, resampled to
    calendar days and averaged; the network-type level is then moved to columns.
    """
    return (
        frame.set_index('timestamp')
             .groupby('network_type', observed=True)['page_load_time_ms']
             .resample('D').mean().unstack(0)
    )


# Overall daily PLT by network type
overall_daily = _daily_mean_plt_by_rat(df)

# Select hotspot target: use the SELECT_* choice when both names are defined,
# otherwise take the first row of the Step 2 table `g`.
try:
    sel_country  = str(SELECT_COUNTRY)
    sel_operator = str(SELECT_OPERATOR)
except NameError:
    _top = g.iloc[0]
    sel_country  = str(_top['country'])
    sel_operator = str(_top['operator'])

# Note: this view keeps every RAT of the selected country + operator.
sel_df = df[(df['country'] == sel_country) & (df['operator'] == sel_operator)]

# Guard for empty subset
if sel_df.empty:
    print(f"No rows for selected hotspot → country={sel_country}, operator={sel_operator}. Showing overall only.")
    ax = overall_daily.plot(figsize=(14,6), title="Daily PLT — Overall by Network Type")
    ax.set_ylabel("Page Load Time (ms)")
    ax.set_xlabel("Date")
    ax.legend(title="Network Type")
    plt.show()
else:
    sel_daily = _daily_mean_plt_by_rat(sel_df)

    # Side-by-side panels on a shared y-axis so levels can be compared directly.
    fig, axes = plt.subplots(1, 2, figsize=(20,5), sharey=True)

    overall_daily.plot(ax=axes[0])
    axes[0].set_title("Daily PLT — Overall by Network Type")
    axes[0].set_ylabel("Page Load Time (ms)")
    axes[0].set_xlabel("Date")
    axes[0].legend(title="Network Type")

    sel_daily.plot(ax=axes[1])
    axes[1].set_title(f"Daily PLT — {sel_operator} in {sel_country}")
    axes[1].set_xlabel("Date")
    axes[1].legend(title="Network Type")

    plt.tight_layout()
    plt.show()

    # ---- Print remarks for interpretation ----
    # NOTE(review): the levels quoted below are fixed text, not computed from the
    # series plotted above.
    print("\n[Daily PLT Trends]")
    # The series above are daily means, so the scope line says "mean".
    print("Scope: Time-series of daily mean Page Load Time (PLT).")
    print("\n- Overall View (left):")
    print("  * 4G shows consistently higher PLT (~500 ms), 5G is lowest and most stable (~400 ms), and WiFi sits in between slightly better than 4G.")
    print("  * Confirms expected hierarchy: 5G < WiFi < 4G in terms of application-layer QoE.")
    print(f"\n- {sel_operator} in {sel_country} (right):")
    print("  * 4G PLT is volatile, with spikes exceeding 600 ms on several days. 5G remains consistently stable and low, with minimal variance.")
    print("  * Indicates that 4G tail events are persistent, while 5G performance is robust.")
    print("\nInterpretation & Operational Use:")
    print("- Baseline: Daily trend analysis helps confirm if a hotspot is persistent vs transient.")
    print("- Alerting: If 4G PLT breaches thresholds consistently, an alert should be raised.")
    print("- Action: Persistent hotspot trends should trigger targeted interventions (capacity, interference mitigation, or spectrum refarming) with the operator.")


In [None]:
# Step 6: Artefacts for handoff
import os, re

os.makedirs("artefacts", exist_ok=True)

# Country × operator × RAT aggregates and the overall daily series.
g.to_csv("artefacts/kpi_aggregates_country_operator_network.csv", index=False)
overall_out = overall_daily.copy()
overall_out.index.name = "date"
overall_out.to_csv("artefacts/daily_page_load_by_network_overall.csv")


def _slug(s):  # safe-ish filename
    """Return `str(s)` with each run of characters outside `A-Za-z0-9._-` replaced by `_`."""
    return re.sub(r'[^A-Za-z0-9._-]+', '_', str(s))


# The hotspot daily series (`sel_daily`) is only built when the selected
# country + operator has rows, so skip its CSV for an empty selection instead of
# failing on an undefined (or stale) `sel_daily`.
if sel_df.empty:
    print(f"No rows for selected hotspot → country={sel_country}, operator={sel_operator}. Skipping hotspot daily CSV.")
else:
    sel_out = sel_daily.copy()
    sel_out.index.name = "date"
    sel_fname = f"artefacts/daily_page_load_by_network_{_slug(sel_operator)}_{_slug(sel_country)}.csv"
    sel_out.to_csv(sel_fname)

# Hotspot drill-down tables (index = cell_id / pci / tac).
cell_tbl.to_csv("artefacts/hotspot_top_cells.csv")
pci_tbl.to_csv("artefacts/hotspot_top_pci.csv")
tac_tbl.to_csv("artefacts/hotspot_top_tac.csv")

print("Saved artefacts to ./artefacts/")


## Correlation & Category Effects

- Separate from the hotspot view: this section checks **which things move together** and **which categories matter**.
- **Heatmap:** shows how PLT relates to `rtt_ms`, `app_size_kb`, and radio KPIs (expect PLT ↑ with RTT/size, ↓ with SINR/RSRP).
- **Category impact:** see how `country`, `operator`, `network_type`, `device`, `band` shift PLT and other KPIs.
- Use the results to **pick telemetry to track**, **choose model features**, and **decide segments** for targeting/alerts.
- Correlation is directional guidance, **not** causation—validate before action.


In [None]:
# Step 7: Numeric ↔ numeric correlations (Pearson) — include observables
import seaborn as sns
import matplotlib.pyplot as plt

# Column groups for the correlation matrix (redefined here so the cell can run alone).
kpis = [
    'page_load_time_ms',
    'buffering_ratio',
    'startup_delay_ms',
]
radio = [
    'rsrp_dbm',
    'rsrq_db',
    'sinr_db',
]
observables = [
    'rtt_ms',
    'app_size_kb',
]

# Keep only the columns that actually exist in `df`, in group order.
numeric_vars = [name for name in [*kpis, *radio, *observables] if name in df.columns]

num_df = df[numeric_vars].copy()
corr_mat = num_df.corr(method='pearson')

plt.figure(figsize=(20,4))
sns.heatmap(corr_mat, annot=True, fmt=".2f", cmap="vlag", center=0)
plt.title("Pearson Correlation — KPIs, Radio, Observables")
plt.show()

# Optional pairplot (off by default; limited to 3000 sampled rows when enabled)
DO_PAIRPLOT = False
if DO_PAIRPLOT:
    pair_vars = [
        v
        for v in ['page_load_time_ms','rtt_ms','app_size_kb','sinr_db','buffering_ratio']
        if v in num_df.columns
    ]
    pp_sample = df[pair_vars].dropna()
    if len(pp_sample) > 3000:
        pp_sample = pp_sample.sample(3000, random_state=13)
    sns.pairplot(pp_sample, corner=True, plot_kws={'alpha':0.2,'s':12})
    plt.suptitle("Pairwise Relationships (Sampled)", y=1.02)
    plt.show()

# ---- Print remarks for interpretation ----
print("\n[Correlation Summary — KPIs × Radio × Observables]")
# (row, column, text before the coefficient, text after the arrow); a remark is
# printed only when both variables are present in the matrix. The last two
# entries are secondary relationships that help RCA.
corr_remarks = [
    ('page_load_time_ms', 'rtt_ms',      "- PLT vs RTT: positive",               "higher transport latency increases PLT."),
    ('page_load_time_ms', 'app_size_kb', "- PLT vs App Size: positive",          "heavier pages drive PLT tails."),
    ('page_load_time_ms', 'sinr_db',     "- PLT vs SINR: negative",              "less interference (higher SINR) improves PLT."),
    ('page_load_time_ms', 'rsrp_dbm',    "- PLT vs RSRP: weak",                  "coverage not the main driver here."),
    ('page_load_time_ms', 'rsrq_db',     "- PLT vs RSRQ: weak",                  "load signal is limited in this slice."),
    ('buffering_ratio',   'sinr_db',     "- Buffering vs SINR: strong negative", "interference elevates buffering."),
    ('startup_delay_ms',  'rtt_ms',      "- Startup Delay vs RTT: positive",     "transport latency affects start-up."),
]
for row_var, col_var, lead, tail in corr_remarks:
    if row_var in corr_mat and col_var in corr_mat:
        print(f"{lead} (≈ {corr_mat.loc[row_var, col_var]:.2f}) → {tail}")

for remark in (
    "\nInterpretation:",
    "- PLT is primarily driven by transport (RTT) and payload size; radio interference (SINR) is a secondary lever.",
    "- Coverage (RSRP) and load proxy (RSRQ) are not dominant in this dataset; verify per-hotspot before acting.",
):
    print(remark)


In [None]:
# Step 8: Categorical → numeric effect size (η; correlation ratio) — KPIs + observables
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Categorical features to score, and the numeric targets (KPIs + observables)
# that are present in `df`.
cats = [
    'country',
    'device',
    'network_type',
    'operator',
    'band',
]
targets_for_eta = [name for name in kpis + observables if name in df.columns]

def correlation_ratio(categories, measurements):
    """Return the correlation ratio (η) between a categorical and a numeric variable.

    Pairs where either value is missing are ignored. η is the square root of
    the between-group sum of squares divided by the total sum of squares.
    Returns NaN when no valid pair remains or when the total sum of squares
    is not positive.
    """
    cat_s = pd.Series(categories, copy=False).astype('category')
    num_s = pd.Series(measurements, copy=False).astype(float)

    # Keep only the positions where both the category and the measurement exist.
    keep = cat_s.notna() & num_s.notna()
    if not keep.any():
        return np.nan
    cat_s = cat_s[keep].cat.remove_unused_categories()
    num_s = num_s[keep]

    grand_mean = num_s.mean()
    grouped = num_s.groupby(cat_s, observed=True)
    group_means = grouped.mean()
    group_sizes = grouped.size()

    between = np.sum(group_sizes * (group_means - grand_mean) ** 2)
    total = np.sum((num_s - grand_mean) ** 2)
    if total > 0:
        return float(np.sqrt(between / total))
    return np.nan

# One float Series per categorical feature present in `df`: index = target name,
# value = η of that feature on the target; the Series name is the feature.
rows = [
    pd.Series(
        {t: correlation_ratio(df[c], df[t]) for t in targets_for_eta},
        name=c,
        dtype='float64',
    )
    for c in cats
    if c in df.columns
]

# Stack the Series into a feature × target table.
eta_df = pd.DataFrame(rows)
eta_df.index.name = 'feature'

plt.figure(figsize=(20,5))
sns.heatmap(eta_df, annot=True, fmt=".2f", cmap="YlGnBu", vmin=0, vmax=1)
plt.title("Effect Size (η) of Categories on QoE KPIs + Observables")
plt.tight_layout(); plt.show()

# ---- Remarks for interpretation ----
for remark in (
    "\n[Effect Size (η) — Categories on QoE KPIs + Observables]",
    "- Network Type and Band show the strongest effects on KPIs (η up to ~0.75–0.77 for RTT).",
    "- Operator also influences buffering and PLT (η ~0.3–0.4), reflecting policy and deployment differences.",
    "- Device effect is small for KPIs, but noticeable on App Size (η ~0.39) — likely content type or device class bias.",
    "- Country effect is negligible (η ~0.01) → avoid using 'country' as a model feature since it adds little predictive power.",
    "\nInterpretation:",
    "- QoE variability is mainly explained by technology choice (4G/5G/WiFi) and spectrum band.",
    "- Operators still differentiate user experience, especially via buffering and load management.",
    "- Device profile matters more for payload size than core QoE KPIs.",
    "- Country label should not be included as a model feature; operator and band capture the real drivers.",
):
    print(remark)


In [None]:
# Step 9: Categorical ↔ categorical (Cramér’s V)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Categorical columns to compare pairwise, limited to those present in `df`.
cat_cols = [
    name
    for name in ['country', 'device', 'network_type', 'operator', 'band']
    if name in df.columns
]

def cramers_v(x, y):
    """Return Cramér's V for two categorical variables.

    Builds the contingency table with `pd.crosstab`, computes the chi-square
    statistic against the independence expectation, and scales it by
    `n * min(rows - 1, cols - 1)`. Returns NaN when the table is empty or
    when that scale term is not positive (e.g. a variable with one level).
    """
    contingency = pd.crosstab(x, y)
    if contingency.size == 0:
        return np.nan

    counts = contingency.values
    total = counts.sum()
    row_totals = counts.sum(axis=1)
    col_totals = counts.sum(axis=0)
    expected = np.outer(row_totals, col_totals) / total

    # Cells with a zero expectation yield inf/NaN; silence the warnings and let
    # nansum skip the NaN terms.
    with np.errstate(divide='ignore', invalid='ignore'):
        chi2 = np.nansum((counts - expected) ** 2 / expected)

    n_rows, n_cols = counts.shape
    denom = total * min(n_rows - 1, n_cols - 1)
    if denom > 0:
        return float(np.sqrt(chi2 / denom))
    return np.nan

# Square matrix of Cramér's V over the categorical columns. V is symmetric, so
# each unordered pair is computed once (one crosstab per pair) and mirrored; the
# diagonal is 1.0 by definition.
cv = pd.DataFrame(index=cat_cols, columns=cat_cols, dtype=float)
for pos, i in enumerate(cat_cols):
    cv.loc[i, i] = 1.0
    for j in cat_cols[pos + 1:]:
        pair_v = cramers_v(df[i], df[j])
        cv.loc[i, j] = pair_v
        cv.loc[j, i] = pair_v

plt.figure(figsize=(20,5))
sns.heatmap(cv.astype(float), annot=True, fmt=".2f", cmap="Purples", vmin=0, vmax=1)
plt.title("Cramér's V (Categorical ↔ Categorical)")
plt.tight_layout(); plt.show()

# ---- Print remarks for interpretation ----
# NOTE(review): the coefficients quoted below are fixed text, not read from `cv`;
# re-check them against the heatmap when the dataset changes.
print("\n[Cramér’s V — Categorical ↔ Categorical Associations]")
print("- Country ↔ Operator: very strong association (≈0.82) → redundant; operator already encodes country deployment.")
print("- Network Type ↔ Band: perfect association (1.0) → band choice is determined by RAT; avoid including both in the same model.")
print("- Network Type ↔ Operator: strong link (≈0.71) → reflects operator rollout policy by RAT.")
print("- Device has weak associations (<0.05) → relatively independent of other categories.")
print("\nInterpretation:")
print("- For modeling, drop 'country' and avoid using both 'network_type' and 'band' together.")
print("- Keep 'operator' and either 'network_type' or 'band' depending on modeling granularity.")
print("- Device remains useful as an independent signal.")


In [None]:
# Step 10: Save artefacts from the correlation suite
import os
# Make sure the output folder exists, then write one CSV per matrix.
os.makedirs("artefacts", exist_ok=True)
for frame, path in (
    (corr_mat, "artefacts/numeric_correlation_matrix.csv"),
    (eta_df, "artefacts/categorical_to_kpi_eta.csv"),
    (cv, "artefacts/cramers_v_categorical_matrix.csv"),
):
    frame.to_csv(path)
print("Saved correlation artefacts to ./artefacts/")
