Le format de ce code est inspiré du script d'Alex Garcia pour Pageviews.


In [1]:
# ── 1. Imports ──────────────────────────────────────────────────────────────
import requests, pandas as pd, time
from datetime import datetime
import plotly.express as px
import ipywidgets as w


In [2]:

# ── 2. Session with User-Agent ──────────────────────────────────────────────
# Every request sent through `session` carries this identifying User-Agent.
UA = "PageviewsDemo/1.0 (https://github.com/aureliusLF; alefichoux@gmail.com)"
session = requests.Session()
session.headers["User-Agent"] = UA


In [3]:

# ── 3. API call ─────────────────────────────────────────────────────────────
def pageviews(site: str, page: str, start: str, end: str) -> pd.DataFrame:
    """Fetch daily user pageviews for one article from the Wikimedia REST API.

    The request goes through the module-level ``session`` and queries the
    ``all-access`` / ``user`` / ``daily`` series.

    Args:
        site: Project domain, e.g. ``"fr.wikipedia"``.
        page: Article title; spaces and underscores are both accepted.
        start: First day to include, formatted ``YYYY-MM-DD``.
        end: Last day to include, formatted ``YYYY-MM-DD``.

    Returns:
        A DataFrame with one row per returned item and the columns ``date``
        (parsed timestamp), ``views`` and ``page`` (the title exactly as it
        was passed in).

    Raises:
        ValueError: If ``start`` or ``end`` is not in ``YYYY-MM-DD`` form.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    from urllib.parse import quote

    start_ts = datetime.strptime(start, "%Y-%m-%d").strftime("%Y%m%d00")
    end_ts   = datetime.strptime(end,   "%Y-%m-%d").strftime("%Y%m%d")

    # The API wants underscores instead of spaces and a fully URI-encoded
    # title; safe="" also encodes "/" so titles such as "AC/DC" stay a single
    # path segment instead of changing the route.
    article = quote(page.replace(" ", "_"), safe="")

    url = (
        f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
        f"{site}/all-access/user/{article}/daily/{start_ts}/{end_ts}"
    )
    r = session.get(url, timeout=30)
    r.raise_for_status()                    # raises on any 4xx/5xx response

    items = r.json().get("items", [])
    return pd.DataFrame({
        "date":  pd.to_datetime([it["timestamp"] for it in items], format="%Y%m%d00"),
        "views": [it["views"] for it in items],
        "page":  page                       # keep the caller's title for the legend
    })


In [4]:

# ── 4. Download several pages ───────────────────────────────────────────────
def fetch_pages(site, pages, start, end):
    """Download pageviews for several articles and stack them in one frame.

    Args:
        site: Project domain forwarded to ``pageviews``.
        pages: Iterable of article titles; may be empty.
        start: First day to include, formatted ``YYYY-MM-DD``.
        end: Last day to include, formatted ``YYYY-MM-DD``.

    Returns:
        A DataFrame with the columns ``date``, ``views`` and ``page`` holding
        the rows of every requested page, re-indexed from 0. When ``pages``
        is empty, an empty DataFrame with those same columns is returned.
    """
    frames = []
    for i, title in enumerate(pages):
        if i:
            time.sleep(0.1)                 # courtesy pause between requests: 10 req/s max.
        frames.append(pageviews(site, title, start, end))

    if not frames:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame with the usual columns so downstream plotting still works.
        return pd.DataFrame({
            "date":  pd.Series(dtype="datetime64[ns]"),
            "views": pd.Series(dtype="int64"),
            "page":  pd.Series(dtype="object"),
        })
    return pd.concat(frames, ignore_index=True)


In [11]:

# ── 5. Input widgets ────────────────────────────────────────────────────────
# Each description has a no-break space before the colon, written here as the
# explicit \xa0 escape so the character is visible in the source.
site_w = w.Text(description="Wiki\xa0:", value="fr.wikipedia")
pages_w = w.Textarea(
    description="Pages\xa0:",
    value="Punaise de lit",
    layout=w.Layout(width="50%"),
)
start_w = w.DatePicker(description="Start\xa0:", value=datetime(2022, 10, 1))
end_w = w.DatePicker(description="End\xa0:", value=datetime(2025, 4, 1))
display(site_w, pages_w, start_w, end_w)


Text(value='fr.wikipedia', description='Wiki\xa0:')

Textarea(value='Punaise de lit', description='Pages\xa0:', layout=Layout(width='50%'))

DatePicker(value=datetime.datetime(2022, 10, 1, 0, 0), description='Start\xa0:', step=1)

DatePicker(value=datetime.datetime(2025, 4, 1, 0, 0), description='End\xa0:', step=1)

In [12]:

# ── 6. Fetch & plot ─────────────────────────────────────────────────────────
# One title per non-blank line of the textarea, surrounding whitespace removed.
pages = list(filter(None, map(str.strip, pages_w.value.splitlines())))
df = fetch_pages(
    site_w.value,
    pages,
    start_w.value.strftime("%Y-%m-%d"),
    end_w.value.strftime("%Y-%m-%d"),
)

# One line per page, daily views on the y-axis.
fig = px.line(
    df,
    x="date",
    y="views",
    color="page",
    labels={"views": "views / day"},
    title=f"Pageviews — {site_w.value}",
)
fig.show()
