In [379]:
import glob
import os

import frontmatter
import pandas as pd
import requests
from dotenv import load_dotenv

In [380]:
# Load variables from a local .env file into the process environment;
# API_KEY is read from the environment in the configuration cell.
load_dotenv()

True

### Fonctions

In [383]:
# Base URL of the targeted data.gouv.fr instance; every API call in this
# notebook is built from it.
DATAGOUV_URL = "https://demo.data.gouv.fr"

# API key taken from the environment (None when API_KEY is not set) and the
# authentication header passed to the POST / PUT / DELETE requests.
api_key = os.environ.get("API_KEY")
headers = {"X-API-KEY": api_key}

In [331]:
def publish_post(post, mapping_datagouv, df):
    """Create a post on data.gouv.fr from a guide, then publish it.

    Parameters
    ----------
    post : mapping
        Guide properties. Keys listed in ``mapping_datagouv`` are copied into
        the request payload; the optional ``"api"`` entry is a list of API
        titles to attach to the post as datasets.
    mapping_datagouv : dict
        Maps a key of ``post`` to the payload field it is sent under.
    df : pandas.DataFrame
        Published APIs, with ``"title"`` and ``"dataset_id"`` columns. It may
        be completely empty (no columns), in which case no dataset is linked.

    Returns
    -------
    dict
        JSON body returned by the post creation request.

    Raises
    ------
    requests.HTTPError
        If the creation or the publication request fails.
    """
    payload = {"datasets": []}

    # A DataFrame built from an empty list has no columns at all, so
    # df["title"] would raise KeyError: only look titles up when possible.
    can_link = "title" in df.columns and "dataset_id" in df.columns
    api_titles = post["api"] if "api" in post and post["api"] else []
    if can_link:
        for api_title in api_titles:
            matches = df.loc[df["title"] == api_title, "dataset_id"]
            if not matches.empty:
                # Several datasets may share a title: keep the first one.
                payload["datasets"].append({"id": matches.iloc[0]})

    # Sorted iteration keeps the result deterministic if two keys of `post`
    # are mapped to the same payload field.
    for key in sorted(post.keys()):
        if key in mapping_datagouv:
            payload[mapping_datagouv[key]] = post[key]
    payload["published"] = True

    r = requests.post(
        f"{DATAGOUV_URL}/api/1/posts/", json=payload, headers=headers
    )
    r.raise_for_status()
    res = r.json()

    # The post is additionally published through the dedicated endpoint.
    r = requests.post(
        f"{DATAGOUV_URL}/api/1/posts/{res['id']}/publish", headers=headers
    )
    r.raise_for_status()
    return res

In [332]:
def publish_guide_on_dataset(post, page, df=None):
    """Prepend a "referenced in guide" notice to each dataset used by a guide.

    For every API title listed in ``post["api"]`` that is found in ``df``,
    the dataset description is fetched, prefixed with a markdown quote
    linking to the guide, and written back.

    Parameters
    ----------
    post : mapping
        Guide properties; reads ``"api"`` (list of API titles) and ``"title"``.
    page : str
        URL of the published guide, used as the link target.
    df : pandas.DataFrame, optional
        Published APIs, with ``"title"`` and ``"dataset_id"`` columns. When
        omitted, the notebook-level variable ``df`` is used, as before this
        parameter existed.

    Raises
    ------
    NameError
        If ``df`` is not given and no notebook-level ``df`` exists.
    requests.HTTPError
        If reading or updating a dataset fails.
    """
    api_titles = post["api"] if "api" in post and post["api"] else []
    if not api_titles:
        return

    if df is None:
        # Backward compatibility with callers relying on the global `df`.
        try:
            df = globals()["df"]
        except KeyError:
            raise NameError(
                "name 'df' is not defined; pass it with the df argument"
            ) from None

    # A DataFrame built from an empty list has no columns: nothing to link.
    if "title" not in df.columns or "dataset_id" not in df.columns:
        return

    for api_title in api_titles:
        matches = df.loc[df["title"] == api_title, "dataset_id"]
        if matches.empty:
            continue
        url = f"{DATAGOUV_URL}/api/1/datasets/{matches.iloc[0]}/"

        r = requests.get(url)
        # Fail here rather than on a missing "description" key of an error body.
        r.raise_for_status()
        content = r.json()["description"]
        content = f"> Cet API est référencée dans le guide [{post['title']}]({page}).\n\n\n" + content

        r = requests.put(
            url,
            json={'description': content},
            headers=headers
        )
        r.raise_for_status()

In [362]:
def publish_resources(post, dataset_id, file_name):
    """Attach the documentation and API resources to a dataset.

    Two remote resources are created, in this order: the api.gouv.fr
    documentation page, then the API itself.

    Parameters
    ----------
    post : dict
        API page properties; reads ``"title"`` and the optional
        ``"doc_tech_external"`` (URL of the external technical documentation).
    dataset_id : str
        Identifier of the dataset receiving the resources.
    file_name : str
        Path of the API page source file; its base name without the ``.md``
        extension is the documentation slug on api.gouv.fr.

    Raises
    ------
    requests.HTTPError
        If one of the resource creation requests fails.
    """
    # Only the file name matters, whatever directory prefix the path carries,
    # and only a trailing ".md" is an extension.
    slug = os.path.basename(file_name)
    if slug.endswith(".md"):
        slug = slug[: -len(".md")]
    url_doc = "https://api.gouv.fr/documentation/" + slug

    # A key that is present but empty (None or "") falls back on the
    # documentation page instead of being posted as the resource URL.
    url_api = post.get("doc_tech_external") or url_doc

    resources = [
        # Doc first
        {'title': "Documentation de l'API", 'url': url_doc, 'type': "documentation"},
        # API then
        {'title': post["title"], 'url': url_api, 'type': "api"},
    ]
    url = f"{DATAGOUV_URL}/api/1/datasets/{dataset_id}/resources/"
    for resource in resources:
        payload = {**resource, 'filetype': 'remote', 'format': "html"}
        r = requests.post(
            url,
            json=payload,
            headers=headers
        )
        r.raise_for_status()
    



In [351]:
def publish_datasets(post, mapping_datagouv, organization_id="64d361fa8d0adad7c8999bb1"):
    """Create a dataset on data.gouv.fr from an API page.

    Keys of ``post`` listed in ``mapping_datagouv`` become top-level payload
    fields; every other key is stored in the dataset extras under the
    ``api-gouvfr-test:`` namespace. ``post`` itself is left untouched.

    Parameters
    ----------
    post : dict
        API page properties.
    mapping_datagouv : dict
        Maps a key of ``post`` to the payload field it is sent under.
    organization_id : str, optional
        Identifier of the organization owning the created dataset.

    Returns
    -------
    dict
        JSON body returned by the dataset creation request.

    Raises
    ------
    requests.HTTPError
        If the creation request fails.
    """
    payload = {"organization": {"id": organization_id}}
    extras = {}
    for key in sorted(post.keys()):
        value = post[key]
        if key in mapping_datagouv:
            payload[mapping_datagouv[key]] = value
            continue
        # Dict values are sent wrapped in a list; work on a local name so the
        # caller's `post` is not modified.
        if isinstance(value, dict):
            value = [value]
        # Skip empty properties, including the literal string "None".
        if value is not None and value != "None":
            extras["api-gouvfr-test:" + key] = value
    payload["extras"] = extras
    payload["frequency"] = "unknown"
    if "tags" in payload:
        # The mapped key can be present but empty (None); tags of 2 characters
        # or fewer are dropped.
        payload["tags"] = [
            tag for tag in (payload["tags"] or [])
            if tag is not None and len(tag) > 2
        ]
    r = requests.post(
        f"{DATAGOUV_URL}/api/1/datasets/", json=payload, headers=headers
    )
    r.raise_for_status()
    return r.json()

### Read the API pages and publish them as datasets

In [363]:
# API page key -> dataset payload field, as consumed by publish_datasets.
# Keys of an API page that are not listed here are sent as dataset extras.
mapping_datagouv = {
    "title": "title",
    "keywords": "tags",
    "description": "description"
}

In [364]:
# Every entry of the API pages directory (path relative to the working directory).
files = glob.glob("api.gouv.fr/_data/api/*")

In [365]:
# Publish each API page as a dataset with its two resources, and record the
# created dataset ids in `list_apis`.
list_apis = []
for f in files:
    post = frontmatter.load(f)

    # Description = optional tagline, optional intro, then the page body,
    # each on its own line.
    description = post.content
    if "content_intro" in post:
        description = post["content_intro"] + "\n" + description
    if "tagline" in post:
        description = post["tagline"] + "\n" + description
    post["description"] = description

    post = post.to_dict()
    # These keys are now part of the description: drop them from the payload.
    for merged_key in ("content_intro", "tagline", "content"):
        post.pop(merged_key, None)

    res = publish_datasets(post, mapping_datagouv)
    print(res['id'])
    publish_resources(post, res["id"], f)
    list_apis.append({
        "dataset_id": res["id"],
        "title": res["title"],
        "file_name": f.replace("api.gouv.fr/_data/api/", "").replace(".md", ""),
    })
    

64d3ddeb46755d07296bd44d
64d3ddecfbd89f285bc438b6
64d3ddedfbd89f285bc438b7
64d3ddee6e0f7ef5ff615a5f
64d3ddef46755d07296bd44e
64d3ddf1fbd89f285bc438b8
64d3ddf21bbb5f6e807aaaa0
64d3ddf3e94fe55e86cea927
64d3ddf4fbd89f285bc438b9
64d3ddf520f076fcf55aadf7
64d3ddf6fbd89f285bc438ba
64d3ddf76e0f7ef5ff615a60
64d3ddf846755d07296bd44f
64d3ddf96e0f7ef5ff615a61
64d3ddfa20f076fcf55aadf8
64d3ddfc6e0f7ef5ff615a62
64d3ddfdfbd89f285bc438bb
64d3ddfea26888a1f2a83e8c
64d3ddfe1bbb5f6e807aaaa1
64d3ddff6e0f7ef5ff615a63
64d3de016e0f7ef5ff615a64
64d3de0220f076fcf55aadf9
64d3de031bbb5f6e807aaaa2
64d3de0420f076fcf55aadfa
64d3de05e94fe55e86cea928
64d3de06fbd89f285bc438bc
64d3de07fbd89f285bc438bd
64d3de0720f076fcf55aadfb
64d3de086e0f7ef5ff615a65
64d3de091bbb5f6e807aaaa3
64d3de0bfbd89f285bc438be
64d3de0c20f076fcf55aadfc
64d3de0dfbd89f285bc438bf
64d3de0efbd89f285bc438c0
64d3de0f6e0f7ef5ff615a66
64d3de1046755d07296bd450
64d3de111bbb5f6e807aaaa4
64d3de1246755d07296bd451
64d3de1338f56f824b97b0e1
64d3de141bbb5f6e807aaaa5


In [366]:
# Sanity check: number of entries currently held in `files`.
len(files)

147

In [367]:
# NOTE: rebinds `files`, which held the API pages until now, to the guides.
files = glob.glob("api.gouv.fr/_data/guides/*")

In [368]:
# Guide key -> post payload field, as consumed by publish_post.
# NOTE: rebinds `mapping_datagouv`, which held the dataset mapping until now.
mapping_datagouv = {
    "title": "name",
    "tags": "tags",
    "tagline": "headline",
    "description": "content"
}

In [369]:
# Publish every guide flagged for publication as a post, link it from the
# datasets it references, and record the created post ids in `list_usecases`.
list_usecases = []
# Lookup table (title -> dataset_id) of the APIs published earlier.
df = pd.DataFrame(list_apis)
for f in files:
    post = frontmatter.load(f)
    # Strict comparison on purpose: a string such as "false" must not be
    # treated as a request to publish.
    if "publish" in post and post["publish"] == True:
        post["description"] = post.content
        post = post.to_dict()
        post.pop("content", None)
        # Tags may be a comma-separated string or already a list; only
        # strings are split, and surrounding spaces are trimmed.
        tags = post.get("tags")
        if isinstance(tags, str) and tags:
            post["tags"] = [tag.strip() for tag in tags.split(",") if tag.strip()]
        res = publish_post(post, mapping_datagouv, df)
        publish_guide_on_dataset(post, res["page"])
        print(res["id"])
        list_usecases.append({"post_id": res["id"]})
        

64d3de9a6e0f7ef5ff615a81
64d3de9cfbd89f285bc438e1
64d3dea722bc573d5416ace9
64d3dea8fbd89f285bc438e2
64d3deab22bc573d5416acea
64d3deaf46755d07296bd45d
64d3deb346755d07296bd45e
64d3deb446755d07296bd45f
64d3deb646755d07296bd460
64d3deb722bc573d5416aceb
64d3deb86e0f7ef5ff615a82
64d3deba22bc573d5416acec
64d3debb22bc573d5416aced
64d3debf38f56f824b97b0e9
64d3dec01bbb5f6e807aaab4
64d3dec2da3e962ad21d5f01
64d3dec36e0f7ef5ff615a83
64d3dec438f56f824b97b0ea
64d3decbfbd89f285bc438e3
64d3dedc6e0f7ef5ff615a84
64d3dededa3e962ad21d5f02
64d3dee06e0f7ef5ff615a85
64d3dee26e0f7ef5ff615a86
64d3dee522bc573d5416acee


### Cleanup (optional): delete the datasets and posts created above

In [326]:
def delete_list_datasets(datasets):
    """Delete datasets and print the HTTP status code of each request.

    ``datasets`` is an iterable of dicts carrying a ``"dataset_id"`` key.
    Failures are not raised: only the status code is printed.
    """
    for entry in datasets:
        endpoint = f"{DATAGOUV_URL}/api/1/datasets/{entry['dataset_id']}/"
        response = requests.delete(endpoint, headers=headers)
        print(response.status_code)

In [359]:
# Destructive: deletes every dataset recorded in `list_apis`.
delete_list_datasets(list_apis)

204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204


In [261]:
def delete_list_posts(posts):
    """Delete posts and print the HTTP status code of each request.

    ``posts`` is an iterable of dicts carrying a ``"post_id"`` key.
    Failures are not raised: only the status code is printed.
    """
    for entry in posts:
        endpoint = f"{DATAGOUV_URL}/api/1/posts/{entry['post_id']}/"
        response = requests.delete(endpoint, headers=headers)
        print(response.status_code)

In [360]:
# Destructive: deletes every post recorded in `list_usecases`.
delete_list_posts(list_usecases)

204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204
204


In [None]:
[]