# TO DO

1) extract more relevant data points
2) parse dict data from columns (before or after conversion to a DataFrame? identify what's most efficient)
3) identify latest offer published (max date)
4) create an Airflow project that runs on demand: runs the script, adds records to the DB, uploads to the PostgreSQL database

# Connecting to the France Travail API

Process:
1) Request a token for the API you want to work with
2) Retrieve the token
3) Use it to communicate with your target API

# Imports

In [None]:
import configparser
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Identifiers

In [None]:
# Load the France Travail API credentials from secrets.ini, looked up in the
# current working directory. Expected layout of the file:
#   [ft_api]
#   client_id = ...
#   client_secret = ...
config = configparser.ConfigParser()
secrets_file = os.path.join(os.getcwd(), "secrets.ini")

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail early with an explicit message instead
# of a cryptic KeyError on the section lookup below.
if not config.read(secrets_file):
    raise FileNotFoundError(
        f"Credentials file not found or unreadable: {secrets_file}"
    )

client_id = config['ft_api']['client_id']
client_secret = config['ft_api']['client_secret']

# Getting an API access token

In [None]:
# Request an OAuth2 access token using the client-credentials grant.
# The realm is specified as a query parameter at the end of the endpoint.
url = "https://entreprise.francetravail.fr/connexion/oauth2/access_token?realm=%2Fpartenaire"

# Form-encoded body of the POST request
data = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "scope": "api_offresdemploiv2 o2dsoffre",
}

# Headers
# NOTE(review): the Host value below differs from the host in `url` —
# confirm that overriding it is really required by the token endpoint.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "francetravail.io"
}

# API request; the timeout prevents the cell from hanging forever if the
# server never answers.
response = requests.post(
    url,
    data=data,
    headers=headers,
    timeout=30,
)

# Stop here on failure: every later cell needs a valid token, and continuing
# would only surface as a NameError on `access_token` further down.
if response.status_code != 200:
    raise RuntimeError(f"Error when retrieving the API token: {response.text}")

access_token = response.json().get("access_token")
if not access_token:
    raise RuntimeError(
        "Error when retrieving the API token: no access_token in the response"
    )

# Do not echo the token itself: cell outputs are stored with the notebook.
print("Token successfully granted")

In [None]:
# Creation-date window: offers created during the last 30 days.
# The formatted timestamps carry a "Z" (UTC) suffix, so they are computed from
# the current UTC time; the clock is read once so that both bounds share the
# same reference instant.
date_format = "%Y-%m-%dT%H:%M:%SZ"
now_utc = datetime.now(timezone.utc)
min_creation_date = (now_utc - timedelta(days=30)).strftime(date_format)  # 30 days ago
max_creation_date = now_utc.strftime(date_format)  # now

# API endpoint
offres_emplois_url = "https://api.francetravail.io/partenaire/offresdemploi/v2/offres/search"

# headers
headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {access_token}"
}

# parameters
# looking for
    # data related
    # in two specific departments (Paris and Hauts de Seine)
    # permanent contract (CDI)
    # created in the past 30 days
    # senior experience (3 is the max value = over 3 years of XP)
params = {
    "motsCles": "data",
    "departement": "75,92",
    "typeContrat": "CDI",
    "minCreationDate": min_creation_date,
    "maxCreationDate": max_creation_date,
    "experience": "3",
}

# API request; the timeout prevents the cell from hanging forever if the
# server never answers.
response = requests.get(
    offres_emplois_url,
    params=params,
    headers=headers,
    timeout=30,
)

# 200: positive response
# 206: positive, partial response (only part of the matching offers is
#      included; fetching the remaining ones is not handled here)
if response.status_code in (200, 206):
    offres = response.json()
elif response.status_code == 204:
    # No content: presumably returned when no offer matches the criteria —
    # TODO confirm against the API documentation. Keep the same shape as a
    # regular payload so the following cells still run.
    offres = {"resultats": []}
else:
    # Stop here: the following cells all need `offres` to be defined.
    raise RuntimeError(f"Erreur lors de la requête : {response.text}")

print("Nombre d'offres d'emploi trouvées :", len(offres["resultats"]))

In [None]:
# Build a DataFrame restricted to the offer fields considered relevant.

selected_keys = [
    "intitule",
    "description",
    "dateCreation",
    "appellationlibelle",
    "secteurActiviteLibelle",
    "typeContratLibelle",
    "salaire",
]

# Keep only the selected fields of each offer; a field that an offer does not
# provide is simply left out of that offer's row.
filtered_list = [
    {key: offre[key] for key in selected_keys if key in offre}
    for offre in offres["resultats"]
]

# Passing `columns` pins the schema: every selected field becomes a column
# (filled with NaN where missing), even when the result list is empty or when
# no offer carries a given field. Later cells can then rely on columns such
# as "dateCreation" being present.
offres_df = pd.DataFrame(filtered_list, columns=selected_keys)

In [None]:
# Data processing: parse the creation timestamps, then keep only their
# calendar-date part.
creation_ts = pd.to_datetime(offres_df["dateCreation"])
offres_df["dateCreation"] = creation_ts.dt.date

# WORK IN PROGRESS

In [None]:
# Inspect which fields a job offer exposes: take the first offer of the
# result list and print each of its keys on its own line.
first_offer = offres["resultats"][0]
keys = list(first_offer)

for field_name in keys:
    print(field_name)