### Récupération des données de statistiques d'Aftercinema

#### Vérification du bon fonctionnement de l'environnement + installations + imports

In [None]:
# Smoke test: printing a fixed word confirms that the kernel executes cells.
greeting = "hello"
print(greeting)

In [None]:
# Show the metadata pip has for psycopg2 in this kernel's environment.
# NOTE(review): this cell comes before the install cell, so on a fresh
# environment it will presumably report that the package is not found.
%pip show psycopg2

In [None]:
# Install the third-party packages imported by the rest of the notebook.
%pip install requests
%pip install pandas
%pip install sqlalchemy
# psycopg2 is not imported directly below.
# NOTE(review): presumably it is the PostgreSQL driver SQLAlchemy loads for the
# connection string used in the storage cell - confirm.
%pip install psycopg2

In [None]:
import os
import json
import requests
import pandas as pd
from sqlalchemy import create_engine, Table, Column, String, MetaData
from datetime import datetime,timedelta
import xml.etree.ElementTree as ET
from sqlalchemy.dialects.postgresql import JSONB

#### 1. Récupération des données PostHog

In [None]:
# Fetch the raw PostHog events: one HogQL query per tracked event, each result
# stored as a DataFrame in events_data under the event name.
token = os.getenv("POSTHOG_API_KEY")
if not token:
    # Fail with an explicit message instead of a TypeError on "Bearer " + None.
    raise RuntimeError("POSTHOG_API_KEY environment variable is not set")

headers = {"Authorization": "Bearer " + token}

# The text of each query is read from a JSON file keyed by event name.
# The encoding is explicit so the read does not depend on the platform default.
with open("./POSTHOG_QUERIES.json", 'r', encoding="utf-8") as fichier:
    queries = json.load(fichier)

events_data = {}

for event in ["Page viewed", "Platform button clicked"]:
    payload = {
        "query": {
            "kind": "HogQLQuery",
            "query": queries[event]
        }
    }

    # requests has no default timeout; without one a stalled connection would
    # block the notebook forever.
    response = requests.post(
        "https://eu.posthog.com/api/projects/20861/query",
        headers=headers,
        json=payload,
        timeout=120,
    )
    # Surface HTTP errors here rather than as a KeyError on "results" below.
    response.raise_for_status()

    data = response.json()

    # Passing the column names to the constructor also works when the query
    # returns no rows; assigning df.columns afterwards raised a length-mismatch
    # error on an empty frame.
    df = pd.DataFrame(data["results"], columns=data["columns"])

    events_data[event] = df

Traitement des données pour le graphique PostHog - plateformes

In [None]:
# Count the "Platform button clicked" events per platform and expose the
# result as a list of {'platform': ..., 'count': ...} records.
platform_counts = events_data["Platform button clicked"].groupby('platform').size()
df_platforms = platform_counts.reset_index(name='count')
result_platforms = df_platforms.to_dict(orient='records')

Traitement des données pour le graphique PostHog - pages

In [None]:
# Build the monthly page-view series: one record per calendar month holding the
# number of views of '/' and of '/listen', in chronological order.

# Work on a copy so the raw events kept in events_data are left untouched.
df_pages = events_data["Page viewed"].copy()
df_pages['time'] = pd.to_datetime(df_pages['time'])
# Drop any timezone information, then bucket each timestamp by calendar month.
df_pages['year_month'] = df_pages['time'].dt.tz_localize(None).dt.to_period('M')
month_map = {1: 'jan', 2: 'fév', 3: 'mar', 4: 'avr', 5: 'mai', 6: 'jun',7: 'jui', 8: 'aoû', 9: 'sep', 10: 'oct', 11: 'nov', 12: 'déc'}
# Any URL containing '/listen' counts as the listen page, everything else
# (including a missing URL) as the home page.
df_pages['page'] = df_pages['url'].apply(
    lambda x: '/listen' if isinstance(x, str) and '/listen' in x else '/'
)
# Group and pivot on the period rather than on the text label: labels such as
# "avr 24" / "aoû 24" / "déc 24" sort alphabetically, which scrambles the
# timeline. Periods sort chronologically.
df_pages = df_pages.groupby(['year_month', 'page']).size().reset_index(name='count')
df_pages = df_pages.pivot(index='year_month', columns='page', values='count').fillna(0)
df_pages = df_pages.sort_index().reset_index()
# Only now derive the display label ("<month abbreviation> <2-digit year>").
df_pages['month'] = df_pages['year_month'].dt.month.map(month_map) + " " + (df_pages['year_month'].dt.year % 100).astype(str)
df_pages = df_pages.drop(columns='year_month')
# Keep 'month' as the first key of every record, as before.
df_pages = df_pages[['month'] + [col for col in df_pages.columns if col != 'month']]
result_pages = df_pages.to_dict(orient='records')

#### 2. Récupération des données Acast

Récupération des titres et des ids des épisodes du podcast

In [None]:

# Download the show's public RSS feed and parse it as XML.
# requests has no default timeout, so one is set to avoid hanging forever.
response = requests.get("https://feeds.acast.com/public/shows/aftercinema", timeout=30)
# Stop on an HTTP error instead of handing an error page to the XML parser.
response.raise_for_status()
root = ET.fromstring(response.content)

# Prefix map used to look up the Acast-specific tags of the feed.
namespaces = {'acast': 'https://schema.acast.com/1.0/'}

def convert_date(date_str):
    """Convert an RFC 822 date string (RSS ``pubDate``) to ``'YYYY-MM-DD'``.

    The date is taken as written in the string: no timezone conversion is
    applied before formatting.

    Args:
        date_str: Date such as ``'Mon, 19 Feb 2024 23:00:00 GMT'`` or
            ``'Mon, 19 Feb 2024 23:00:00 +0000'``.

    Returns:
        The date formatted as ``'YYYY-MM-DD'``, or ``date_str`` unchanged when
        it cannot be parsed.
    """
    # Imported locally so the function does not depend on the import cell.
    from email.utils import parsedate_to_datetime

    # The standard-library RFC 822 parser is used instead of strptime: it does
    # not depend on the process locale for day/month names and it accepts
    # numeric offsets ("+0000") as well as zone names ("GMT").
    try:
        dt = parsedate_to_datetime(date_str)
    except (TypeError, ValueError):
        # Unparsable input raises ValueError on recent Python versions and
        # TypeError on older ones; either way, hand the input back untouched.
        return date_str
    return dt.strftime('%Y-%m-%d')

# Collect the id, title and publication date of every <item> in the feed.
podcast_episodes = []

for item in root.findall('.//item'):
    # findtext() does a single lookup per tag and returns '' both when the tag
    # is missing and when it is present but empty. The previous
    # find(...).text form returned None for an empty tag, which then crashed
    # convert_date with a TypeError.
    episode_id = item.findtext('acast:episodeId', default='', namespaces=namespaces)
    title = item.findtext('title', default='')
    pub_date = item.findtext('pubDate', default='')
    formatted_date = convert_date(pub_date)  # 'YYYY-MM-DD' when parsable

    podcast_episodes.append({
        "id": episode_id,
        "title": title,
        "publishedDate": formatted_date
    })

In [None]:
# Fetch the daily download counts of every episode from the Acast insights API
# and reshape them into one record per date with one key per episode title,
# plus a 'Tous les épisodes' key holding the total over all episodes.
acast_token = os.getenv("ACAST_TOKEN")
if not acast_token:
    # Fail with an explicit message instead of a TypeError on "Bearer " + None.
    raise RuntimeError("ACAST_TOKEN environment variable is not set")
headers = {"Authorization": "Bearer " + acast_token}
url = "https://insights-api.acast.com/api/v2/charts/downloads/65d49906c4c0ce0016eadf8c/episode/"
# NOTE(review): this datetime is passed as-is in the query string, so it is
# presumably sent in str() form (local time, space separator), unlike the ISO
# 8601 "from" value - confirm the API accepts it.
to_param = datetime.now()-timedelta(days=1)
params = {
    "from": "2024-02-19T23:00:00.000Z",
    "to": to_param,
    "interval": "day",
    "timeZone": "Europe/Paris"
}

# Collect the per-episode frames and concatenate once: calling pd.concat inside
# the loop re-copies all previous rows on every iteration and starts from an
# empty frame.
episode_frames = []
for episode in podcast_episodes:
    # requests has no default timeout, so one is set to avoid hanging forever.
    response = requests.get(url+episode["id"], params=params, headers=headers, timeout=60)
    # Surface HTTP errors here rather than as a KeyError on 'date' below.
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data)
    # The 'label' field is used as the date; only the part before 'T' is kept.
    df.rename(columns={'label': 'date'}, inplace=True)
    df['date'] = df['date'].str.split('T').str[0]
    df['title'] = episode["title"]
    episode_frames.append(df)

full_df = pd.concat(episode_frames)

# Extra series: total downloads per date over all episodes.
grouped = full_df.groupby('date').agg({'value': 'sum'}).reset_index()
grouped['title'] = 'Tous les épisodes'
full_df = pd.concat([full_df, grouped], ignore_index=True)

# A date missing for an episode would leave NaN in the pivot. NaN is not valid
# JSON and would make the JSONB insert fail, so missing counts are stored as 0,
# as the page-view series already does.
df_pivot = full_df.pivot(index='date', columns='title', values='value').fillna(0).reset_index()
result_downloads = df_pivot.to_dict(orient='records')

#### 3. Stockage du résultat

In [None]:
# Timestamp of this run (minute precision), stored with every inserted row.
date = f"{datetime.now():%Y-%m-%d %H:%M}"


def convert_data(data):
    """Serialize ``data`` to a JSON string, keeping non-ASCII characters as-is."""
    serialized = json.dumps(data, ensure_ascii=False)
    return serialized

# One row per dataset: its display name, its JSON-ready payload and the run
# timestamp shared by all rows.
data_to_insert = [
    {"data_name": label, "data": payload, "date": date}
    for label, payload in (
        ("PostHog - Page viewed", result_pages),
        ("PostHog - Platform button clicked", result_platforms),
        ("Acast - Downloads", result_downloads),
    )
]

# Persist the datasets of this run in the PostgreSQL table 'stats_data'.
engine = create_engine(os.getenv("POSTGRESQL_CONN_STRING"))

# Table definition: dataset name, JSONB payload and run timestamp (as text).
metadata = MetaData()
table = Table('stats_data', metadata,
              Column('data_name', String),
              Column('data', JSONB),
              Column('date', String))

# Creates the table only if it does not exist yet.
metadata.create_all(engine)

# engine.begin() opens one connection and one transaction, commits when the
# block exits normally and rolls back on error. All rows of a run are therefore
# stored together or not at all, with no connection per row and no redundant
# explicit commit. The keys of each dict in data_to_insert match the column
# names, so the list can be passed directly as a multi-row insert.
with engine.begin() as connection:
    connection.execute(table.insert(), data_to_insert)