### Récupération des données de statistiques d'Aftercinema

#### Vérification du bon fonctionnement de l'environnement + installations + imports

In [None]:
# Smoke test: a visible "hello" confirms that the kernel executes cells.
print("hello")

In [None]:
%pip install requests
%pip install pandas
%pip install sqlalchemy
%pip install psycopg2

In [None]:
import json
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone
from email.utils import parsedate_to_datetime

import pandas as pd
import requests
from sqlalchemy import create_engine, Table, Column, String, MetaData
from sqlalchemy.dialects.postgresql import JSONB

#### 1. Récupération des données PostHog

In [None]:
# Fetch the raw PostHog events: one HogQL query per event name, each result kept
# as a DataFrame in `events_data[event]`.
token = os.getenv("POSTHOG_API_KEY")
if not token:
    # Fail with a clear message instead of a TypeError on `"Bearer " + None`.
    raise RuntimeError("POSTHOG_API_KEY environment variable is not set")

headers = {"Authorization": "Bearer " + token}

# Read the query file as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open("./POSTHOG_QUERIES.json", 'r', encoding="utf-8") as fichier:
    queries = json.load(fichier)

events_data = {}

for event in ["Page viewed","Platform button clicked"]:
    payload = {
        "query": {
            "kind": "HogQLQuery",
            "query": queries[event]
        }
    }

    response = requests.post("https://eu.posthog.com/api/projects/20861/query",headers=headers,json=payload)
    # Surface HTTP errors here rather than as a KeyError on "results" below.
    response.raise_for_status()

    data = response.json()

    # Passing `columns=` to the constructor also works when "results" is empty;
    # assigning `df.columns` afterwards raises a length mismatch in that case.
    df = pd.DataFrame(data["results"], columns=data["columns"])

    events_data[event] = df

Traitement des données pour le graphique PostHog - plateformes

In [None]:
# Count "Platform button clicked" events per platform and expose the counts as
# a list of {"platform": ..., "count": ...} records.
df_platforms = (
    events_data["Platform button clicked"]
    .groupby('platform')
    .size()
    .reset_index(name='count')
)
result_platforms = df_platforms.to_dict(orient='records')

Traitement des données pour le graphique PostHog - pages

In [None]:
# Monthly page-view counts, split between the '/listen' page and everything else.
# Work on a copy so the raw frame stored in `events_data` is left untouched.
df_pages = events_data["Page viewed"].copy()
df_pages['time'] = pd.to_datetime(df_pages['time'])
# Drop any timezone before converting to a monthly period.
df_pages['year_month'] = df_pages['time'].dt.tz_localize(None).dt.to_period('M')
month_map = {1: 'jan', 2: 'fév', 3: 'mar', 4: 'avr', 5: 'mai', 6: 'jun',7: 'jui', 8: 'aoû', 9: 'sep', 10: 'oct', 11: 'nov', 12: 'déc'}
df_pages['page'] = df_pages['url'].apply(lambda x: '/listen' if '/listen' in x else '/')
# Group and pivot on the period, not on the text label: the pivot sorts its
# index, and sorting labels such as 'avr 24' / 'fév 24' is alphabetical rather
# than chronological.
df_pages = df_pages.groupby(['year_month', 'page']).size().reset_index(name='count')
df_pages = df_pages.pivot(index='year_month', columns='page', values='count').fillna(0).reset_index()
# Build the display label only once the rows are in chronological order.
df_pages.insert(
    0,
    'month',
    df_pages['year_month'].dt.month.map(month_map) + " " + (df_pages['year_month'].dt.year % 100).astype(str),
)
df_pages = df_pages.drop(columns='year_month')
result_pages = df_pages.to_dict(orient='records')

#### 2. Récupération des données Acast

Récupération des titres et des ids des épisodes du podcast

In [None]:

# Read the public RSS feed and collect id, title and publication date of every
# episode into `podcast_episodes`.
response = requests.get("https://feeds.acast.com/public/shows/aftercinema")
# Stop on an HTTP error rather than trying to parse an error page as XML.
response.raise_for_status()
root = ET.fromstring(response.content)
namespaces = {'acast': 'https://schema.acast.com/1.0/'}
podcast_episodes = []

for item in root.findall('.//item'):
    # findtext() returns None when the child element is absent, where
    # find(...).text would raise AttributeError.
    episode_id = item.findtext('acast:episodeId', namespaces=namespaces)
    if not episode_id:
        # The later cells build their request URLs from the id; skip items without one.
        continue
    title = item.findtext('title')
    pub_date = item.findtext('pubDate')

    podcast_episodes.append({
        "id": episode_id,
        "title": title,
        # parsedate_to_datetime parses RFC 2822 dates independently of the
        # process locale and accepts numeric offsets as well as "GMT".
        "publishedDate": parsedate_to_datetime(pub_date).strftime('%Y-%m-%d') if pub_date else None
    })

In [None]:
# Authentication header and query parameters shared by the Acast insights requests.
acast_token = os.getenv("ACAST_TOKEN")
if not acast_token:
    # Fail with a clear message instead of a TypeError on `"Bearer " + None`.
    raise RuntimeError("ACAST_TOKEN environment variable is not set")
headers = {"Authorization": "Bearer " + acast_token}
# Upper bound of the window: 24 hours before now, expressed in UTC.
to_param = datetime.now(timezone.utc) - timedelta(days=1)
params = {
    "from": "2024-02-19T23:00:00.000Z",
    # Formatted explicitly in the same ISO-8601 "Z" form as "from", instead of
    # relying on the implicit str() conversion of a datetime object.
    "to": to_param.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
    "interval": "day",
    "timeZone": "Europe/Paris"
}

Récupération des données de téléchargements

In [None]:
# Per-episode download histogram, plus a "Tous les épisodes" series holding the
# sum over all episodes, pivoted to one record per date.
url = "https://insights-api.acast.com/api/v2/charts/downloads/65d49906c4c0ce0016eadf8c/episode/"

# Collect one frame per episode and concatenate once at the end; concatenating
# inside the loop re-copies the accumulated frame on every iteration.
episode_frames = []
for episode in podcast_episodes:
    response = requests.get(url+episode["id"], params=params,headers=headers)
    # Fail loudly on HTTP errors instead of building a frame from an error payload.
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data)
    if df.empty:
        # No data points for this episode: there is no 'label' column to process.
        continue
    df = df.rename(columns={'label': 'date'})
    # Keep only the date part of the timestamp label.
    df['date'] = df['date'].str.split('T').str[0]
    df['title'] = episode["title"]
    episode_frames.append(df)

if episode_frames:
    full_df = pd.concat(episode_frames)
else:
    # Keep the columns used below so the aggregation still runs on an empty result.
    full_df = pd.DataFrame(columns=['date', 'value', 'title'])

grouped = full_df.groupby('date').agg({'value': 'sum'}).reset_index()
grouped['title'] = 'Tous les épisodes'
full_df = pd.concat([full_df, grouped], ignore_index=True)

df_pivot = full_df.pivot(index='date', columns='title', values='value').reset_index()
result_downloads = df_pivot.to_dict(orient='records')

Récupération des données des auditeurs

In [None]:
# Per-episode reach histogram, plus a "Tous les épisodes" series holding the sum
# over all episodes, pivoted to one record per date.
url = "https://insights-api.acast.com/api/v2/shows/65d49906c4c0ce0016eadf8c/reach/histogram/episode/"

# Collect one frame per episode and concatenate once at the end; concatenating
# inside the loop re-copies the accumulated frame on every iteration.
episode_frames = []
for episode in podcast_episodes:
    response = requests.get(url+episode["id"], params=params,headers=headers)
    # Fail loudly on HTTP errors instead of building a frame from an error payload.
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data)
    if df.empty:
        # No data points for this episode: there is no 'label' column to process.
        continue
    df = df.rename(columns={'label': 'date'})
    # Keep only the date part of the timestamp label.
    df['date'] = df['date'].str.split('T').str[0]
    df['title'] = episode["title"]
    episode_frames.append(df)

if episode_frames:
    full_df = pd.concat(episode_frames)
else:
    # Keep the columns used below so the aggregation still runs on an empty result.
    full_df = pd.DataFrame(columns=['date', 'value', 'title'])

grouped = full_df.groupby('date').agg({'value': 'sum'}).reset_index()
grouped['title'] = 'Tous les épisodes'
full_df = pd.concat([full_df, grouped], ignore_index=True)

df_pivot = full_df.pivot(index='date', columns='title', values='value').reset_index()
result_listeners = df_pivot.to_dict(orient='records')

Récupération des données des plateformes

In [None]:
# Total value per listening client: Spotify, Deezer and Apple Podcasts are kept
# as separate entries, every other client is summed into "autres".
url = "https://insights-api.acast.com/api/v2/shows/65d49906c4c0ce0016eadf8c/clients/histogram?clients=Spotify,Deezer,Apple+Podcasts,Other,Chrome,Safari,Acast+embed-player,Firefox,iVoox,CastBox,Podcast+Addict"
# Build a new dict instead of aliasing `params`: with `platform_params = params`
# the "month" interval would also be written into the shared `params`, changing
# what the downloads/listeners cells request if they are run again.
platform_params = {**params, "interval": "month"}
response = requests.get(url, params=platform_params,headers=headers)
# Fail loudly on HTTP errors instead of iterating over an error payload.
response.raise_for_status()

# NOTE: this rebinds `result_platforms`, a name the PostHog platform cell also assigns.
result_platforms = []
autres_total_value = 0
specific_platforms = ['Spotify', 'Deezer', 'Apple Podcasts']

for platform in response.json():
    platform_name = platform['name']
    total_value = sum(item['value'] for item in platform['values'])

    if platform_name in specific_platforms:
        result_platforms.append({"platform": platform_name, "value": total_value})
    else:
        autres_total_value += total_value

# Only add the catch-all entry when it has something to report.
if autres_total_value > 0:
    result_platforms.append({"platform": "autres", "value": autres_total_value})

#### 3. Récupération des données YouTube

In [None]:
%pip install google-api-python-client
%pip install google-auth
%pip install google-auth-oauthlib
%pip install google-auth-httplib2

In [None]:
import os
import google.oauth2.credentials
import google_auth_oauthlib.flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow

# OAuth scopes passed to InstalledAppFlow in get_service().
SCOPES = ['https://www.googleapis.com/auth/yt-analytics.readonly']

# Service name and version passed to googleapiclient's build() in get_service().
API_SERVICE_NAME = 'youtubeAnalytics'
API_VERSION = 'v2'
# Path of the OAuth client-secrets file read by get_service().
# NOTE(review): this looks like a placeholder file name — confirm the real path before running.
CLIENT_SECRETS_FILE = 'YOUR_CLIENT_SECRET_FILE.json'
def get_service():
  """Run the local-server OAuth flow and return the API client.

  The flow is created from CLIENT_SECRETS_FILE with SCOPES; the client is built
  for API_SERVICE_NAME / API_VERSION with the credentials the flow returns.
  """
  oauth_flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRETS_FILE, SCOPES)
  return build(API_SERVICE_NAME, API_VERSION, credentials = oauth_flow.run_local_server())

def execute_api_request(client_library_function, **kwargs):
  """Build a request with `client_library_function(**kwargs)`, execute it and print the response.

  Args:
    client_library_function: callable returning an object with an `execute()` method.
    **kwargs: forwarded unchanged to `client_library_function`.

  Returns:
    The executed response. It used to be printed and then discarded; returning
    it as well lets callers reuse the data.
  """
  response = client_library_function(**kwargs).execute()

  print(response)
  return response
%env OAUTHLIB_INSECURE_TRANSPORT=1
if __name__ == '__main__':
  # Disable OAuthlib's HTTPs verification when running locally.
  # *DO NOT* leave this option enabled when running in production.
  os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'

  youtubeAnalytics = get_service()
  execute_api_request(
      youtubeAnalytics.reports().query,
      ids='channel==MINE',
      startDate='2024-01-01',
      endDate='2024-12-31',
      metrics='views',
      dimensions='day',
      sort='day'
  )

In [None]:
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Create credentials for the service account.
# NOTE(review): SERVICE_ACCOUNT_INFO is not defined in the visible part of this
# notebook — confirm where it comes from before running on a fresh kernel.
# NOTE(review): check Google's documentation on whether the YouTube Data and
# Analytics APIs accept service-account credentials at all.
youtube_scopes = [
    "https://www.googleapis.com/auth/youtube.readonly",
    "https://www.googleapis.com/auth/yt-analytics.readonly",
]
credentials = service_account.Credentials.from_service_account_info(
    SERVICE_ACCOUNT_INFO, scopes=youtube_scopes
)

# Build the YouTube Data and YouTube Analytics API clients.
youtube = build('youtube', 'v3', credentials=credentials)
youtube_analytics = build('youtubeAnalytics', 'v2', credentials=credentials)

# Despite its name, `request` holds the executed response: up to 50 items of the playlist.
playlist_items = youtube.playlistItems()
request = playlist_items.list(
    part="snippet",
    playlistId="PLA75TyAwTPpmU0UuoPGXciOMcE9ipNLgT",
    maxResults=50,
).execute()
request
# Draft kept for reference: per-video daily views.
# def get_video_views(video_id):
#     request = youtube_analytics.reports().query(
#         ids="channel==MINE",
#         startDate='2024-01-01',
#         endDate='2024-12-31',
#         metrics="views",
#         dimensions="day",
#         filters=f"video=={video_id}"
#     )
#     response = request.execute()
#     return response

# for video in request['items']:
#     stats = get_video_views(video['snippet']['resourceId']['videoId'])
#     print(stats)

In [None]:
# Daily views of the channel between the two dates, sorted by day.
channel_views_query = dict(
    ids="channel==UCqyW0dbyHKY0i1H88128AFg",
    startDate='2024-01-01',
    endDate='2024-08-01',
    metrics="views",
    dimensions="day",
    sort="day",
)
request = youtube_analytics.reports().query(**channel_views_query)
response = request.execute()
response

#### Stockage du résultat

In [None]:
# Persist one row per dataset (name, JSON payload, timestamp) in the
# `stats_data` PostgreSQL table.
date = datetime.now().strftime("%Y-%m-%d %H:%M")

def convert_data(data):
    """Serialize `data` to a JSON string, keeping non-ASCII characters unescaped."""
    return json.dumps(data, ensure_ascii=False)

# `result_platforms` is rebound by the Acast platforms cell, so when this cell
# runs it no longer holds the PostHog data and both "platform" rows would store
# the Acast figures. `df_platforms` still holds the PostHog per-platform counts,
# so the PostHog records are rebuilt from it.
posthog_platforms = df_platforms.to_dict(orient='records')

data_to_insert = [
    {
        "data_name":"PostHog - Page viewed",
        "data":result_pages,
        "date": date
    },
    {
        "data_name":"PostHog - Platform button clicked",
        "data":posthog_platforms,
        "date": date
    },
    {
        "data_name":"Acast - Downloads",
        "data":result_downloads,
        "date": date
    },
    {
        "data_name":"Acast - Listeners",
        "data":result_listeners,
        "date": date
    },
    {
        "data_name":"Acast - Platforms",
        "data":result_platforms,
        "date": date
    }
]

conn_string = os.getenv("POSTGRESQL_CONN_STRING")
if not conn_string:
    # Fail with a clear message instead of an error raised inside create_engine(None).
    raise RuntimeError("POSTGRESQL_CONN_STRING environment variable is not set")
engine = create_engine(conn_string)

metadata = MetaData()
table = Table('stats_data', metadata,
              Column('data_name', String),
              Column('data', JSONB),
              Column('date', String))

# Creates the table only if it does not exist yet.
metadata.create_all(engine)

# One transaction for the whole snapshot: engine.begin() commits on success and
# rolls back on error, so either every row is stored or none is.
with engine.begin() as connection:
    connection.execute(table.insert(), data_to_insert)