# COP27
> Ce notebook sert de marche à suivre pour analyser les données d'une journée de COP

In [1]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import datetime

import sys
sys.path.append("../")

%load_ext autoreload
%autoreload 2

from quotaclimat.utils.plotly_theme import *

In [2]:
%%html
<style type="text/css">
/* Load the Poppins web font referenced by the charts (font_family="Poppins").
   Served over HTTPS so the import is not blocked as mixed content when the
   notebook itself is served over HTTPS. */
@import url('https://fonts.googleapis.com/css?family=Poppins');
</style>

# Récupération et préparation des données

## Récupérer et préparer les données de la journée
Choisissez le fichier correspondant à la journée et au mot-clé à analyser (voir la liste des fichiers disponibles ci-dessous).

In [5]:
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_one
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_all_data_dump
from quotaclimat.data_processing.read_format_deduplicate import deduplicate_extracts

In [6]:
# List the extracts available in the COP27 data folder so the right file can be picked below.
os.listdir("../data/cop27/")

['20221106_20221105_20221105_all_COP27.xlsx',
 '20221106_20221105_20221105_all_écologie.xlsx']

In [7]:
# Extract analysed in this notebook; must be one of the files listed by the previous cell.
path = "../data/cop27/20221106_20221105_20221105_all_COP27.xlsx"

In [10]:
# Load the selected extract through the project helper.
# NOTE(review): the helper is not visible here; path_channels=None presumably skips
# any channel-reference lookup — confirm against read_format_deduplicate.
data = read_and_format_one(path_file = path,path_channels=None)
# Display (rows, columns) as a quick sanity check of the load.
data.shape

(567, 14)

## Filtrer sur les 50 chaînes TV et Radio avec le plus d'audience

In [12]:
# Reference sheet of the top-audience channels, with a media-qualified key
# (channel name + "_" + media) added as "channel_id".
top_audiences = pd.read_excel(
    "../data/channels.xlsx",
    sheet_name="top_audiences",
).assign(channel_id=lambda sheet: sheet["channel_name"] + "_" + sheet["media"])

# Channel names per media, in the order in which they appear in the sheet.
top_channels_tv = top_audiences.loc[top_audiences["media"] == "TV", "channel_name"].tolist()
top_channels_radio = top_audiences.loc[top_audiences["media"] == "Radio", "channel_name"].tolist()

In [13]:
# Build the same media-qualified key as on top_audiences (channel name + "_" + media)
# so both frames can be joined on it.
data["channel_id"] = data["channel_name"] + "_" + data["media"]

In [14]:
# Inner join on channel_id: keeps only the rows whose channel appears in top_audiences.
# NOTE(review): if top_audiences contained duplicate channel_id values, matching rows
# would be repeated and mention counts inflated — confirm the sheet has unique keys.
data = data.merge(top_audiences[["channel_id"]],on = ["channel_id"],how = "inner")

In [15]:
# (rows, columns) after restricting to the top-audience channels.
data.shape

(350, 15)

##### Nombre de chaînes TV ou Radio dans l'échantillon
Vérifier combien de chaînes, parmi les 50, parlent du sujet.

In [16]:
# Number of distinct channels per media that mention the topic.
# Counting unique names *within* each media avoids undercounting when the same
# channel name exists on both TV and Radio (de-duplicating on channel_name alone
# would keep only one of the two rows).
data.groupby(["media"])["channel_name"].nunique()

media
Radio     8
TV       11
Name: channel_name, dtype: int64

## Filtrer dans les horaires d'antenne entre 6h et minuit

In [17]:
from quotaclimat.data_analytics.exploration import filter_data_between_hours

In [18]:
# Restrict the data to the 06:00-24:00 airtime window (see the section heading).
# NOTE(review): the helper is not visible here — boundary handling (inclusive or
# exclusive, and how "24:00" is interpreted) should be confirmed in exploration.py.
data = filter_data_between_hours(data,"06:00","24:00")

In [19]:
# (rows, columns) after the airtime-window filter.
data.shape

(348, 15)

# Préparation des analyses

In [20]:
from quotaclimat.utils.channels import TOP_25_CHANNELS,TOP_CHANNELS_TV,TOP_CHANNELS_TV_8
from quotaclimat.data_analytics.exploration import show_mentions_by_channel
from quotaclimat.data_analytics.exploration import show_mentions_by_time_of_the_day
from quotaclimat.data_analytics.exploration import show_mentions_over_time
from quotaclimat.data_analytics.exploration import show_mentions_treemap
from quotaclimat.data_analytics.exploration import show_piechart_split_tv_radio

## Analyse 1 - Volume médiatique total sur les 50 chaînes

In [70]:
# Share of airtime = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
n_channels = 25  # channels per media (25 TV + 25 Radio = 50 in the "Total" row)

# Per media: total number of mentions and number of distinct channels with a mention.
media_time = data.groupby(["media"]).agg({"count":"sum","channel_name":"nunique"})
media_time["n_channels"] = n_channels
# Add a "Total" row holding the column sums. pd.concat is used because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
media_time = pd.concat([media_time, pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T])
# Minutes of coverage, counting 2 minutes per mention.
media_time["media_time"] = media_time["count"] * 2
# Minutes of available airtime: 18 hours a day on every channel.
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time

Unnamed: 0,count,channel_name,n_channels,media_time,total_time,media_part
Radio,181,8,25,362,27000,0.013407
TV,167,11,25,334,27000,0.01237
Total,348,19,50,696,54000,0.012889


In [71]:
# Bar chart of the airtime share per media; the "Total" row is dropped so that
# only the per-media rows are drawn.
fig = px.bar(
    media_time.drop("Total").reset_index(),
    x="index",
    y="media_part",
    text_auto=".1%",
    height=400,
)
fig.update_layout(
    title="Volume médiatique total sur les 50 chaînes TV et Radio",
    xaxis_title="",
    yaxis_title="% du volume médiatique",
    yaxis_tickformat="0%",
    font_family="Poppins",
)
# One colour per bar.
fig.update_traces(marker_color=["#f49182", "#e6381b"])
fig

## Analyse 1bis - Volume médiatique total sur les top 10 chaînes TV et Radio

In [72]:
# Share of airtime = n_mentions * 2 min / (n_top * 60 minutes * 18h * n_days)
n_days = 1
n_top = 10  # channels kept per media (10 TV + 10 Radio = 20 in the "Total" row)

# Keep only the mentions from the first n_top TV and Radio channels.
# NOTE(review): assumes the top_audiences sheet is ordered by decreasing audience,
# so that the first n_top names are the most-watched ones — confirm.
data_top10 = data.loc[(data["channel_name"].isin(top_channels_tv[:n_top])) | (data["channel_name"].isin(top_channels_radio[:n_top]))]

# Per media: total number of mentions and number of distinct channels with a mention.
media_time = data_top10.groupby(["media"]).agg({"count":"sum","channel_name":"nunique"})
media_time["n_channels"] = n_top
# Add a "Total" row holding the column sums. pd.concat is used because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
media_time = pd.concat([media_time, pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T])
# Minutes of coverage, counting 2 minutes per mention.
media_time["media_time"] = media_time["count"] * 2
# Minutes of available airtime: 18 hours a day on every channel.
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time

Unnamed: 0,count,channel_name,n_channels,media_time,total_time,media_part
Radio,164,6,10,328,10800,0.03037
TV,29,5,10,58,10800,0.00537
Total,193,11,20,386,21600,0.01787


In [73]:
# Bar chart of the airtime share per media for the top-10 selection; the "Total"
# row is dropped so that only the per-media rows are drawn.
fig = px.bar(
    media_time.drop("Total").reset_index(),
    x="index",
    y="media_part",
    text_auto=".1%",
    height=400,
)
fig.update_layout(
    title="Volume médiatique total sur les 10 premières chaînes TV et Radio",
    xaxis_title="",
    yaxis_title="% du volume médiatique",
    yaxis_tickformat="0%",
    font_family="Poppins",
)
# One colour per bar.
fig.update_traces(marker_color=["#f49182", "#e6381b"])
fig

## Analyse 2 - TOP 3 TV et Radio

In [54]:
# Weight of one mention on a single channel over one day:
# 2 minutes out of 18 hours (60 * 18 minutes) of airtime.
multiplier = 2 / (60 * 18)

# Top 3 TV channels, expressed as a share of their airtime.
fig = show_mentions_by_channel(
    data,
    list_of_channels=top_channels_tv,
    n=3,
    method=multiplier,
    height=400,
    text_auto=".1%",
)
fig.update_layout(
    title="Podium TV",
    xaxis_title="",
    yaxis_title="% du volume médiatique",
    yaxis_tickformat="0%",
    font_family="Poppins",
)
fig.update_traces(marker_color="#e6381b")
fig

In [57]:
# Weight of one mention on a single channel over one day:
# 2 minutes out of 18 hours (60 * 18 minutes) of airtime.
multiplier = 2 / (60 * 18)

# Top 3 Radio channels, expressed as a share of their airtime.
fig = show_mentions_by_channel(
    data,
    list_of_channels=top_channels_radio,
    n=3,
    method=multiplier,
    height=400,
    text_auto=".1%",
)
fig.update_layout(
    title="Podium Radio",
    xaxis_title="",
    yaxis_title="% du volume médiatique",
    yaxis_tickformat="0%",
    font_family="Poppins",
)
fig.update_traces(marker_color="#f49182")
fig

## Analyse 3 - Classement complet TV et Radio

In [56]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

# Full ranking of the TV channels (up to 25), as a share of their airtime.
# Labels refer to COP27, the event this notebook analyses (they previously said COP26).
fig = show_mentions_by_channel(data,list_of_channels=top_channels_tv,n = 25,
                               title = "Classement des chaînes TV qui ont consacré un temps d'antenne à la COP27",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du temps d'antenne sur la COP27")
fig.update_traces(marker_color='#e6381b')
fig.show()

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

# Full ranking of the Radio channels (up to 25), as a share of their airtime.
# Labels refer to COP27, the event this notebook analyses (they previously said COP26).
# NOTE(review): color_discrete_sequence is passed here, but the bars are recoloured by
# update_traces below — presumably redundant; confirm against show_mentions_by_channel.
fig = show_mentions_by_channel(data,list_of_channels=top_channels_radio,n = 25,
                               color_discrete_sequence=["#E6381B"],
                               title="Classement des chaînes Radio qui ont consacré un temps d'antenne à la COP27",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du temps d'antenne sur la COP27")
fig.update_traces(marker_color='#f49182')
fig.show()