# COP27
> Ce notebook sert de marche à suivre pour analyser les données d'une journée de COP

In [61]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import datetime

import sys
sys.path.append("../../")

%load_ext autoreload
%autoreload 2

from quotaclimat.utils.plotly_theme import *
from quotaclimat.data_analytics.exploration import filter_data_between_hours
from quotaclimat.utils.channels import TOP_25_CHANNELS,TOP_CHANNELS_TV,TOP_CHANNELS_TV_8
from quotaclimat.data_analytics.exploration import show_mentions_by_channel
from quotaclimat.data_analytics.exploration import show_mentions_by_time_of_the_day
from quotaclimat.data_analytics.exploration import show_mentions_over_time
from quotaclimat.data_analytics.exploration import show_mentions_treemap
from quotaclimat.data_analytics.exploration import show_piechart_split_tv_radio

from quotaclimat.data_processing.read_format_deduplicate import read_and_format_one
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_all_data_dump
from quotaclimat.data_processing.read_format_deduplicate import deduplicate_extracts
from tqdm.auto import tqdm

def make_ws_palette(n = 10):
    return [f"rgb({int(x*255)},{int(y*255)},{int(z*255)})" for x,y,z in list(sns.color_palette("RdBu_r",n_colors=n))]


from quotaclimat.utils.plotly_theme import WARMING_STRIPES_SEQUENCE

COLOR_RADIO = WARMING_STRIPES_SEQUENCE[0]
COLOR_TV= WARMING_STRIPES_SEQUENCE[1]
COLOR_ECO = WARMING_STRIPES_SEQUENCE[3]


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
%%html
<style type="text/css">
@import url('http://fonts.googleapis.com/css?family=Poppins');
</style>

# Récupération et préparation des données

## Récupérer et préparer les données de la journée
Choisissez bien le bon fichier

## Filtrer sur les 50 chaînes TV et Radio avec le plus d'audience

In [63]:
top_audiences = pd.read_excel("../../data/channels.xlsx",sheet_name = "top_audiences")
top_audiences.loc[top_audiences.channel_name == 'Sud Radio', 'channel_name'] = 'Sud Radio Paris'
top_audiences["channel_id"] = top_audiences["channel_name"] + "_" + top_audiences["media"]
top_channels_tv = top_audiences.query("media=='TV'")["channel_name"].tolist()
top_channels_radio = top_audiences.query("media=='Radio'")["channel_name"].tolist()

top_channels_tv_gen = top_audiences.query("type=='Généraliste'")["channel_name"].tolist()
top_channels_tv_info = top_audiences.query("type=='Information en continu'")["channel_name"].tolist()

In [64]:

data_all_days = []
for folder in tqdm(["05","06","07","08","09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"]):
    
    day = f"{folder}11"

    data_total = read_and_format_all_data_dump(path_folder = f"../../data/COP27/{day}/",path_channel_metadata=None)

    data_total["channel_id"] = data_total["channel_name"] + "_" + data_total["media"]
    data_total = data_total.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")

    data_total.loc[data_total["media"]=="TV","media2"] = "TV" + " - " + data_total["type"]
    data_total.loc[data_total["media"]=="Radio","media2"] = "Radio"

    data_total = filter_data_between_hours(data_total,"06:00","24:00").reset_index(drop = True)
    #data_total.loc[data_total["keywords"].map(lambda x : "cop27" in x),"keyword"] = "COP27"

    data_total = data_total[data_total["keyword"].isin(['COP27', 'ecologie'])]
    assert sorted(data_total["keyword"].unique()) == ["COP27","ecologie"]
    
    
    data_total["day_extract"] = day
    data_all_days.append(data_total)
    
data_all_days = pd.concat(data_all_days,axis = 0,ignore_index = True)

data_all_days["day_dt"] = data_all_days["date"].dt.date
data_all_days["day_str"] = data_all_days["day_dt"].map(str)

  0%|          | 0/16 [00:00<?, ?it/s]

In [8]:
data_cop27 = data_all_days[data_all_days.keyword == 'COP27'].copy()


In [9]:
(data_cop27.date.max() - data_cop27.date.min())

Timedelta('15 days 15:40:00')

# 1. Analyse sur le total

In [10]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = (data_cop27['day_dt'].max() - data_cop27['day_dt'].min()).days + 1
n_channels = 25 #TV and Radio


media_time = data_cop27.groupby(["media2"]).agg({"count":"sum","channel_name":"nunique"})
media_time.loc["Radio","n_channels"] = 25
media_time.loc["TV - Généraliste","n_channels"] = 19
media_time.loc["TV - Information en continu","n_channels"] = 6
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time

  media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)


Unnamed: 0,count,channel_name,n_channels,media_time,total_time,media_part
Radio,3589.0,22.0,25.0,7178.0,432000.0,0.016616
TV - Généraliste,277.0,9.0,19.0,554.0,328320.0,0.001687
TV - Information en continu,2231.0,6.0,6.0,4462.0,103680.0,0.043036
Total,6097.0,37.0,50.0,12194.0,864000.0,0.014113


In [11]:
fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio cop 27",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig

## Classements

In [12]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_tv_info,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                                )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV - Chaînes d'information en continu")
fig.update_traces(marker_color=COLOR_TV)
fig.show()

multiplier = 2 / (1 * 60 * 18 * n_days)


fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                               )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV -  Chaînes généralistes")
fig.update_traces(marker_color=COLOR_TV)
fig.show()



fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_radio,n = 25,split = "keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement Radio")
fig.update_traces(marker_color=COLOR_RADIO)
fig.show()

## JT chaine généralistes

In [59]:
multiplier = 2 / (1 * 60 * 3 * n_days)
data_cop27_jt = data_cop27[(data_cop27.date.dt.hour >= 19) & (data_cop27.date.dt.hour < 22)]

fig = show_mentions_by_channel(data_cop27_jt,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                               )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV -  Chaînes généralistes")
fig.update_traces(marker_color=COLOR_TV)
fig.show()

## Autre sujet écologiques

In [65]:
data_total = data_all_days[data_all_days.keyword.isin(['COP27', 'ecologie'])].copy()

media_time = data_total.groupby(["media2"]).agg({"count":"sum","channel_name":"nunique"})
media_time.loc["Radio","n_channels"] = 25
media_time.loc["TV - Généraliste","n_channels"] = 19
media_time.loc["TV - Information en continu","n_channels"] = 6
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [14]:
DISCRETE_MAP = {
    "COP27":WARMING_STRIPES_SEQUENCE[2],
    "écologie":WARMING_STRIPES_SEQUENCE[0],
}

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_tv_info,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes d'information en continu",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()


fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes généralistes",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()


fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_radio,n = 25,split = "keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",title = "Classement Radio",legend_title = "")
# fig.update_traces(marker_color='#f49182')
fig.show()

In [71]:
multiplier = 2 / (1 * 60 * 1 * n_days)
data_total_jt = data_total[(data_total.date.dt.hour >= 20) & (data_total.date.dt.hour < 21)]

fig = show_mentions_by_channel(data_total_jt,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",color_discrete_map = DISCRETE_MAP
                               )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV -  Chaînes généralistes entre 20h et 21h")
#fig.update_traces(marker_color=COLOR_TV)
fig.show()

In [72]:
# Emission agroforestrie France 2

In [75]:
data_total.columns

Index(['index', 'channel', 'radio', 'text', 'highlight', 'url', 'date', 'time',
       'time_of_the_day', 'media', 'path_file', 'count', 'duration', 'keyword',
       'channel_name', 'channel_id', 'type', 'media2', 'day_extract', 'day_dt',
       'day_str'],
      dtype='object')

In [81]:
data_total.time_of_the_day

0       0 days 18:12:00
1       0 days 17:48:00
2       0 days 18:14:00
3       0 days 18:44:00
4       0 days 17:58:00
              ...      
19011   0 days 09:20:00
19012   0 days 09:32:00
19013   0 days 09:12:00
19014   0 days 07:00:00
19015   0 days 07:00:00
Name: time_of_the_day, Length: 19016, dtype: timedelta64[ns]

In [84]:
(15 + 7 + 6)*2

56

In [82]:
data_total['hour'] = data_total.date.dt.hour
data_total[(data_total.channel_name == 'France 2' ) & (data_total.day_str == '2022-11-08' )].groupby('hour').count().plot(backend='plotly')


In [78]:
data_total[data_total.channel_name == 'France 2' ].groupby('day_str').count().plot(backend='plotly')


# Focus evolution over time

In [15]:
method = "first" #average or first
ranking = (data_all_days
           .groupby(["channel_name","media","media2","keyword","day_dt"])["count"].sum()
           .unstack("day_dt").fillna(0.0).stack()
           .unstack("keyword").fillna(0.0).stack()
#            .unstack("channel_name").fillna(0.0).stack()
           .reset_index(drop = False)
           .rename(columns = {0:"count"})
          )

ranking["minutes"] = ranking["count"] * 2
ranking["total_time"] = 18 * 60 * n_days
ranking["media_part"] = ranking["minutes"] / ranking["total_time"]

ranking["rank"] = ranking.groupby(["media2","day_dt","keyword"])["count"].transform("rank",ascending = False,method = method)
ranking["count_total"] = ranking.groupby(["channel_name","media2","day_dt"])["count"].transform("sum")
ranking["rank_total"] = ranking.groupby(["media2","day_dt","keyword"])["count_total"].transform("rank",ascending = False,method = method)
ranking["media_part_total"] = ranking["count_total"] * 2 / ranking["total_time"]
ranking["day_str"] = ranking["day_dt"].map(str)
ranking.query("media2=='TV - Généraliste' and keyword=='COP27'").head()

Unnamed: 0,channel_name,media,media2,day_dt,keyword,count,minutes,total_time,media_part,rank,count_total,rank_total,media_part_total,day_str
64,C8,TV,TV - Généraliste,2022-11-05,COP27,2.0,4.0,17280,0.000231,3.0,10.0,2.0,0.001157,2022-11-05
66,C8,TV,TV - Généraliste,2022-11-06,COP27,1.0,2.0,17280,0.000116,5.0,3.0,5.0,0.000347,2022-11-06
68,C8,TV,TV - Généraliste,2022-11-07,COP27,2.0,4.0,17280,0.000231,6.0,14.0,6.0,0.00162,2022-11-07
70,C8,TV,TV - Généraliste,2022-11-08,COP27,1.0,2.0,17280,0.000116,6.0,11.0,5.0,0.001273,2022-11-08
72,C8,TV,TV - Généraliste,2022-11-09,COP27,0.0,0.0,17280,0.0,6.0,5.0,7.0,0.000579,2022-11-09


In [16]:
import seaborn as sns
ranking_data_chart = ranking.query("media2=='TV - Généraliste' and keyword=='cop27'")
def show_ranking_chart(ranking_data,title = "",height = 500,total = False):
    
    rank_col = "rank" if not total else "rank_total"
    percent_col = "media_part" if not total else "media_part_total"
    
    
    annot_labels_data = (ranking_data
                         .loc[ranking_data["day_dt"]==ranking_data["day_dt"].max()]
                         .sort_values(rank_col,ascending = True)
                        )
    
    fig = px.line(
        ranking_data,
        x = "day_dt",
        y = rank_col,
        color = "channel_name",
        text = rank_col,
        markers = True,
        color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
        category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
    )

    # fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
    fig.update_layout(
        xaxis_tickmode = "linear",yaxis_autorange = "reversed",
        xaxis_showgrid = False,yaxis_showgrid = False,
        xaxis_title = "Date de la COP27",
        yaxis_title = None,
        yaxis_showticklabels = False,
    )
    fig.update_traces(marker_size= 20,
                      marker = dict(line=dict(width=2)),textposition="middle center",
                      textfont_size=12,
                      textfont_color="white",
                     )



    annotations = []





    # Adding labels    
    for i,row in annot_labels_data.iterrows():
        annotations.append(dict(xref='paper', x=0.95, y=row[rank_col],
                                      xanchor='left', yanchor='middle',
                                      text=row["channel_name"] + " ("+ f'{row[percent_col]:.1%}'+")",
                                      font=dict(family='Poppins',
                                                size=12),
                                      showarrow=False))
    #     # labeling the right_side of the plot
    #     annotations.append(dict(xref='paper', x=0.95, y=y_trace[11],
    #                                   xanchor='left', yanchor='middle',
    #                                   text='{}%'.format(y_trace[11]),
    #                                   font=dict(family='Arial',
    #                                             size=16),


    fig.update_layout(
    #     width = 1000,
        margin_r = 200,
        height = height,
        annotations=annotations,
        showlegend = False,title = title,
        font_family = "Poppins"
                     )
    return fig





In [17]:

show_ranking_chart(
    ranking.query("media2=='TV - Généraliste' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV généralistes sur la COP27",
    height = 600,
).show()

show_ranking_chart(
    ranking.query("media2=='TV - Généraliste' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV généralistes sur la COP27 et l'écologie",
    height = 600,
    total = True
).show()

In [18]:
show_ranking_chart(
    ranking.query("media2=='TV - Information en continu' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV d'info en continu sur la COP27",
    height = 400,
).show()

show_ranking_chart(
    ranking.query("media2=='TV - Information en continu' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV d'info en continu sur la COP27 et l'écologie",
    height = 400,
    total = True,
).show()

In [19]:
show_ranking_chart(
    ranking.query("media2=='Radio' and keyword=='COP27'"),
    "Evolution du classement des chaînes Radio sur la COP27",
    height = 800
).show()

show_ranking_chart(
    ranking.query("media2=='Radio' and keyword=='COP27'"),
    "Evolution du classement des chaînes Radio sur la COP27 et l'écologie",
    height = 700,
    total = True,
).show()

## Evolution du volume médiatique

In [20]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days.query("keyword=='COP27'"),
    freq = "1D",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days.query("keyword=='COP27' or keyword=='ecologie'"),
    freq = "1D",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
# fig.update_traces(marker_color=WARMING_STRIPES_SEQUENCE)
fig.show()

In [21]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days.query("keyword=='COP27'"),
    freq = "1D",
    split = "media2",
    #text_auto = ".1%",
    method = multiplier,
        kind='area',

    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique couvrant la COP27",legend_title = "",yaxis_tickformat='0%',
                 )
fig.update_traces(textposition="top right")

fig.show()



In [22]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    split = "media2",
    #text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE,
    kind='area',
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
# fig.update_traces(marker_color=WARMING_STRIPES_SEQUENCE)
fig.update_traces(textposition="top right")

fig.show()

In [23]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    split = "keyword",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

In [24]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 1 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1H",
    split = "media",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

## Evolution des volumes médiatiques par chaîne

In [25]:
ranking_chart_data = ranking.query("media2=='TV - Généraliste' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )
px.colors.sequential.RdBu_r
fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
#     color_discrete_map = "RdBu_r",
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne TV généralistes sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=12,
#     textfont_color="white"
)

fig

In [26]:
ranking_chart_data = ranking.query("media2=='TV - Information en continu' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )

fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne TV d'infos en continu sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=13,
#     textfont_color="white"
)

fig

In [27]:
ranking_chart_data = ranking.query("media2=='Radio' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )

fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne Radio sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=12,
#     textfont_color="white"
)

In [28]:
n_days = (ranking.day_dt.max() - ranking.day_dt.min()).days + 1
DISCRETE_MAP = {
    "COP27":WARMING_STRIPES_SEQUENCE[2],
    "écologie":WARMING_STRIPES_SEQUENCE[0],
}

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)
daat_to_plot = data_all_days.copy()
fig = show_mentions_by_channel(daat_to_plot,list_of_channels=top_channels_tv_info,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes d'information en continu: mentions semaine de la COP27",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(daat_to_plot,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes généralistes: mentions semaine de la COP27",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()


# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(daat_to_plot,list_of_channels=top_channels_radio,n = 25,split = "keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",title = "Classement Radio : mentions semaine de la COP27",legend_title = "")
# fig.update_traces(marker_color='#f49182')
fig.show()

In [29]:
n_days = 
fig = px.bar(ranking.query("media2=='TV - Généraliste' and keyword=='COP27'").groupby('channel_name').sum().sort_values(by='media_part_total')[['media_part_total']],y = "media_part_total",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur TV - Généraliste pour COP27",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_TV])
fig.show()

SyntaxError: invalid syntax (3135872940.py, line 1)


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,media_part_total
channel_name,Unnamed: 1_level_1
TFX,0.009259
RMC Découverte,0.011111
W9,0.027778
Canal+,0.042593
C8,0.092593
TF1,0.105556
M6,0.109259
TMC,0.140741
RMC Story,0.212963
France 2,0.440741


In [None]:
ranking.query("media2=='TV - ' and keyword=='COP27'").groupby('channel_name').sum().sort_values(by='media_part_total')[['media_part_total']]

In [None]:
ranking.query("media2=='TV - Information en continu' and keyword=='COP27'").groupby('channel_name').sum().sort_values(by='media_part_total')[['media_part_total']]


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,media_part_total
channel_name,Unnamed: 1_level_1
LCI,0.492593
LCP,0.544444
CNEWS,0.661111
BFMTV,0.866667
France Info:,2.703704
France 24,3.4


In [None]:
ranking.query("media2=='TV - Généraliste'").groupby('channel_name').sum().sort_values(by='media_part_total')[['media_part_total']]


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,media_part_total
channel_name,Unnamed: 1_level_1
TFX,0.018519
RMC Découverte,0.022222
W9,0.055556
Canal+,0.085185
C8,0.185185
TF1,0.211111
M6,0.218519
TMC,0.281481
RMC Story,0.425926
France 2,0.881481


# Est ce que la COP est un inibiteur au traitement des sujets ecoliguqe?
On compare la courverture sur la semaine de la cop à la couverture d'une autre semaine d'octobre

In [76]:
data_october = read_and_format_all_data_dump(path_folder = f"../../data/keywords/ecologie_keywords/",path_channel_metadata=None)

data_october["channel_id"] = data_october["channel_name"] + "_" + data_october["media"]
data_october = data_october.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")
data_october.loc[data_october["media"]=="TV","media2"] = "TV" + " - " + data_october["type"]
data_october.loc[data_october["media"]=="Radio","media2"] = "Radio"

data_october = filter_data_between_hours(data_october,"06:00","24:00").reset_index(drop = True)

In [88]:
n_days = (data_october.date.max() - data_october.date.min()).days + 1

In [93]:
media_time_october = data_october.groupby(["media2"]).agg({"count":"sum","channel_name":"nunique"})
media_time_october.loc["Radio","n_channels"] = 25
media_time_october.loc["TV - Généraliste","n_channels"] = 19
media_time_october.loc["TV - Information en continu","n_channels"] = 6
media_time_october = media_time_october.append(pd.DataFrame(media_time_october.sum(axis = 0).rename("Total")).T)
media_time_october["media_time_october"] = media_time_october["count"] * 2
media_time_october["total_time"] = media_time_october["n_channels"] * n_days * 18 * 60
media_time_october["media_part"] = media_time_october["media_time_october"] / media_time_october["total_time"]

fig = px.bar(media_time_october.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio du 2/10 au 15/10",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [99]:
media_time['Interval de temps'] = 'Semaines de la COP27'
media_time_october['Interval de temps'] = '1er quinzaine d Octobre'

media_time_cop_and_october = pd.concat([media_time, media_time_october])

In [100]:
fig = px.bar(media_time_cop_and_october.reset_index(),x = "index",y = "media_part", color='Interval de temps', height = 400,text_auto = ".1%", barmode="group")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
#fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig.show()

## Compare to COP 26

In [None]:
data_october = read_and_format_all_data_dump(path_folder = f"../../data/keywords/ecologie_keywords/",path_channel_metadata=None)

data_october["channel_id"] = data_october["channel_name"] + "_" + data_october["media"]
data_october = data_october.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")
data_october.loc[data_october["media"]=="TV","media2"] = "TV" + " - " + data_october["type"]
data_october.loc[data_october["media"]=="Radio","media2"] = "Radio"

data_october = filter_data_between_hours(data_october,"06:00","24:00").reset_index(drop = True)

In [90]:
multiplier = 2 / (n_days* 50 * 18 * 60)
(data_october.groupby(['keyword'])['count'].sum() * multiplier * 100).sort_values()

keyword
ecologie    1.231217
Name: count, dtype: float64

# Comparison autre sujet

In [37]:
from tqdm.auto import tqdm

data_all_days = []
for folder in tqdm(["06","07","08","09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"]):
    
    day = f"{folder}11"

    data_total = read_and_format_all_data_dump(path_folder = f"../../data/COP27/{day}/",path_channel_metadata=None)

    data_total["channel_id"] = data_total["channel_name"] + "_" + data_total["media"]
    data_total = data_total.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")

    data_total.loc[data_total["media"]=="TV","media2"] = "TV" + " - " + data_total["type"]
    data_total.loc[data_total["media"]=="Radio","media2"] = "Radio"

    data_total = filter_data_between_hours(data_total,"06:00","24:00").reset_index(drop = True)
    #data_total = data_total[data_total["keyword"].isin(['COP27', 'ecologie'])]
    #assert sorted(data_total["keyword"].unique()) == ["COP27","ecologie"]
    
    #data_total.loc[data_total["keywords"].map(lambda x : "cop27" in x), "keyword"] = "COP27"
    
    data_total["day_extract"] = day
    data_all_days.append(data_total)
    
data_all_days = pd.concat(data_all_days, axis = 0,ignore_index = True)

data_all_days["day_dt"] = data_all_days["date"].dt.date
data_all_days["day_str"] = data_all_days["day_dt"].map(str)
method = "first" #average or first


  0%|          | 0/15 [00:00<?, ?it/s]

In [44]:
data_all_days.shape

(54519, 21)

In [46]:
data_all_days[data_all_days.keyword != 'oceanviking'].to_csv('df_aggregated_multiple_kewords.csv')

In [41]:
n_days = (data_all_days.date.max() - data_all_days.date.min()).days + 1

In [42]:
multiplier = 2 / ((n_days)* 50 * 18 * 60)
(data_all_days.groupby(['keyword'])['count'].sum() * multiplier * 100).sort_values()

keyword
oceanviking    0.272222
xvdefrance     0.502546
routedurhum    0.539583
COP27          1.350926
qatar          2.243519
migrants       2.375694
midterm        2.471759
ecologie       2.863889
Name: count, dtype: float64

In [None]:
data_all_days.date.min()

Timestamp('2022-11-05 23:14:00')

In [None]:
data_all_days.day_str.unique()

array(['2022-11-06', '2022-11-05', '2022-11-07', '2022-11-08',
       '2022-11-09', '2022-11-10', '2022-11-11', '2022-11-12'],
      dtype=object)

In [39]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days[data_all_days.date >= '2022-11-06'],
    freq = "1D",
    split = "keyword",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 de différent sujet",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

In [None]:
df_count.to_csv('percentage_coverage_mutliple_keywords_per_media_categorie_over_time.csv')

In [None]:
ranking_tv_continue_all_kw = ranking.query("media2=='TV - Information en continu' and day_str > '2022-11-05'")


In [None]:
data_all_days.groupby('keyword').day_str.min()

keyword
COP27          2022-11-05
écologie      2022-11-09
midterm        2022-11-05
oceanviking    2022-11-07
qatar          2022-11-06
routedurhum    2022-11-06
écologie       2022-11-05
Name: day_str, dtype: object

In [None]:
#ranking.keyword.unique()


In [None]:
ranking_tv_continue_all_kw = ranking.query("media2=='Radio' and day_str > '2022-11-05'")
minute_covered_radio = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum()

ranking_tv_continue_all_kw = ranking.query("media2=='TV - Généraliste' and day_str > '2022-11-05'")
minute_covered_tv_generalist = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum()

ranking_tv_continue_all_kw = ranking.query("media2=='TV - Information en continu' and day_str > '2022-11-05'")
minute_covered_tv_info = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum().sort_values()



all_minutes_df = pd.DataFrame([minute_covered_tv_info, minute_covered_tv_generalist, minute_covered_radio])
all_minutes_df.index = ['minute_covered_tv_info', 'minute_covered_tv_generalist', 'minute_covered_radio' ]

In [None]:
all_minutes_df.columns

Index(['xvdefrance', 'oceanviking', 'routedurhum', 'qatar', 'ecologie',
       'COP27', 'migrants', 'midterm'],
      dtype='object', name='keyword')

In [None]:
all_minutes_df_metled = pd.melt(all_minutes_df.reset_index(), id_vars='index')
all_minutes_df_metled.columns = ['Media', 'keyword', 'minutes']

In [None]:
all_minutes_df.T
fig = px.bar(
        all_minutes_df_metled,
        x='keyword',
        y="minutes",
        color='Media'

    )

fig.update_xaxes(tickangle=-45, title=None)
fig.update_yaxes(title='Nombre de citation')
fig.update_layout(margin={"b": 100}, title='Nombre de citation de différent mot cléf par media sur la premiere semaine de la COP')
