# COP27
> Ce notebook sert de marche à suivre pour analyser les données d'une journée de COP

In [1]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import datetime

import sys
sys.path.append("../../")

%load_ext autoreload
%autoreload 2

from quotaclimat.utils.plotly_theme import *

In [2]:
%%html
<style type="text/css">
@import url('http://fonts.googleapis.com/css?family=Poppins');
</style>

# Récupération et préparation des données

## Récupérer et préparer les données de la journée
Choisissez bien le bon fichier

In [3]:
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_one
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_all_data_dump
from quotaclimat.data_processing.read_format_deduplicate import deduplicate_extracts

def make_ws_palette(n = 10):
    return [f"rgb({int(x*255)},{int(y*255)},{int(z*255)})" for x,y,z in list(sns.color_palette("RdBu_r",n_colors=n))]

In [4]:
day = "11"
folder = f"../../data/COP27/{day}11"
paths = [os.path.join(folder,x) for x in os.listdir(folder)]
paths

['../../data/COP27/1111/20221111_20221111_COP27.xlsx',
 '../../data/COP27/1111/20221111_20221111_écologie.xlsx']

In [5]:
path_cop27 = [x for x in paths if "_COP27" in x][0]
path_ecologie = [x for x in paths if "écologie" in x.lower()][0]

# IMPORTANT DE RESPECTER CE FORMAT POUR LES FICHIERS
assert "COP27" in path_cop27
assert 'écologie'  in path_ecologie

In [6]:
data = read_and_format_one(path_file = path_cop27,path_channels=None)
data.shape

(1122, 14)

In [7]:
data.keyword.unique()

array(['COP27'], dtype=object)

## Filtrer sur les 50 chaînes TV et Radio avec le plus d'audience

In [8]:
top_audiences = pd.read_excel("../../data/channels.xlsx",sheet_name = "top_audiences")
top_audiences["channel_id"] = top_audiences["channel_name"] + "_" + top_audiences["media"]
top_channels_tv = top_audiences.query("media=='TV'")["channel_name"].tolist()
top_channels_radio = top_audiences.query("media=='Radio'")["channel_name"].tolist()

top_channels_tv_gen = top_audiences.query("type=='Généraliste'")["channel_name"].tolist()
top_channels_tv_info = top_audiences.query("type=='Information en continu'")["channel_name"].tolist()

In [9]:
data["channel_id"] = data["channel_name"] + "_" + data["media"]
data = data.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")

data.loc[data["media"]=="TV","media2"] = "TV" + " - " + data["type"]
data.loc[data["media"]=="Radio","media2"] = "Radio"

data.shape

(467, 17)

##### Nombre de chaînes TV ou Radio dans l'échantillon
Vérifier combien de chaînes sur les 50 parlent du sujet 

In [10]:
data.drop_duplicates(subset = ["channel_name"]).groupby(["media"])["channel_name"].count()

media
Radio    15
TV       11
Name: channel_name, dtype: int64

In [11]:
data.drop_duplicates(subset = ["channel_name"]).groupby(["media2"])["channel_name"].count()

media2
Radio                          15
TV - Généraliste                6
TV - Information en continu     5
Name: channel_name, dtype: int64

## Filtrer dans les horaires d'antenne entre 6h et minuit

In [12]:
from quotaclimat.data_analytics.exploration import filter_data_between_hours

In [13]:
data = filter_data_between_hours(data,"06:00","24:00")

In [14]:
data.shape

(440, 17)

# Préparation des analyses

In [15]:
from quotaclimat.utils.channels import TOP_25_CHANNELS,TOP_CHANNELS_TV,TOP_CHANNELS_TV_8
from quotaclimat.data_analytics.exploration import show_mentions_by_channel
from quotaclimat.data_analytics.exploration import show_mentions_by_time_of_the_day
from quotaclimat.data_analytics.exploration import show_mentions_over_time
from quotaclimat.data_analytics.exploration import show_mentions_treemap
from quotaclimat.data_analytics.exploration import show_piechart_split_tv_radio

In [16]:
from quotaclimat.utils.plotly_theme import WARMING_STRIPES_SEQUENCE

COLOR_RADIO = WARMING_STRIPES_SEQUENCE[0]
COLOR_TV= WARMING_STRIPES_SEQUENCE[1]
COLOR_ECO = WARMING_STRIPES_SEQUENCE[3]

## Analyse 1 - volume médiatique total sur les 50 chaînes

In [17]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
n_channels = 25 #TV and Radio

media_time = data.groupby(["media"]).agg({"count":"sum","channel_name":"nunique"})
media_time["n_channels"] = 25
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time

  media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)


Unnamed: 0,count,channel_name,n_channels,media_time,total_time,media_part
Radio,274,15,25,548,27000,0.020296
TV,166,11,25,332,27000,0.012296
Total,440,26,50,880,54000,0.016296


In [18]:
fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV])
fig

## Analyse 1.2 split par type de TV

In [19]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
n_channels = 25 #TV and Radio

media_time = data.groupby(["media2"]).agg({"count":"sum","channel_name":"nunique"})
media_time.loc["Radio","n_channels"] = 25
media_time.loc["TV - Généraliste","n_channels"] = 19
media_time.loc["TV - Information en continu","n_channels"] = 6
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,count,channel_name,n_channels,media_time,total_time,media_part
Radio,274.0,15.0,25.0,548.0,27000.0,0.020296
TV - Généraliste,18.0,6.0,19.0,36.0,20520.0,0.001754
TV - Information en continu,148.0,5.0,6.0,296.0,6480.0,0.045679
Total,440.0,26.0,50.0,880.0,54000.0,0.016296


In [20]:
fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig

## Analyse 2 - TOP 3 TV et Radio

In [21]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_tv_info,n = 3,
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Podium TV - Chaînes d'information en continu",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=COLOR_TV)
fig

In [22]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_tv_gen,n = 3,
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Podium TV -  Chaînes généralistes",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=COLOR_TV)
fig

In [23]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_radio,n = 3,
                               method = multiplier,height = 400,text_auto = ".1%")

fig.update_layout(yaxis_tickformat='0%',
                  title = "Podium Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=COLOR_RADIO)
fig

## Analyse 3 - Classement complet TV et Radio

In [24]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_tv_info,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                                )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV - Chaînes d'information en continu")
fig.update_traces(marker_color=COLOR_TV)
fig.show()

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                               )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement TV -  Chaînes généralistes")
fig.update_traces(marker_color=COLOR_TV)
fig.show()

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data,list_of_channels=top_channels_radio,n = 25,split = "keyword",
                               method = multiplier,height = 400,text_auto = ".1%",
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",legend_title = "",title = "Classement Radio")
fig.update_traces(marker_color=COLOR_RADIO)
fig.show()

# Autres sujets écologiques

In [27]:
data_total = read_and_format_all_data_dump(path_folder = "../../data/cop27/0911/",path_channel_metadata=None)

data_total["channel_id"] = data_total["channel_name"] + "_" + data_total["media"]
data_total = data_total.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")

data_total.loc[data_total["media"]=="TV","media2"] = "TV" + " - " + data_total["type"]
data_total.loc[data_total["media"]=="Radio","media2"] = "Radio"

data_total = filter_data_between_hours(data_total,"06:00","24:00")

data_total.loc[data_total["keywords"].map(lambda x : "COP27" in x),"keyword"] = "COP27"

data_total.shape

(595, 18)

In [28]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1

media_time = data_total.groupby(["media"]).agg({"count":"sum","channel_name":"nunique"})
media_time["n_channels"] = 25
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV])
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [29]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1

media_time = data_total.groupby(["media2"]).agg({"count":"sum","channel_name":"nunique"})
media_time.loc["Radio","n_channels"] = 25
media_time.loc["TV - Généraliste","n_channels"] = 19
media_time.loc["TV - Information en continu","n_channels"] = 6
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["n_channels"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

fig = px.bar(media_time.reset_index(),x = "index",y = "media_part",height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%',
                  title = "Volume médiatique total sur les 50 chaînes TV et Radio",
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color=[COLOR_RADIO,COLOR_TV,COLOR_TV])
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



## Analyse 3

In [30]:
DISCRETE_MAP = {
    "COP27":WARMING_STRIPES_SEQUENCE[2],
    "écologie":WARMING_STRIPES_SEQUENCE[0],
}

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_tv_info,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes d'information en continu",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()

# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_tv_gen,n = 25,split="keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP,
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
                  title = "Classement TV - Chaînes généralistes",legend_title = "")
# fig.update_traces(marker_color='#e6381b')
fig.show()


# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * 1)

fig = show_mentions_by_channel(data_total,list_of_channels=top_channels_radio,n = 25,split = "keyword",
                               method = multiplier,height = 500,text_auto = ".1%",
                               color_discrete_map = DISCRETE_MAP
                              )
fig.update_layout(yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",title = "Classement Radio",legend_title = "")
# fig.update_traces(marker_color='#f49182')
fig.show()

# Analyse des sujets mentionnés

In [31]:
from quotaclimat.data_processing.keyword_tool import KeywordsTool

kwt = KeywordsTool(case_sensitive=False,lowercase = True)

kwt.load_from_airtable(airtable_table_name="COP27",keyword_col = "name",variants_col=["alternatives_mediatree","alternatives"])

AssertionError: 

In [None]:
counts_total = kwt.count_keywords_on_corpus(data["text"],as_melted = True)
counts_total = counts_total.query("name!='Pays'")

NameError: name 'kwt' is not defined

In [None]:
px.treemap(
    counts_total.groupby(["name","category"])["count"].sum().reset_index(),
    path = ["category","name"],
    values = "count",
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)

NameError: name 'counts_total' is not defined

In [None]:
data_dsm = data.reset_index(drop=True).loc[counts_total.query("name=='Fonds marins'")["text_id"].tolist()]

fig = show_mentions_by_channel(data_dsm,n = 25,
                               method = "minutes",height = 400,text_auto = ".1s")
fig.update_layout(
                  font_family="Poppins",yaxis_title="% du volume médiatique",xaxis_title = "")
fig.update_traces(marker_color='#e6381b')
fig

NameError: name 'counts_total' is not defined

# Focus 3 jours + classement

In [160]:
from tqdm.auto import tqdm

data_all_days = []
for folder in tqdm(["05","06","07","08","09", "10", "11"]):
    
    day = f"{folder}11"

    data_total = read_and_format_all_data_dump(path_folder = f"../../data/COP27/{day}/",path_channel_metadata=None)

    data_total["channel_id"] = data_total["channel_name"] + "_" + data_total["media"]
    data_total = data_total.merge(top_audiences[["channel_id","type"]],on = ["channel_id"],how = "inner")

    data_total.loc[data_total["media"]=="TV","media2"] = "TV" + " - " + data_total["type"]
    data_total.loc[data_total["media"]=="Radio","media2"] = "Radio"

    data_total = filter_data_between_hours(data_total,"06:00","24:00").reset_index(drop = True)
    #data_total = data_total[data_total["keyword"].isin(['COP27', 'écologie'])]
    #assert sorted(data_total["keyword"].unique()) == ['cop27', 'écologie']
    
    data_total.loc[data_total["keywords"].map(lambda x : "cop27" in x),"keyword"] = "COP27"
    
    
    data_total["day_extract"] = day
    data_all_days.append(data_total)
    
data_all_days = pd.concat(data_all_days,axis = 0,ignore_index = True)

data_all_days["day_dt"] = data_all_days["date"].dt.date
data_all_days["day_str"] = data_all_days["day_dt"].map(str)

  0%|          | 0/7 [00:00<?, ?it/s]

In [161]:
method = "first" #average or first
ranking = (data_all_days
           .groupby(["channel_name","media","media2","keyword","day_dt"])["count"].sum()
           .unstack("day_dt").fillna(0.0).stack()
           .unstack("keyword").fillna(0.0).stack()
#            .unstack("channel_name").fillna(0.0).stack()
           .reset_index(drop = False)
           .rename(columns = {0:"count"})
          )

ranking["minutes"] = ranking["count"] * 2
ranking["total_time"] = 18 * 60 * 1
ranking["media_part"] = ranking["minutes"] / ranking["total_time"]

ranking["rank"] = ranking.groupby(["media2","day_dt","keyword"])["count"].transform("rank",ascending = False,method = method)
ranking["count_total"] = ranking.groupby(["channel_name","media2","day_dt"])["count"].transform("sum")
ranking["rank_total"] = ranking.groupby(["media2","day_dt","keyword"])["count_total"].transform("rank",ascending = False,method = method)
ranking["media_part_total"] = ranking["count_total"] * 2 / ranking["total_time"]
ranking["day_str"] = ranking["day_dt"].map(str)
ranking.query("media2=='TV - Généraliste' and keyword=='COP27'").head()

Unnamed: 0,channel_name,media,media2,day_dt,keyword,count,minutes,total_time,media_part,rank,count_total,rank_total,media_part_total,day_str
98,C8,TV,TV - Généraliste,2022-11-05,COP27,1.0,2.0,1080,0.001852,2.0,9.0,2.0,0.016667,2022-11-05
105,C8,TV,TV - Généraliste,2022-11-06,COP27,0.0,0.0,1080,0.0,3.0,4.0,5.0,0.007407,2022-11-06
112,C8,TV,TV - Généraliste,2022-11-07,COP27,0.0,0.0,1080,0.0,5.0,9.0,7.0,0.016667,2022-11-07
119,C8,TV,TV - Généraliste,2022-11-08,COP27,0.0,0.0,1080,0.0,8.0,12.0,6.0,0.022222,2022-11-08
126,C8,TV,TV - Généraliste,2022-11-09,COP27,0.0,0.0,1080,0.0,3.0,10.0,8.0,0.018519,2022-11-09


In [70]:
import seaborn as sns
ranking_data_chart = ranking.query("media2=='TV - Généraliste' and keyword=='cop27'")
def show_ranking_chart(ranking_data,title = "",height = 500,total = False):
    
    rank_col = "rank" if not total else "rank_total"
    percent_col = "media_part" if not total else "media_part_total"
    
    
    annot_labels_data = (ranking_data
                         .loc[ranking_data["day_dt"]==ranking_data["day_dt"].max()]
                         .sort_values(rank_col,ascending = True)
                        )
    
    fig = px.line(
        ranking_data,
        x = "day_dt",
        y = rank_col,
        color = "channel_name",
        text = rank_col,
        markers = True,
        color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
        category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
    )

    # fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
    fig.update_layout(
        xaxis_tickmode = "linear",yaxis_autorange = "reversed",
        xaxis_showgrid = False,yaxis_showgrid = False,
        xaxis_title = "Date de la COP27",
        yaxis_title = None,
        yaxis_showticklabels = False,
    )
    fig.update_traces(marker_size= 20,
                      marker = dict(line=dict(width=2)),textposition="middle center",
                      textfont_size=12,
                      textfont_color="white",
                     )



    annotations = []





    # Adding labels    
    for i,row in annot_labels_data.iterrows():
        annotations.append(dict(xref='paper', x=0.95, y=row[rank_col],
                                      xanchor='left', yanchor='middle',
                                      text=row["channel_name"] + " ("+ f'{row[percent_col]:.1%}'+")",
                                      font=dict(family='Poppins',
                                                size=12),
                                      showarrow=False))
    #     # labeling the right_side of the plot
    #     annotations.append(dict(xref='paper', x=0.95, y=y_trace[11],
    #                                   xanchor='left', yanchor='middle',
    #                                   text='{}%'.format(y_trace[11]),
    #                                   font=dict(family='Arial',
    #                                             size=16),


    fig.update_layout(
    #     width = 1000,
        margin_r = 200,
        height = height,
        annotations=annotations,
        showlegend = False,title = title,
        font_family = "Poppins"
                     )
    return fig





In [71]:

show_ranking_chart(
    ranking.query("media2=='TV - Généraliste' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV généralistes sur la COP27",
    height = 600,
).show()

show_ranking_chart(
    ranking.query("media2=='TV - Généraliste' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV généralistes sur la COP27 et l'écologie",
    height = 600,
    total = True
).show()

In [56]:
show_ranking_chart(
    ranking.query("media2=='TV - Information en continu' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV d'info en continu sur la COP27",
    height = 400,
).show()

show_ranking_chart(
    ranking.query("media2=='TV - Information en continu' and keyword=='COP27'"),
    "Evolution du classement des chaînes TV d'info en continu sur la COP27 et l'écologie",
    height = 400,
    total = True,
).show()

In [57]:
show_ranking_chart(
    ranking.query("media2=='Radio' and keyword=='COP27'"),
    "Evolution du classement des chaînes Radio sur la COP27",
    height = 700
).show()

show_ranking_chart(
    ranking.query("media2=='Radio' and keyword=='COP27'"),
    "Evolution du classement des chaînes Radio sur la COP27 et l'écologie",
    height = 700,
    total = True,
).show()

## Evolution du volume médiatique

In [58]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days.query("keyword=='COP27'"),
    freq = "1D",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
# fig.update_traces(marker_color=WARMING_STRIPES_SEQUENCE)
fig.show()

In [59]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days.query("keyword=='COP27'"),
    freq = "1D",
    split = "media2",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    split = "media2",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
# fig.update_traces(marker_color=WARMING_STRIPES_SEQUENCE)
fig.show()

In [60]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    split = "keyword",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

In [61]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 1 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1H",
    split = "media",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

## Evolution des volumes médiatiques par chaîne

In [62]:
ranking_chart_data = ranking.query("media2=='TV - Généraliste' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )
px.colors.sequential.RdBu_r
fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
#     color_discrete_map = "RdBu_r",
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne TV généralistes sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=12,
#     textfont_color="white"
)

fig

In [63]:
ranking_chart_data = ranking.query("media2=='TV - Information en continu' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )

fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne TV d'infos en continu sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=13,
#     textfont_color="white"
)

fig

In [64]:
ranking_chart_data = ranking.query("media2=='Radio' and keyword=='COP27'")
rank_col = "media_part"

annot_labels_data = (ranking_chart_data
                     .loc[ ranking_chart_data["day_dt"].map(lambda x : x.day)==7]
                     .sort_values(rank_col,ascending = False)
                    )

fig = px.line(
    ranking_chart_data,
    x = "day_dt",
    y = rank_col,
    color = "channel_name",
    text = rank_col,
    markers = True,
    color_discrete_sequence = make_ws_palette(len(annot_labels_data)),
    category_orders = {"channel_name":annot_labels_data["channel_name"].tolist()}
)

# fig.update_traces(marker=dict(size=15),selector=dict(mode='markers'))
fig.update_layout(
    yaxis_tickformat='0%',font_family="Poppins",yaxis_title="% du volume médiatique",
    title = "Evolution des volumes médiatiques par chaîne Radio sur la COP27",
    xaxis_tickmode = "linear",
    xaxis_title = "Date de la COP27",
    legend_title = "",
)
fig.update_layout()
fig.update_traces(
#     marker_size= 40,
    texttemplate="%{y:.1%}",
#     marker = dict(line=dict(width=2)),
    textposition="top center",
    textfont_size=12,
#     textfont_color="white"
)

fig

# Comparison autre sujet

In [162]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = 1
multiplier = 2 / (50 * 18 * 60)

fig = show_mentions_over_time(
    data_all_days,
    freq = "1D",
    split = "keyword",
    text_auto = ".1%",
    method = multiplier,
    color_discrete_sequence = WARMING_STRIPES_SEQUENCE
)
# yaxis_tickformat='0%',
fig.update_layout(font_family="Poppins",
                  xaxis_tickmode = "linear",
                  xaxis_title = "Date de la COP27",
                  yaxis_title="% du volume médiatique",
                  title = "Evolution du volume médiatique sur la COP27 et l'écologie",legend_title = "",yaxis_tickformat='0%',
                 )
fig.show()

In [99]:
ranking_tv_continue_all_kw = ranking.query("media2=='TV - Information en continu' and day_str > '2022-11-05'")


In [185]:
'écologie' == 'écologie'

False

In [186]:
len('écologie')

8

In [187]:
len('écologie')

9

In [163]:
data_all_days.groupby('keyword').day_str.min()

keyword
COP27          2022-11-05
écologie      2022-11-09
midterm        2022-11-05
oceanviking    2022-11-07
qatar          2022-11-06
routedurhum    2022-11-06
écologie       2022-11-05
Name: day_str, dtype: object

In [148]:
#ranking.keyword.unique()


False

In [168]:
ranking_tv_continue_all_kw = ranking.query("media2=='Radio' and day_str > '2022-11-05'")
minute_covered_radio = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum()

ranking_tv_continue_all_kw = ranking.query("media2=='TV - Généraliste' and day_str > '2022-11-05'")
minute_covered_tv_generalist = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum().sort_values()

ranking_tv_continue_all_kw = ranking.query("media2=='TV - Information en continu' and day_str > '2022-11-05'")
minute_covered_tv_info = ranking_tv_continue_all_kw.groupby('keyword').minutes.sum()



all_minutes_df = pd.DataFrame([minute_covered_tv_generalist, minute_covered_radio, minute_covered_tv_info])
all_minutes_df.index = ['minute_covered_radio', 'minute_covered_tv_generalist', 'minute_covered_tv_info']

In [175]:
all_minutes_df.columns

Index(['COP27', 'oceanviking', 'routedurhum', 'écologie', 'qatar', 'écologie',
       'midterm'],
      dtype='object', name='keyword')

In [177]:
all_minutes_df = all_minutes_df.loc[:, ['COP27', 'oceanviking', 'routedurhum', 'écologie', 'qatar',
       'midterm']]

In [178]:
all_minutes_df_metled = pd.melt(all_minutes_df.reset_index(), id_vars='index')
all_minutes_df_metled.columns = ['Media', 'keyword', 'minutes']

In [179]:
all_minutes_df_metled

Unnamed: 0,Media,keyword,minutes
0,minute_covered_radio,COP27,80.0
1,minute_covered_tv_generalist,COP27,772.0
2,minute_covered_tv_info,COP27,538.0
3,minute_covered_radio,oceanviking,144.0
4,minute_covered_tv_generalist,oceanviking,698.0
5,minute_covered_tv_info,oceanviking,932.0
6,minute_covered_radio,routedurhum,144.0
7,minute_covered_tv_generalist,routedurhum,968.0
8,minute_covered_tv_info,routedurhum,556.0
9,minute_covered_radio,écologie,268.0


In [184]:
all_minutes_df.T
fig = px.bar(
        all_minutes_df_metled,
        x='keyword',
        y="minutes",
        color='Media'

    )

fig.update_xaxes(tickangle=-45, title=None)
fig.update_yaxes(title='Nombre de citation')
fig.update_layout(margin={"b": 100}, title='Nombre de citation de différent mot cléf par media sur la premiere semaine de la COP')


In [118]:
fig = px.pie(ranking.query("media2=='Radio' and day_str > '2022-11-05'")[['keyword', 'minutes']], names="keyword", values="minutes", title="Split TV / Radio")
fig.update_traces(textposition="inside", textinfo="percent+label")