In [1]:
%load_ext autoreload
%autoreload 2

# COP27`

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import datetime

import sys
sys.path.append("../")

%load_ext autoreload
%autoreload 2

from quotaclimat.utils.plotly_theme import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 1. Update les data de la journée dans ../data/keywords/cop27/
Vous pouvez restrindre l'analyse à une journée en placant seulement data de la journée dan le ficher ou en changant DATA_PATH en dessous

# 2. Récupération des données

In [5]:
DATA_PATH = "../data/keywords/cop27/"
os.listdir(DATA_PATH)


['20221105_20221029_gregoirefournas.xlsx',
 '20221105_20221029_cop27.xlsx',
 '20221105_20221104_routeduRhum.xlsx',
 '20221104_20221102_routeduRhum.xlsx',
 '20221101_20221029_routeduRhum.xlsx',
 '20221105_20221029_elonmusk.xlsx']

In [6]:
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_one
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_all_data_dump
from quotaclimat.data_processing.read_format_deduplicate import deduplicate_extracts
from quotaclimat.data_analytics.exploration import filter_data_between_hours
from quotaclimat.utils.channels import TOP_25_CHANNELS,TOP_CHANNELS_TV,TOP_CHANNELS_TV_8
from quotaclimat.data_analytics.exploration import show_mentions_by_channel
from quotaclimat.data_analytics.exploration import show_mentions_by_time_of_the_day
from quotaclimat.data_analytics.exploration import show_mentions_over_time
from quotaclimat.data_analytics.exploration import show_mentions_treemap
from quotaclimat.data_analytics.exploration import show_piechart_split_tv_radio

In [7]:
data = read_and_format_all_data_dump(path_folder=DATA_PATH, path_channel_metadata=None)
data.shape

(10788, 15)

## Processing

In [8]:
top_audiences = pd.read_excel("../data/channels.xlsx",sheet_name = "top_audiences")
top_audiences["channel_id"] = top_audiences["channel_name"] + "_" + top_audiences["media"]
top_channels_tv = top_audiences.query("media=='TV'")["channel_name"].tolist()
top_channels_radio = top_audiences.query("media=='Radio'")["channel_name"].tolist()

In [59]:
# Boolean for top audiance TV and Radios
data['is_top_audiance'] = False
data.loc[data.channel_name.isin(top_channels_tv + top_channels_radio), 'is_top_audiance'] = True

data_at_pick_hours_6_00 = filter_data_between_hours(data, min_hour="06:00", max_hour="24:00")
data_at_pick_hours_6_10 = filter_data_between_hours(data, min_hour="06:00", max_hour="10:00")
data_at_pick_hours_19_21 = filter_data_between_hours(data, min_hour="19:00", max_hour="21:00")


# 3. Analyse mot clef COP 27

In [11]:
data_cop27 = data_at_pick_hours_6_00[data_at_pick_hours_6_00.keyword == 'cop27']

## Courverture total

In [12]:
# Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 18h * n_days)
n_days = (data_cop27.date.max() - data_cop27.date.min()).days
media_time = data_cop27.groupby(["media"]).agg({"count":"sum","channel_name":"nunique"})
media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)
media_time["media_time"] = media_time["count"] * 2
media_time["total_time"] = media_time["channel_name"] * n_days * 18 * 60
media_time["media_part"] = media_time["media_time"] / media_time["total_time"]

media_time

  media_time = media_time.append(pd.DataFrame(media_time.sum(axis = 0).rename("Total")).T)


Unnamed: 0,count,channel_name,media_time,total_time,media_part
Radio,568,132,1136,855360,0.001328
TV,471,25,942,162000,0.005815
Total,1039,157,2078,1017360,0.002043


##  TOP 3 TV // radio

In [14]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_tv,n = 3,
                               title = "TOP 3 TV COP27",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%')

In [15]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_radio,n = 3,
                               title = "TOP 3 Radio COP27",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%')

## Classement complet radio et tv

In [16]:
# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_tv,n = 25,
                               title = "Classement TV COP26",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%').show()


# Multiplier for one channel = n_mentions * 2 min / (60 minutes * 18h * n_days)
multiplier = 2 / (1 * 60 * 18 * n_days)

fig = show_mentions_by_channel(data_cop27,list_of_channels=top_channels_radio,n = 25,
                               title = "Classement Radio COP26",
                               method = multiplier,height = 400,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%').show()

In [18]:
## Multiplier = n_mentions * 2 min / (n_channels * 60 minutes * 20h)
multiplier = 2 / (data["channel_name"].nunique() * 60 * 18)

fig = show_mentions_over_time(data_cop27,freq = "D",method = multiplier,height = 500,text_auto = ".1%")
fig.update_layout(yaxis_tickformat='0%')

# 4. COP 27 et autres actualités

In [20]:
data_elonmusk = data[data.keyword == 'elonmusk']

In [65]:
data_at_pick_hours_19_21.date.dt.hour

12       19
39       19
54       19
72       19
73       20
         ..
10742    19
10756    19
10757    20
10758    20
10779    19
Name: date, Length: 902, dtype: int64

2

In [76]:
def get_percentage_per_day(df):
    minutes_per_sample = 2
    minutes_covered_per_day = ((max(df.date.dt.hour) - min(df.date.dt.hour)) + 1) * 60 * df.media.nunique()
    nb_days = (df.date.max() - df.date.min()).days

    df_percentage = df.set_index('date').groupby(pd.Grouper(freq='d')).count()['index'] * minutes_per_sample / (minutes_covered_per_day * nb_days) * 100
    df_percentage.name = 'percentage'
    return df_percentage



In [77]:
fig = go.Figure()
data_percentage_elon = get_percentage_per_day(data[data.keyword == 'elonmusk'])
data_percentage_cop27 = get_percentage_per_day(data[data.keyword == 'cop27'])
data_percentage_routeduRhum = get_percentage_per_day(data[data.keyword == 'routeduRhum'])
data_percentage_gregoirefournas = get_percentage_per_day(data[data.keyword == 'gregoirefournas'])
fig.add_trace(go.Scatter(y=data_percentage_elon, x=data_percentage_elon.index, name='Elon Musk'))
fig.add_trace(go.Scatter(y=data_percentage_cop27, x=data_percentage_cop27.index, name='COP 27'))
fig.add_trace(go.Scatter(y=data_percentage_gregoirefournas, x=data_percentage_gregoirefournas.index, name='Gregoire Fournas'))
fig.add_trace(go.Scatter(y=data_percentage_routeduRhum, x=data_percentage_routeduRhum.index, name='route du Rhum'))
fig.update_layout(title='Couverture en % des sujet d actualité TV + Radio de 6h à 00h')
fig.show()

In [78]:
fig = go.Figure()
data_percentage_elon = get_percentage_per_day(data[(data.keyword == 'elonmusk')&data.radio])
data_percentage_cop27 = get_percentage_per_day(data[(data.keyword == 'cop27')&data.radio])
data_percentage_routeduRhum = get_percentage_per_day(data[(data.keyword == 'routeduRhum')&data.radio])
data_percentage_gregoirefournas = get_percentage_per_day(data[(data.keyword == 'gregoirefournas')&data.radio])
fig.add_trace(go.Scatter(y=data_percentage_elon, x=data_percentage_elon.index, name='Elon Musk'))
fig.add_trace(go.Scatter(y=data_percentage_cop27, x=data_percentage_cop27.index, name='COP 27'))
fig.add_trace(go.Scatter(y=data_percentage_gregoirefournas, x=data_percentage_gregoirefournas.index, name='Gregoire Fournas'))
fig.add_trace(go.Scatter(y=data_percentage_routeduRhum, x=data_percentage_routeduRhum.index, name='route du Rhum'))
fig.update_layout(title='Couverture en % des sujet d actualité Radio de 6h à 00h')
fig.show()

In [79]:
data_to_use = data_at_pick_hours_19_21.copy()

fig = go.Figure()
data_percentage_elon = get_percentage_per_day(data_to_use[(data_to_use.keyword == 'elonmusk')& ~data_to_use.radio])
data_percentage_cop27 = get_percentage_per_day(data_to_use[(data_to_use.keyword == 'cop27')& ~data_to_use.radio])
data_percentage_routeduRhum = get_percentage_per_day(data_to_use[(data_to_use.keyword == 'routeduRhum')& ~data_to_use.radio])
data_percentage_gregoirefournas = get_percentage_per_day(data_to_use[(data_to_use.keyword == 'gregoirefournas')& ~data_to_use.radio])
fig.add_trace(go.Scatter(y=data_percentage_elon, x=data_percentage_elon.index, name='Elon Musk'))
fig.add_trace(go.Scatter(y=data_percentage_cop27, x=data_percentage_cop27.index, name='COP 27'))
fig.add_trace(go.Scatter(y=data_percentage_gregoirefournas, x=data_percentage_gregoirefournas.index, name='Gregoire Fournas'))
fig.add_trace(go.Scatter(y=data_percentage_routeduRhum, x=data_percentage_routeduRhum.index, name='route du Rhum'))
fig.update_layout(title='Couverture en % des sujet d actualité TV de 6h à 00h')
fig.show()

## Quelle media parle de quoi?