# COP26

![](../coverquotaclimat.png)

> Notebook Python d'exploration pour fournir une base d'analyse et de visualisation à toute l'équipe

In [12]:
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import sys
sys.path.append("../")

%load_ext autoreload
%autoreload 2

from quotaclimat.utils.plotly_theme import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Récupération des données

> À changer plus tard, une fois la base de données SQL mise en place

In [20]:
# List the raw COP26 extract files available locally.
# `os.listdir` returns entries in arbitrary order, so sort them to get a
# stable listing across machines and re-runs.
sorted(os.listdir("../data/cop26/"))

['20221031_20211030_20211031_all_COP26.xlsx',
 '20221031_20211101_20211101_all_COP26.xlsx',
 '20221031_20211102_20211102_all_COP26.xlsx',
 '20221031_20211103_20211103_all_COP26.xlsx',
 '20221031_20211104_20211105_all_COP26.xlsx',
 '20221031_20211106_20211108_all_COP26.xlsx',
 '20221031_20211109_20211111_all_COP26.xlsx',
 '20221031_20211112_20211113_all_COP26.xlsx',
 '20221031_20211114_20211114_all_COP26.xlsx']

In [21]:
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_one
from quotaclimat.data_processing.read_format_deduplicate import read_and_format_all_data_dump
from quotaclimat.data_processing.read_format_deduplicate import deduplicate_extracts

In [22]:
# Read and format the data dumps stored in the COP26 folder; no channel
# metadata file is supplied for this exploration.
data = read_and_format_all_data_dump(
    path_folder="../data/cop26/",
    path_channel_metadata=None,
)

# Quick sanity check on the size of the loaded table (rows, columns).
data.shape

(24728, 14)

# Data exploration

In [36]:
from quotaclimat.utils.channels import TOP_25_CHANNELS,TOP_CHANNELS_TV,TOP_CHANNELS_TV_8
from quotaclimat.data_analytics.exploration import show_mentions_by_channel
from quotaclimat.data_analytics.exploration import show_mentions_by_time_of_the_day
from quotaclimat.data_analytics.exploration import show_mentions_over_time
from quotaclimat.data_analytics.exploration import show_mentions_treemap
from quotaclimat.data_analytics.exploration import show_piechart_split_tv_radio

In [37]:
# Mentions over time with freq="D" (presumably a daily aggregation — confirm
# in the helper), measured with the "minutes" method.
show_mentions_over_time(
    data,
    freq="D",
    method="minutes",
)

In [123]:
# Mentions per channel with n=30 (presumably the number of channels kept —
# confirm in the helper), measured with the "minutes" method.
show_mentions_by_channel(
    data,
    n=30,
    method="minutes",
)

In [42]:
# Same per-channel chart rendered for two predefined TV channel lists,
# in the same order as before: TOP_CHANNELS_TV_8 first, then TOP_CHANNELS_TV.
for tv_channel_list in (TOP_CHANNELS_TV_8, TOP_CHANNELS_TV):
    show_mentions_by_channel(
        data,
        list_of_channels=tv_channel_list,
        method="minutes",
    ).show()

In [55]:
# Mentions over time, split by channel and restricted to TOP_CHANNELS_TV,
# drawn as a bar chart.
show_mentions_over_time(
    data,
    split="channel_name",
    list_of_channels=TOP_CHANNELS_TV,
    kind="bar",
    height=700,
    method="minutes",
)

In [56]:
# Mentions by time of the day, split by channel and restricted to
# TOP_CHANNELS_TV, drawn as a bar chart.
show_mentions_by_time_of_the_day(
    data,
    split="channel_name",
    list_of_channels=TOP_CHANNELS_TV,
    kind="bar",
    height=700,
    method="minutes",
)

# Préparation du Baromètre

## Travail préliminaire sur les données

**Méthodologie** : 
- Sélectionner les heures d'écoute les plus importantes sur TV et Radio
- TV : 19h-22h
- Radio : 6h30-9h30

In [69]:
from quotaclimat.data_analytics.exploration import filter_data_between_hours

In [104]:
# TV subset: 19:00-22:00 window, TV rows only.
data_tv = (
    filter_data_between_hours(data, "19:00", "22:00")
    .query("media=='TV'")
)

# Radio subset: 06:30-09:30 window, radio rows only.
data_radio = (
    filter_data_between_hours(data, "06:30", "09:30")
    .query("media=='Radio'")
)

# France Info only, taken from the radio subset above.
data_france_info = data_radio.query("channel_name=='France Info'")

## Niveau 1

### Calcul du % du temps médiatique

In [105]:
# Number of calendar days covered by the extracts. The data dump file names
# span 2021-10-30 to 2021-11-14, both bounds included, hence the "+ 1".
COVERAGE_START = datetime.date(2021, 10, 30)
COVERAGE_END = datetime.date(2021, 11, 14)
n_days = (COVERAGE_END - COVERAGE_START).days + 1

In [106]:
# Scaling factor passed as `method`: 2 min / (n_channels * 60 min * n_days).
# NOTE(review): presumably each mention counts for 2 minutes of airtime and
# each hourly bin holds 60 minutes per channel per day — confirm in the helper.
show_mentions_by_time_of_the_day(
    data_tv,
    freq="1H",
    method=2 / (data_tv["channel_name"].nunique() * 60 * n_days),
)

In [107]:
# Scaling factor passed as `method`: 2 min / (n_channels * 60 min * n_days).
# NOTE(review): the radio window is 06:30-09:30 while bins are hourly
# (freq="1H"); if bins align on clock hours, the first and last ones only hold
# 30 minutes of airtime, so the 60 min denominator may not fit them — confirm.
show_mentions_by_time_of_the_day(
    data_radio,
    freq="1H",
    method=2 / (data_radio["channel_name"].nunique() * 60 * n_days),
)

In [108]:
# Scaling factor passed as `method`: 2 min / (n_channels * 60 min * n_days).
# The subset is filtered on channel_name == 'France Info', so n_channels is at
# most 1 here.
# NOTE(review): same half-hour caveat as the radio subset it derives from
# (06:30-09:30 window vs hourly bins) — confirm.
show_mentions_by_time_of_the_day(
    data_france_info,
    freq="1H",
    method=2 / (data_france_info["channel_name"].nunique() * 60 * n_days),
)

### Podiums TOP5 et FLOP5

#### TOP 5 et TOP 25 TV (toutes audiences confondues)

In [122]:
# Scaling factor passed as `method`: 2 min / (3 h * 60 min * n_days), where
# 3 h is the length of the 19:00-22:00 TV window.
# The same chart is drawn twice, first with n = 5 and then with n = 30.
# NOTE(review): the section heading announces a "TOP 25" while the second
# chart uses n = 30 — confirm which one is intended.
# NOTE(review): the '0%' tick format is kept as-is; confirm it renders the
# intended percent ticks.
for top_n in (5, 30):
    (
        show_mentions_by_channel(
            data_tv,
            method=2 / (3 * 60 * n_days),
            n=top_n,
            text_auto=".2%",
        )
        .update_layout(yaxis_tickformat='0%')
        .show()
    )

In [127]:
# Scaling factor passed as `method`: 2 min / (3 h * 60 min * n_days), where
# 3 h is the length of the 19:00-22:00 TV window.
# Chart restricted to the predefined TOP_CHANNELS_TV list.
(
    show_mentions_by_channel(
        data_tv,
        method=2 / (3 * 60 * n_days),
        list_of_channels=TOP_CHANNELS_TV,
        text_auto=".1%",
    )
    .update_layout(yaxis_tickformat='0%')
    .show()
)