# IPCC Analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import json
import os
import bertopic

%load_ext autoreload
%autoreload 2

pd.options.display.max_columns = 100

In [2]:
# tweets.data.to_pickle("../data/FINALDATA.pkl")

In [2]:
import sys
sys.path.append("../")

import climatewatch

In [4]:
from climatewatch.data import process_raw_data,process_pretrained_classifiers,process_sentiment_vader
from climatewatch.data import open_jsonl_data

from climatewatch.utils import CodeCarbon

# Preparing Data from jsonl files

In [14]:
folder = "../data/IPCC"

with CodeCarbon():
    
    data = []
    
    for name in ["IPCC_WG1","IPCC_WG2","GIEC_WG1","GIEC_WG2"]:
        
        source,wg = name.split("_")
    
        ipcc = open_jsonl_data(os.path.join(folder,f"{name}.json"),encoding = "utf16")
        ipcc = process_raw_data(ipcc)
        ipcc = process_sentiment_vader(ipcc)
        ipcc["source_file"] = name
        ipcc["hashtag"] = source
        ipcc["workgroup"] = wg 
        
        data.append(ipcc)
    
    data = pd.concat(data,ignore_index = True)
    data.to_pickle("../data/IPCC/full_data_without_emotions.pkl")

CODECARBON : No CPU tracking mode found. Falling back on CPU constant mode.
CODECARBON : Failed to match CPU TDP constant. Falling back on a global constant.


[INFO] Measuring carbon emissions with CodeCarbon


In [58]:
data["date"].iloc[0]

Timestamp('2021-08-15 23:51:09+0000', tz='UTC')

In [61]:
data.loc[data["workgroup"] == "WG1","after_release"] = (data["date"] >= "2021-08-09")
data.loc[data["workgroup"] == "WG2","after_release"] = (data["date"] >= "2022-02-28")

data["date_day"] = pd.to_datetime(data["date"].dt.date)

data.loc[data["workgroup"] == "WG1","date_release_index"] = (data["date_day"] - pd.to_datetime("2021-08-09")).dt.days
data.loc[data["workgroup"] == "WG2","date_release_index"] = (data["date_day"] - pd.to_datetime("2022-02-28")).dt.days

data.loc[data["workgroup"] == "WG1","date_release"] = (data["date"] - pd.to_datetime("2021-08-09 00:00:00+0000"))
data.loc[data["workgroup"] == "WG2","date_release"] = (data["date"] - pd.to_datetime("2022-02-28 00:00:00+0000"))

In [19]:
data.to_pickle("../data/IPCC/full_data_without_emotions.pkl")

In [20]:
data.groupby(["workgroup","after_release"])["date"].count()

workgroup  after_release
WG1        False             1336
           True             32159
WG2        False              977
           True             13519
Name: date, dtype: int64

# Quick explo

In [9]:
data.shape

(47991, 20)

In [10]:
data["source_file"].value_counts()

IPCC1    26471
IPCC2    10407
GIEC1     7024
GIEC2     4089
Name: source_file, dtype: int64

In [12]:
data[["clean_sentiment"]].to_csv("../data/IPCC/full_data_only_text.csv",index = False)

# Add emotions

In [23]:
folder = "../data/IPCC"
files = os.listdir(folder)
files = [os.path.join(folder,x) for x in files if x.startswith("emotion") and "emoji" not in x]
files

['../data/IPCC\\emotions_emotion.csv',
 '../data/IPCC\\emotions_irony.csv',
 '../data/IPCC\\emotions_sentiment.csv',
 '../data/IPCC\\emotions_stance-climate.csv']

In [29]:
data = pd.read_pickle("../data/IPCC/full_data_without_emotions.pkl")

In [33]:
all_tasks = []

for task in ["emotion","irony","sentiment","stance-climate"]:
    task_data = pd.read_csv(os.path.join(folder,f"emotions_{task}.csv"))
    task_data = task_data.iloc[:,1:]
    task_data[task.replace("-","_")] = task.replace("-","_") + "_" + task_data.idxmax(axis = 1)
    task_data.columns = [(task.replace("-","_") + "_" + x) if not x.startswith(task) else x for x in task_data.columns]
    
    all_tasks.append(task_data)
all_tasks = pd.concat(all_tasks,axis = 1)

In [36]:
data = pd.concat([data,all_tasks],axis = 1)

In [37]:
data

Unnamed: 0,url,date,content,id,replyCount,retweetCount,likeCount,quoteCount,conversationId,lang,outlinks,hashtags,username,clean_bertopic,clean_sentiment,likeCat,retweetCat,vader_sentiment_class,vader_sentiment_score,source_file,hashtag,workgroup,after_release,emotion_anger,emotion_joy,emotion_optimism,emotion_sadness,emotion,irony_non_irony,irony,sentiment_negative,sentiment_neutral,sentiment_positive,sentiment,stance_climate_none,stance_climate_against,stance_climate_favor,stance_climate_stance_climate
0,https://twitter.com/7VAMPIR/status/14270552921...,2021-08-15 23:51:09+00:00,#IPCCReport2021 #IPCC\n#Klimakatastrophe \nhtt...,1427055292148236290,0,0,0,0,1427055292148236290,und,[https://youtu.be/r7CicXavjH8],"[IPCCReport2021, IPCC, Klimakatastrophe]",7VAMPIR,IPCCReport2021 IPCC Klimakatastrophe,IPCCReport2021 IPCC Klimakatastrophe,<5,<5,neutral,0.0000,IPCC_WG1,IPCC,WG1,True,0.317594,0.238831,0.055110,0.388464,emotion_sadness,0.473522,irony_irony,0.030126,0.941534,0.028340,sentiment_neutral,0.046032,0.035799,0.918169,stance_climate_favor
1,https://twitter.com/BrianMcHugh2011/status/142...,2021-08-15 23:50:26+00:00,We get to decide our future.\n\nIt's up to us ...,1427055109591142403,0,0,1,0,1427055109591142403,en,,"[wildfires, Canada, ClimateEmergency, NoPlanet...",BrianMcHugh2011,We get to decide our future. It's up to us to ...,We get to decide our future. It's up to us to ...,<5,<5,neutral,0.4939,IPCC_WG1,IPCC,WG1,True,0.064742,0.073959,0.509968,0.351330,emotion_optimism,0.911690,irony_non_irony,0.074137,0.727112,0.198751,sentiment_neutral,0.017307,0.042754,0.939939,stance_climate_favor
2,https://twitter.com/KenyaEmissionsT/status/142...,2021-08-15 23:49:54+00:00,Reducing GHG emissions for developing countrie...,1427054977541935112,0,0,0,0,1427054977541935112,en,,"[EmissionsTradingSystemKenya, CarbonPricing, I...",KenyaEmissionsT,Reducing GHG emissions for developing countrie...,Reducing GHG emissions for developing countrie...,<5,<5,neutral,0.9246,IPCC_WG1,IPCC,WG1,True,0.077136,0.431390,0.404921,0.086553,emotion_joy,0.907990,irony_non_irony,0.010140,0.198305,0.791555,sentiment_positive,0.017881,0.039218,0.942901,stance_climate_favor
3,https://twitter.com/fedwer/status/142705395594...,2021-08-15 23:45:51+00:00,Lo que duró la atención que la TV estadouniden...,1427053955947941894,0,0,0,0,1427053955947941894,es,[https://twitter.com/AlexSteffen/status/142700...,"[IPCC, crisisclimatica, disociacioncognitiva]",fedwer,Lo que duró la atención que la TV estadouniden...,Lo que duró la atención que la TV estadouniden...,<5,<5,neutral,0.0000,IPCC_WG1,IPCC,WG1,True,0.066074,0.054523,0.077113,0.802290,emotion_sadness,0.837189,irony_non_irony,0.023452,0.938772,0.037776,sentiment_neutral,0.018587,0.039982,0.941431,stance_climate_favor
4,https://twitter.com/KyleaTink/status/142705044...,2021-08-15 23:31:54+00:00,“As scientists we’ve done our job” …. The #IPC...,1427050447903150082,0,2,6,0,1427050447903150082,en,[https://7ampodcast.com.au/episodes/a-climate-...,"[IPCC, ClimateReport, now]",KyleaTink,“As scientists we’ve done our job” …. The IPCC...,“As scientists we’ve done our job” …. The IPCC...,<50,<5,neutral,0.0000,IPCC_WG1,IPCC,WG1,True,0.077985,0.136410,0.720307,0.065298,emotion_optimism,0.852641,irony_non_irony,0.023922,0.619700,0.356378,sentiment_neutral,0.017357,0.041768,0.940875,stance_climate_favor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47986,https://twitter.com/GARREAU75/status/149565828...,2022-02-21 07:14:58+00:00,#Rapport du #Giec : voici comment la #France p...,1495658288226377730,0,0,1,0,1495658288226377730,fr,,"[Rapport, Giec, France, changement, climatique]",GARREAU75,Rapport du Giec : voici comment la France pour...,Rapport du Giec : voici comment la France pour...,<5,<5,neutral,0.0000,GIEC_WG2,GIEC,WG2,False,0.045647,0.858050,0.063657,0.032645,emotion_joy,0.919710,irony_non_irony,0.013777,0.935062,0.051160,sentiment_neutral,0.953082,0.016740,0.030177,stance_climate_none
47987,https://twitter.com/GARREAU75/status/149565802...,2022-02-21 07:13:55+00:00,#Rapport du #Giec : voici comment la #France p...,1495658023326523395,0,2,2,0,1495658023326523395,fr,,"[Rapport, Giec, France, changement, climatique]",GARREAU75,Rapport du Giec : voici comment la France pour...,Rapport du Giec : voici comment la France pour...,<5,<5,neutral,0.0000,GIEC_WG2,GIEC,WG2,False,0.054116,0.813007,0.093716,0.039161,emotion_joy,0.882576,irony_non_irony,0.013084,0.933354,0.053562,sentiment_neutral,0.952036,0.016551,0.031413,stance_climate_none
47988,https://twitter.com/GARREAU75/status/149565773...,2022-02-21 07:12:47+00:00,#Rapport du #Giec : voici comment la #France p...,1495657739925823490,0,1,2,0,1495657739925823490,fr,[https://www.novethic.fr/actualite/environneme...,"[Rapport, Giec, France, changement, climatique]",GARREAU75,Rapport du Giec : voici comment la France pour...,Rapport du Giec : voici comment la France pour...,<5,<5,neutral,0.0000,GIEC_WG2,GIEC,WG2,False,0.054116,0.813007,0.093716,0.039161,emotion_joy,0.882576,irony_non_irony,0.013084,0.933354,0.053562,sentiment_neutral,0.952036,0.016551,0.031413,stance_climate_none
47989,https://twitter.com/PCF_82/status/149564711278...,2022-02-21 06:30:34+00:00,#Climat\n💪 Avec @Fabien_Roussel la France se d...,1495647112784191491,1,14,21,0,1495647112784191491,fr,[https://www.fabienroussel2022.fr/47_la_france...,"[Climat, GIEC, JoursHeureux]",PCF_82,Climat 💪 Avec Fabien_Roussel la France se dote...,Climat 💪 Avec @user la France se dotera d'une ...,<50,<50,neutral,0.0000,GIEC_WG2,GIEC,WG2,False,0.045427,0.857424,0.060798,0.036351,emotion_joy,0.914907,irony_non_irony,0.013665,0.937661,0.048674,sentiment_neutral,0.955179,0.017443,0.027378,stance_climate_none


In [65]:
data.to_pickle("../data/IPCC/full_data.pkl")