In [3]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local `utils` helpers) are reloaded before each cell executes and
# edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
import os
from dotenv import load_dotenv

load_dotenv()
import re
from datetime import datetime
from pathlib import Path


import geopandas as gpd
import numpy as np
import pandas as pd
import pycountry
from IPython.core.display_functions import display
from IPython.display import Markdown
from wbgapi import economy
from wordcloud import STOPWORDS
import altair as alt

from utils import (
    create_stacked_chart,
    create_choropleth_map,
    plot_ngrams,
    plot_submission_type_frequencies,
    create_geodataframe,
    preprocess_concept_df,
    process_spans,
    create_geodataframe_disaggregated,
)

In [None]:
import warnings

warnings.filterwarnings("ignore")

In [None]:
# Report configuration.
# `concept` is the folder name of the concept under `concepts/`;
# `formatted_concept` is its human-readable, title-cased form used in headings;
# `date_of_nb` is the date the notebook is run, formatted as DD-MM-YYYY.
concept = "renewables"
formatted_concept = " ".join(concept.split("-")).title()
date_of_nb = datetime.now().strftime("%d-%m-%Y")

In [None]:
# Report header: title, summary paragraph and reference links.
# Every concept-specific piece of text is derived from `concept` /
# `formatted_concept` so the header stays correct when the notebook is re-used
# for another concept.
# TODO: the "X" placeholders in the highlights sentence still have to be filled
# in by hand once the charts below have been reviewed.
display(Markdown(f"# GST Report on {formatted_concept}"))
display(Markdown("## Summary"))
display(
    Markdown(
        f"This report contains summary statistics and visualisations for all identified mentions of {formatted_concept} across UNFCCC input documents, as of {date_of_nb}. Highlights from this report: mentions of {formatted_concept} in UNFCCC input documents appear most frequently alongside mentions of X (context), in document type X and by Party members, particularly those located in the X region."
    )
)
display(
    Markdown(
        "[Link to Methodology](https://www.notion.so/climatepolicyradar/Concept-tracker-internal-a879dfc5c2fd49159838af86cd5e8955)"
    )
)
# The link follows the same concepts/<concept>/input.xlsx layout that the data
# loading cell reads from.
display(
    Markdown(
        f"[Link to Linguistic input file](https://github.com/climatepolicyradar/global-stocktake/tree/main/concepts/{concept}/input.xlsx)"
    )
)

In [None]:
# Lift pandas' display limits (column width, number of columns, total width)
# so wide tables and long text cells are shown in full.
for option in ("display.max_colwidth", "display.max_columns", "display.width"):
    pd.set_option(option, None)

In [None]:
# The `concepts` directory sits next to the current working directory
# (i.e. one level up from where the notebook is run).
concepts_path = Path.cwd().parent / "concepts"

In [None]:
# Load the three per-concept files from concepts/<concept>/.
# `date_of_nb` is deliberately not recomputed here: it is set once in the
# configuration cell, so every part of the report shows the same date.
df_concepts = pd.read_excel(concepts_path / concept / "output_with_metadata.xlsx")
df_spans = pd.read_csv(concepts_path / concept / "spans.csv")
df_input = pd.read_excel(concepts_path / concept / "input.xlsx")

In [None]:
# Location of the scraper CSV, taken from the SCRAPER_CSV_PATH environment
# variable with "scraper_csv" as the fallback.
scraper_csv_path = os.environ.get("SCRAPER_CSV_PATH", "scraper_csv")

In [None]:
# Reference data: the World Bank economy listing (wbgapi) as a DataFrame, and
# the "naturalearth_lowres" dataset bundled with GeoPandas (despite its name,
# `df_worldbank` holds the Natural Earth frame, not World Bank data).
# NOTE(review): the same two sources are loaded again a couple of cells further
# down (`df_worldbank` / `df_world_economics`) — confirm whether `df_eco` is
# still needed.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and
# removed in 1.0, so this call needs geopandas < 1.0 — confirm the pinned version.
df_eco = pd.DataFrame(economy.list())
df_worldbank = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

In [None]:
# Intentionally empty: `df_concepts_processed` is built in the next cell, which
# reloads its own inputs and would overwrite anything computed here before it
# could be used. Running `preprocess_concept_df` in this cell as well only
# duplicated that work.

In [None]:
# Load the reference data and build the processed concept frame.
# `df_worldbank` holds the "naturalearth_lowres" dataset bundled with GeoPandas
# (despite the variable name); `df_world_economics` is the World Bank economy
# listing from wbgapi.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and
# removed in 1.0, so this cell needs geopandas < 1.0 — confirm the pinned version.
df_worldbank = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
df_world_economics = pd.DataFrame(economy.list())
df_concepts_processed = preprocess_concept_df(
    df_concepts, df_worldbank, df_world_economics
)
# The rename happens only after preprocessing — presumably
# `preprocess_concept_df` expects the original "name" column; verify in utils
# before reordering these statements.
df_worldbank = df_worldbank.rename(columns={"name": "country"})

In [None]:
# Run the raw spans through `process_spans` together with the processed concept
# frame; the result replaces `df_spans` and is what `plot_ngrams` consumes later.
# NOTE(review): because the name is rebound, re-running this cell feeds the
# already-processed spans back into the helper — confirm that is harmless.
df_spans = process_spans(df_spans, df_concepts_processed)

In [None]:
# Build the frame that the per-concept choropleth maps are drawn from, using
# the processed concept frame and `df_worldbank`.
# NOTE(review): at this point `df_worldbank` has had "name" renamed to
# "country" — presumably the helper relies on that column; confirm in utils.
df_concepts_geoplot = create_geodataframe_disaggregated(
    df_concepts_processed, df_worldbank
)

In [None]:
# Intentionally empty: `df_spans` has already been passed through
# `process_spans` earlier in the notebook. Calling the helper a second time
# here fed its own processed output back into it, which was a duplicate step.

In [None]:
# Section heading for the choropleth maps; `formatted_concept` is the
# title-cased concept name from the configuration cell.
display(
    Markdown(
        f"## UNFCCC Party members mentioning {formatted_concept} across all input documents"
    )
)

In [None]:
# Keep only rows that carry a concept label, then render one choropleth map per
# concept, in alphabetical order of the concept names.
df_concepts_geoplot = df_concepts_geoplot[df_concepts_geoplot["Concept"].notna()]
unique_concepts = sorted(df_concepts_geoplot["Concept"].unique())
for conc in unique_concepts:
    create_choropleth_map(df_concepts_geoplot, conc).display()

In [None]:
# Denominator: every distinct author in the processed concept frame.
total_stakeholders = df_concepts_processed["Author"].nunique()

# Numerator: distinct authors per (Concept, Author Type), restricted to the
# rows flagged with value == 1.
rows_with_mention = df_concepts_processed.loc[df_concepts_processed["value"] == 1]
num_stakeholders_mentioning = rows_with_mention.groupby(["Concept", "Author Type"])[
    "Author"
].nunique()

In [None]:
# Share of all stakeholders (in percent) for each (Concept, Author Type) group.
percentage_stakeholders_mentioning = (
    num_stakeholders_mentioning / total_stakeholders * 100
)

# Put counts and percentages side by side in one frame with the columns
# Concept, Author Type, count, percentage. Both inputs share the same index, so
# aligning them column-wise yields the same rows as merging on the two keys.
# The input Series are not rebound to DataFrames, so this cell can be re-run on
# its own without failing on the division above.
df_counts_and_percentages = pd.concat(
    [
        num_stakeholders_mentioning.rename("count"),
        percentage_stakeholders_mentioning.rename("percentage"),
    ],
    axis=1,
).reset_index()

In [None]:
# Section heading for the stacked chart; `formatted_concept` is the title-cased
# concept name from the configuration cell.
display(
    Markdown(
        f"## Number and percentage of UNFCCC input documents that mention {formatted_concept} by Party type"
    )
)

In [None]:
# Render the chart from the combined counts/percentages frame; the call is the
# last expression of the cell, so its return value is what the cell displays.
create_stacked_chart(df_counts_and_percentages)

In [None]:
# Section heading for the submission-type chart; `formatted_concept` is the
# title-cased concept name from the configuration cell.
display(
    Markdown(
        f"## Document types that mention {formatted_concept} in submitted documents"
    )
)

In [None]:
# Presumably the number of most frequent submission types to include in the
# plot — confirm against `plot_submission_type_frequencies` in utils.
NUM_TOP_SUBMISSION_TYPES = 5
plot_submission_type_frequencies(df_concepts_processed, NUM_TOP_SUBMISSION_TYPES)

# Frequent Word Combinations

In [None]:
import nltk

# Download the "punkt" tokenizer data quietly; the trailing `;` keeps the
# call's return value out of the cell output.
# NOTE(review): recent NLTK releases (3.9+) load "punkt_tab" instead of
# "punkt" — confirm which resource the tokenizer used in `utils` needs.
nltk.download("punkt", quiet=True);

## N-gram tables

In [None]:
from html import escape

from IPython.display import display, HTML

# Explanation shown as a hover tooltip on the "What is an n-gram?" link.
help_text = """
An n-gram is a contiguous sequence of n items from a given sample of text or speech. Here, 'n' can be any integer. When n is 1, we refer to it as a "unigram". Similarly, a 2-gram (bigram) is a two-word sequence of words like "please turn", "turn your", and so on, and a 3-gram (trigram) is a three-word sequence of words like "please turn your", "turn your computer", etc.
"""

# `help_text` contains double quotes, and it is placed inside a double-quoted
# `title` attribute. It is therefore HTML-escaped first; otherwise the first
# quote in the text would end the attribute and truncate the tooltip. The
# surrounding newlines are stripped so the tooltip has no blank first/last line.
html = """
<div style="position:relative;">
    <span style="font-size: 24px; font-weight: bold;">?</span>
    <a style="text-decoration:none;color:blue;display:inline-block;" title="{0}">
        What is an n-gram?
    </a>
    <span style="font-size: 24px; font-weight: bold;">?</span>
</div>
""".format(
    escape(help_text.strip(), quote=True)
)

display(HTML(html))

In [25]:
# Render the bigram/trigram tables from the processed spans (one titled table
# per concept, as seen in the cell output below).
plot_ngrams(df_spans)

## Top 10 bigrams and trigrams (frequent word combinations) relating to Hydropower across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(power, plants)",521,"(hydroelectric, power, plants)",153
1,"(hydroelectric, power)",402,"(renewable, energy, sources)",126
2,"(renewable, energy)",385,"(hydroelectric, power, stations)",80
3,"(hydropower, plants)",319,"(hydro, power, plants)",80
4,"(hydro, power)",250,"(thermal, power, plants)",79
5,"(hydroelectric, plants)",210,"(small, hydropower, plants)",51
6,"(electricity, generation)",201,"(hydroelectric, power, station)",48
7,"(energy, sources)",198,"(total, installed, capacity)",43
8,"(climate, change)",171,"(solar, power, plants)",36
9,"(per, cent)",165,"(small, hydroelectric, power)",31


## Top 10 bigrams and trigrams (frequent word combinations) relating to Wind Energy across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(wind, power)",969,"(offshore, wind, farms)",108
1,"(wind, energy)",816,"(wind, power, plants)",97
2,"(offshore, wind)",688,"(offshore, wind, power)",84
3,"(wind, farms)",357,"(offshore, wind, energy)",75
4,"(wind, turbines)",301,"(renewable, energy, sources)",73
5,"(renewable, energy)",292,"(solar, wind, energy)",72
6,"(power, plants)",224,"(solar, wind, power)",67
7,"(onshore, wind)",200,"(wind, power, generation)",50
8,"(wind, farm)",171,"(floating, offshore, wind)",43
9,"(solar, wind)",150,"(potential, wind, energy)",42


## Top 10 bigrams and trigrams (frequent word combinations) relating to Solar Energy across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(solar, PV)",1442,"(renewable, energy, sources)",282
1,"(solar, energy)",1395,"(solar, water, heaters)",248
2,"(renewable, energy)",1298,"(solar, power, plant)",226
3,"(solar, power)",1000,"(solar, power, plants)",184
4,"(solar, photovoltaic)",586,"(photovoltaic, solar, power)",170
5,"(power, plants)",526,"(solar, PV, systems)",165
6,"(solar, water)",483,"(MW, photovoltaic, solar)",152
7,"(solar, thermal)",423,"(solar, water, heating)",135
8,"(wind, solar)",406,"(use, renewable, energy)",114
9,"(energy, sources)",404,"(renewable, energy, technologies)",111


## Top 10 bigrams and trigrams (frequent word combinations) relating to Bioenergy across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(biomass, burning)",1052,"(emissions, biomass, burning)",358
1,"(renewable, energy)",1008,"(2006, IPCC, Guidelines)",321
2,"(carbon, stocks)",899,"(dead, organic, matter)",298
3,"(CO₂, emissions)",704,"(renewable, energy, sources)",263
4,"(GHG, emissions)",694,"(biomass, carbon, stocks)",253
5,"(emissions, biomass)",675,"(CO₂, emissions, biomass)",199
6,"(forest, land)",636,"(carbon, stocks, biomass)",191
7,"(living, biomass)",601,"(change, carbon, stocks)",161
8,"(landfill, gas)",596,"(carbon, stocks, due)",149
9,"(per, cent)",530,"(carbon, stock, changes)",144


## Top 10 bigrams and trigrams (frequent word combinations) relating to Geothermal Energy across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(geothermal, energy)",404,"(renewable, energy, sources)",72
1,"(renewable, energy)",227,"(use, renewable, energy)",24
2,"(energy, sources)",139,"(geothermal, energy, sources)",19
3,"(geothermal, resources)",96,"(potential, geothermal, energy)",17
4,"(geothermal, power)",94,"(Geothermal, Power, Plant)",17
5,"(solar, energy)",61,"(geothermal, power, plants)",15
6,"(Geothermal, energy)",60,"(use, geothermal, energy)",14
7,"(wind, energy)",58,"(geothermal, power, plant)",14
8,"(per, cent)",54,"(renewable, energy, resources)",14
9,"(natural, gas)",53,"(geothermal, energy, development)",12


## Top 10 bigrams and trigrams (frequent word combinations) relating to Renewable Energy across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(share, renewables)",145,"(final, energy, consumption)",45
1,"(energy, efficiency)",125,"(renewables, electricity, generation)",23
2,"(renewable, energy)",122,"(share, renewables, energy)",23
3,"(electricity, generation)",98,"(increasing, share, renewables)",22
4,"(energy, consumption)",90,"(share, renewables, electricity)",19
5,"(renewables, share)",74,"(renewables, energy, efficiency)",18
6,"(natural, gas)",70,"(greenhouse, gas, emissions)",16
7,"(renewables, energy)",69,"(The, share, renewables)",15
8,"(per, cent)",64,"(Renewable, Energy, Directive)",15
9,"(fossil, fuels)",63,"(natural, gas, renewables)",14


## Top 10 bigrams and trigrams (frequent word combinations) relating to Heat Pumps across UNFCCC input documents


Unnamed: 0,Total,Total Bigrams,trigram,Total Trigrams
0,"(source, heat)",19,"(air, source, heat)",11
1,"(air, source)",11,"(source, heat, pumps)",11
2,"(heat, pumps)",11,"(source, heat, pump)",8
3,"(heat, pump)",9,"(ground, source, heat)",5
4,"(ground, source)",6,"(Boiler, Upgrade, Scheme)",4
5,"(Boiler, Upgrade)",4,"(will, provide, grants)",4
6,"(Upgrade, Scheme)",4,"(million, Boiler, Upgrade)",2
7,"(will, provide)",4,"(We, providing, subsidy)",2
8,"(provide, grants)",4,"(providing, subsidy, first)",2
9,"(energy, systems)",3,"(subsidy, first, movers)",2
