In [None]:
%pip install "altair"
%pip install "altair_viewer"
%pip install "pandas"

In [3]:
import pandas as pd
import altair as alt

alt.data_transformers.disable_max_rows()

data = pd.read_csv("environment-raw-2021.csv", skiprows=3)

Posed Question: How does deforestation impact CO₂ emissions over time? - Let's focus on the United States first

Below, I am fixing the data so that I can split each indvidual year into its own row. This way it's easier to use this data in a way that altair can read.

In [8]:
CO2_US_emissions = data[data['Indicator Name'] == 'Total greenhouse gas emissions (kt of CO2 equivalent)']
CO2_US_emissions = CO2_US_emissions[data['Country Name'] == 'United States'].dropna(axis=1)
CO2_US_emissions = CO2_US_emissions.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='CO2 emissions (kt)')

CO2_US_emissions.head()

  CO2_US_emissions = CO2_US_emissions[data['Country Name'] == 'United States'].dropna(axis=1)


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,CO2 emissions (kt)
0,United States,USA,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,1970,5400503.5
1,United States,USA,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,1971,5440420.62
2,United States,USA,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,1972,5704297.25
3,United States,USA,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,1973,5912971.38
4,United States,USA,Total greenhouse gas emissions (kt of CO2 equi...,EN.ATM.GHGT.KT.CE,1974,5756864.1


Now I am setting up the graph to see the variation of Greenhouse Gas Emissions (in the U.S.) per year. I used a line graph, but this graph doesn't show an cumulative total

In [9]:
chart1 = alt.Chart(CO2_US_emissions).mark_line().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('CO2 emissions (kt):Q', title='CO2 emissions (kt)'),
).properties(
    title='Total Greenhouse Gas Emissions in the United States (kt of CO2 equivalent)',
    width=700,
    height=400
)

chart1.show()

Now it's time to see the amount of forest area, and see if it correlates.

In [10]:
forest_land = data[data['Indicator Code'] == 'AG.LND.FRST.K2']
forest_land = forest_land[forest_land['Country Name'] == 'United States'].dropna(axis=1)
forest_land = forest_land.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='Forest_area')

forest_land.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,Forest_area
0,United States,USA,Forest area (sq. km),AG.LND.FRST.K2,1990,3024500.0
1,United States,USA,Forest area (sq. km),AG.LND.FRST.K2,1991,3025586.0
2,United States,USA,Forest area (sq. km),AG.LND.FRST.K2,1992,3026672.0
3,United States,USA,Forest area (sq. km),AG.LND.FRST.K2,1993,3027758.0
4,United States,USA,Forest area (sq. km),AG.LND.FRST.K2,1994,3028844.0


Below I started creating the graph on the overall changes in forest area within the U.S. and saw that it was pretty hard to see any differences in the amount over the years. It shows a slight increase, but doesn't really show any correlation with the previous graph, so there was no conclusions to really be drawn here. 

In [104]:
chart2 = alt.Chart(forest_land).mark_line().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('Forest_area:Q', title='Forest Area (sq. km)'),
).properties(
    title='Forest Area in the United States (sq. km)',
    width=700,
    height=400,
).configure_mark(
    color='green'
)

chart2.show()

Posed Question: What is the trend of greenhouse gas emissions in different sectors (agriculture, industry, transport)?
Which sectors contribute the most to CO₂ emissions over time?


Setting up the data for the U.S.

In [94]:
#Create US_data DataFrame
US_data = data[data['Country Name'] == 'United States']

# Filter for CO2 emissions
CO2_US_data = US_data[
    (US_data['Indicator Code'] == 'EN.CO2.TRAN.ZS') |
    (US_data['Indicator Code'] == 'EN.CO2.OTHX.ZS') |
    (US_data['Indicator Code'] == 'EN.CO2.MANF.ZS') |
    (US_data['Indicator Code'] == 'EN.CO2.ETOT.ZS') |
    (US_data['Indicator Code'] == 'EN.CO2.BLDG.ZS')
].dropna(axis=1)
CO2_US_data = CO2_US_data.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='CO2 emissions (% of total)')

CO2_US_data['Indicator Name'] = CO2_US_data['Indicator Name'].replace(
    {
        'CO2 emissions from transport (% of total fuel combustion)': 'Transportation',
        'CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion)': 'Other Sectors',
        'CO2 emissions from manufacturing industries and construction (% of total fuel combustion)': 'Manufacturing',
        'CO2 emissions from electricity and heat production, total (% of total fuel combustion)': 'Electricity and Heat',
        'CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion)': 'Residential Buildings, Commercial & public Services'
    }
)

CO2_US_data.reset_index()

CO2_US_data.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,CO2 emissions (% of total)
0,United States,USA,Transportation,EN.CO2.TRAN.ZS,1960,23.731404
1,United States,USA,Other Sectors,EN.CO2.OTHX.ZS,1960,1.908181
2,United States,USA,Manufacturing,EN.CO2.MANF.ZS,1960,27.184828
3,United States,USA,Electricity and Heat,EN.CO2.ETOT.ZS,1960,27.635213
4,United States,USA,"Residential Buildings, Commercial & public Ser...",EN.CO2.BLDG.ZS,1960,19.540373


Below I've created a stacked bar chart to visualize the CO2 emissions by sector in the United States. The chart shows the percentage of total CO2 emissions for each sector over the years.

In [90]:
# Create a Stacked bar char for CO2 emissions by sector

sector_colors = {
    'Transportation': '#2ca02c',  # Green
    'Other Sectors': '#d62728',  # Red
    'Manufacturing': '#ff7f0e',  # Orange
    'Electricity and Heat': '#1f77b4',  # Blue
    'Residential Buildings, Commercial & public Services': '#17becf'  # Light Blue
}

options = list(sector_colors.keys())
labels = [option + ' ' for option in options]

input_radio = alt.binding_radio(
    options=options + [None],
    labels=labels + ['All'],
    name='Sector: '
)
selection = alt.selection_point(
    fields=['Indicator Name'], 
    bind=input_radio,
)

chart3 = alt.Chart(CO2_US_data).mark_bar().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('CO2 emissions (% of total)', title='CO2 emissions (% of total)', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color(
        'Indicator Name',
        title='Sector',
        scale=alt.Scale(domain=list(sector_colors.keys()), range=list(sector_colors.values()))  # Set consistent colors
    ),
    tooltip=['Indicator Name:N', 'CO2 emissions (% of total)'],
).transform_filter(
    selection
).properties(
    title='CO2 Emissions by Sector in the United States (% of total)',
    width=700,
    height=400,
).configure_legend(
    labelLimit=0
).add_params(
    selection
)

chart3

Below I made the same graph for Argentina

In [None]:
Argentina_data = data[data['Country Name'] == 'Argentina']

# Filter for CO2 emissions
CO2_Arg_data = Argentina_data[
    (Argentina_data['Indicator Code'] == 'EN.CO2.TRAN.ZS') |
    (Argentina_data['Indicator Code'] == 'EN.CO2.OTHX.ZS') |
    (Argentina_data['Indicator Code'] == 'EN.CO2.MANF.ZS') |
    (Argentina_data['Indicator Code'] == 'EN.CO2.ETOT.ZS') |
    (Argentina_data['Indicator Code'] == 'EN.CO2.BLDG.ZS')
].dropna(axis=1)

CO2_Arg_data = CO2_Arg_data.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='CO2 emissions (% of total)')

CO2_Arg_data['Indicator Name'] = CO2_Arg_data['Indicator Name'].replace(
    {
        'CO2 emissions from transport (% of total fuel combustion)': 'Transportation',
        'CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion)': 'Other Sectors',
        'CO2 emissions from manufacturing industries and construction (% of total fuel combustion)': 'Manufacturing',
        'CO2 emissions from electricity and heat production, total (% of total fuel combustion)': 'Electricity and Heat',
        'CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion)': 'Residential Buildings, Commercial & public Services'
    }
)

CO2_Arg_data.reset_index()

sector_colors = {
    'Transportation': '#2ca02c',  # Green
    'Other Sectors': '#d62728',  # Red
    'Manufacturing': '#ff7f0e',  # Orange
    'Electricity and Heat': '#1f77b4',  # Blue
    'Residential Buildings, Commercial & public Services': '#17becf'  # Light Blue
}

options = list(sector_colors.keys())
labels = [option + ' ' for option in options]

input_radio = alt.binding_radio(
    options=options + [None],
    labels=labels + ['All'],
    name='Sector: '
)
selection = alt.selection_point(
    fields=['Indicator Name'], 
    bind=input_radio,
)

chart4 = alt.Chart(CO2_Arg_data).mark_bar().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('CO2 emissions (% of total)', title='CO2 emissions (% of total)', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color(
        'Indicator Name',
        title='Sector',
        scale=alt.Scale(domain=list(sector_colors.keys()), range=list(sector_colors.values()))  # Set consistent colors
    ),
    tooltip=['Indicator Name:N', 'CO2 emissions (% of total)'],
).transform_filter(
    selection
).properties(
    title='CO2 Emissions by Sector in Argentina (% of total)',
    width=700,
    height=400,
).configure_legend(
    labelLimit=0
).add_params(
    selection
)

chart4

And again for China

In [100]:
China_data = data[data['Country Name'] == 'China']

# Filter for CO2 emissions
CO2_China_data = China_data[
    (China_data['Indicator Code'] == 'EN.CO2.TRAN.ZS') |
    (China_data['Indicator Code'] == 'EN.CO2.OTHX.ZS') |
    (China_data['Indicator Code'] == 'EN.CO2.MANF.ZS') |
    (China_data['Indicator Code'] == 'EN.CO2.ETOT.ZS') |
    (China_data['Indicator Code'] == 'EN.CO2.BLDG.ZS')
].dropna(axis=1)

CO2_China_data = CO2_China_data.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='CO2 emissions (% of total)')

CO2_China_data['Indicator Name'] = CO2_China_data['Indicator Name'].replace(
    {
        'CO2 emissions from transport (% of total fuel combustion)': 'Transportation',
        'CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion)': 'Other Sectors',
        'CO2 emissions from manufacturing industries and construction (% of total fuel combustion)': 'Manufacturing',
        'CO2 emissions from electricity and heat production, total (% of total fuel combustion)': 'Electricity and Heat',
        'CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion)': 'Residential Buildings, Commercial & public Services'
    }
)

CO2_China_data.reset_index()

sector_colors = {
    'Transportation': '#2ca02c',  # Green
    'Other Sectors': '#d62728',  # Red
    'Manufacturing': '#ff7f0e',  # Orange
    'Electricity and Heat': '#1f77b4',  # Blue
    'Residential Buildings, Commercial & public Services': '#17becf'  # Light Blue
}

options = list(sector_colors.keys())
labels = [option + ' ' for option in options]

input_radio = alt.binding_radio(
    options=options + [None],
    labels=labels + ['All'],
    name='Sector: '
)
selection = alt.selection_point(
    fields=['Indicator Name'], 
    bind=input_radio,
)

chart5 = alt.Chart(CO2_China_data).mark_bar().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('CO2 emissions (% of total)', title='CO2 emissions (% of total)', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color(
        'Indicator Name',
        title='Sector',
        scale=alt.Scale(domain=list(sector_colors.keys()), range=list(sector_colors.values()))  # Set consistent colors
    ),
    tooltip=['Indicator Name:N', 'CO2 emissions (% of total)'],
).transform_filter(
    selection
).properties(
    title='CO2 Emissions by Sector in China (% of total)',
    width=700,
    height=400,
).configure_legend(
    labelLimit=0
).add_params(
    selection
)

chart5

Lastly, the United Kingdom

In [105]:
UK_data = data[data['Country Name'] == 'United Kingdom']

# Filter for CO2 emissions
CO2_UK_data = UK_data[
    (UK_data['Indicator Code'] == 'EN.CO2.TRAN.ZS') |
    (UK_data['Indicator Code'] == 'EN.CO2.OTHX.ZS') |
    (UK_data['Indicator Code'] == 'EN.CO2.MANF.ZS') |
    (UK_data['Indicator Code'] == 'EN.CO2.ETOT.ZS') |
    (UK_data['Indicator Code'] == 'EN.CO2.BLDG.ZS')
].dropna(axis=1)

CO2_UK_data = CO2_UK_data.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                   var_name='Year', value_name='CO2 emissions (% of total)')

CO2_UK_data['Indicator Name'] = CO2_UK_data['Indicator Name'].replace(
    {
        'CO2 emissions from transport (% of total fuel combustion)': 'Transportation',
        'CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion)': 'Other Sectors',
        'CO2 emissions from manufacturing industries and construction (% of total fuel combustion)': 'Manufacturing',
        'CO2 emissions from electricity and heat production, total (% of total fuel combustion)': 'Electricity and Heat',
        'CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion)': 'Residential Buildings, Commercial & public Services'
    }
)

CO2_UK_data.reset_index()

sector_colors = {
    'Transportation': '#2ca02c',  # Green
    'Other Sectors': '#d62728',  # Red
    'Manufacturing': '#ff7f0e',  # Orange
    'Electricity and Heat': '#1f77b4',  # Blue
    'Residential Buildings, Commercial & public Services': '#17becf'  # Light Blue
}

options = list(sector_colors.keys())
labels = [option + ' ' for option in options]

input_radio = alt.binding_radio(
    options=options + [None],
    labels=labels + ['All'],
    name='Sector: '
)
selection = alt.selection_point(
    fields=['Indicator Name'], 
    bind=input_radio,
)

chart6 = alt.Chart(CO2_UK_data).mark_bar().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('CO2 emissions (% of total)', title='CO2 emissions (% of total)', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color(
        'Indicator Name',
        title='Sector',
        scale=alt.Scale(domain=list(sector_colors.keys()), range=list(sector_colors.values()))  # Set consistent colors
    ),
    tooltip=['Indicator Name:N', 'CO2 emissions (% of total)'],
).transform_filter(
    selection
).properties(
    title='CO2 Emissions by Sector in The United Kingdom (% of total)',
    width=700,
    height=400,
).configure_legend(
    labelLimit=0
).add_params(
    selection
)

chart6