# Initial EDA of the Dataset


## Plot the global Temperature Increase and CO2 Emission over the years

In [57]:
import pandas as pd
import plotly.express as px
import altair as alt
import numpy as np

In [6]:
df = pd.read_csv("../data/raw/owid-co2-data.csv")
df.head()

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,Afghanistan,1850,AFG,3752993.0,,,,,,,...,,,,,,,,,,
1,Afghanistan,1851,AFG,3767956.0,,,,,,,...,,0.165,0.0,0.0,0.0,0.0,,,,
2,Afghanistan,1852,AFG,3783940.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
3,Afghanistan,1853,AFG,3800954.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
4,Afghanistan,1854,AFG,3818038.0,,,,,,,...,,0.163,0.0,0.0,0.0,0.0,,,,


In [7]:
# Group by year
df_year = (
    df.groupby("year")
    .aggregate({"co2": "sum", "temperature_change_from_co2": "mean"})
    .reset_index()
)
df_year.head()

Unnamed: 0,year,co2,temperature_change_from_co2
0,1750,55.836,
1,1751,56.442,
2,1752,57.03,
3,1753,57.66,
4,1754,58.404,


In [15]:
# px.line(df_year, x="year", y="co2", title="CO2 emissions over time")
alt.Chart(df_year).mark_line().encode(
    x=alt.X("year", title="Year"), y=alt.Y("co2", title="CO2 Emissions")
).properties(title="Global CO2 emissions over time", width=800, height=400)

In [17]:
alt.Chart(df_year).mark_line().encode(
    x=alt.X("year", title="Year"),
    y=alt.Y("temperature_change_from_co2", title="Temperature Change"),
).properties(title="Global temperature change over time", width=800, height=400)

In [48]:
df_year.shape[0]

171

In [67]:
start_year = 1850
end_year = 2024

co2_color = "red"
temp_color = "blue"

df_year = (
    df.query(f"{start_year} <= year <= {end_year}")
    .groupby("year")
    .aggregate({"co2": "sum", "temperature_change_from_co2": "mean"})
    .reset_index()
    .dropna()
)

base = alt.Chart(df_year).encode(
    alt.X("year:O", title="Year").axis(
        labelAngle=0, values=list(np.linspace(start_year, end_year, 20).astype(int))
    )
)

co2_line = base.mark_line(color=co2_color).encode(
    y=alt.Y("co2").title("CO2 Emissions"),
    tooltip=[alt.Tooltip("year:O"), alt.Tooltip("co2", title="CO2 Emissions")],
)
temp_line = base.mark_line(stroke=temp_color).encode(
    y=alt.Y("temperature_change_from_co2").title("Temperature Change"),
    tooltip=[
        alt.Tooltip("year:O"),
        alt.Tooltip("temperature_change_from_co2", title="Temperature Change"),
    ],
)

(
    alt.layer(co2_line, temp_line)
    .resolve_scale(y="independent")
    .properties(
        title="Global CO2 emissions and temperature change over time",
        width=1000,
        height=400,
    )
    .configure_axisLeft(titleColor=co2_color, titleFontSize=12)
    .configure_axisRight(titleColor=temp_color, titleFontSize=12)

)