In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
#import initial data
rawDF = pd.read_csv("owid-energy-data.csv")
descDF = pd.read_csv("owid-energy-codebook.csv")

In [3]:
#create temp sorted DF to determine top nuclear electricity countries in latest reading
sortDF = rawDF[rawDF["year"] == 2023]
sortDF = sortDF.sort_values(by=["nuclear_share_elec"], ascending=False)

In [4]:
#initial filtering and transformation for visual 1
filteredDF = rawDF[((rawDF["country"] == "France") | (rawDF["country"] == "World")) & (rawDF["year"] >= 1960)]
filteredDF = filteredDF[["country", "year", "biofuel_electricity", "coal_electricity", "gas_electricity", "hydro_electricity", "nuclear_electricity", 
                         "oil_electricity", "solar_electricity", "wind_electricity", "electricity_generation", "electricity_demand", "renewables_electricity"]]

genCols = ["biofuel_electricity", "coal_electricity", "gas_electricity", "hydro_electricity", "nuclear_electricity", "oil_electricity", "solar_electricity", "wind_electricity"]
filteredDF["calc_total"] = filteredDF[genCols].sum(axis=1)
filteredDF

Unnamed: 0,country,year,biofuel_electricity,coal_electricity,gas_electricity,hydro_electricity,nuclear_electricity,oil_electricity,solar_electricity,wind_electricity,electricity_generation,electricity_demand,renewables_electricity,calc_total
7439,France,1960,,,,,,,,,,,,0.00
7440,France,1961,,,,,,,,,,,,0.00
7441,France,1962,,,,,,,,,,,,0.00
7442,France,1963,,,,,,,,,,,,0.00
7443,France,1964,,,,,,,,,,,,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21423,World,2019,576.12,9802.03,6369.66,4225.93,2754.08,834.83,705.52,1419.80,26771.230,26771.230,7010.63,26687.97
21424,World,2020,609.27,9417.44,6332.21,4344.05,2648.37,773.00,853.37,1590.68,26654.820,26654.820,7483.80,26568.39
21425,World,2021,658.66,10156.81,6492.94,4276.07,2762.24,830.76,1055.68,1849.47,28169.881,28169.881,7927.13,28082.63
21426,World,2022,675.77,10288.29,6581.64,4297.74,2639.68,849.26,1323.32,2098.52,28843.500,28843.500,8484.63,28754.22


In [5]:
#transformation work to create specific nuclear DF for nuclear visual
nucDF1 = filteredDF[["country", "year", "nuclear_electricity"]]
nucDF1["type"] = "nuclear"
nucDF1 = nucDF1.rename(columns={"nuclear_electricity": "electricity"})

nucDF2 = filteredDF[["country", "year", "calc_total"]]
nucDF2["type"] = "total"
nucDF2 = nucDF2.rename(columns={"calc_total": "electricity"})

nucDF = pd.concat([nucDF1, nucDF2])
nucDF = nucDF[nucDF["year"] >= 1985]
nucDF

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nucDF1["type"] = "nuclear"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nucDF2["type"] = "total"


Unnamed: 0,country,year,electricity,type
7464,France,1985,224.100,nuclear
7465,France,1986,254.155,nuclear
7466,France,1987,265.520,nuclear
7467,France,1988,275.521,nuclear
7468,France,1989,303.931,nuclear
...,...,...,...,...
21423,World,2019,26687.970,total
21424,World,2020,26568.390,total
21425,World,2021,28082.630,total
21426,World,2022,28754.220,total


In [6]:
#create separate DFs for France and World
nucDFFr = nucDF[nucDF["country"] == "France"]
nucDFWo = nucDF[nucDF["country"] == "World"]

In [7]:
#define title of visual 1
v1Title = "Nuclear power generation has kept pace with overall electricity generation growth"

In [8]:
#visual 1 - deceptive
fig = px.line(nucDFFr, x='year', y='electricity', color='type', title=v1Title)
fig.show()

In [9]:
#visual 1 - earnest
fig = px.line(nucDFWo, x='year', y='electricity', color='type', title=v1Title)
fig.show()

In [10]:
#initial filtering and transformation for visual 2
filteredDF2 = rawDF[rawDF["year"] == 2022]
filteredDF2 = filteredDF2[["year", "country", "iso_code", "gdp", "population", "renewables_electricity", "renewables_share_elec", "electricity_generation"]]
filteredDF2 = filteredDF2.dropna(subset=["iso_code", "gdp"])
filteredDF2["gdp_per_cap"] = round(filteredDF2["gdp"] / filteredDF2["population"], 2)

In [56]:
#add quartiles for gdp and renewables share
filteredDF2["gdpQuart"] = pd.qcut(filteredDF2["gdp"], 4, labels=False)
filteredDF2["renewQuart"] = pd.qcut(filteredDF2["renewables_share_elec"], 4, labels=False)
filteredDF2

Unnamed: 0,year,country,iso_code,gdp,population,renewables_electricity,renewables_share_elec,electricity_generation,gdp_per_cap,gdpQuart,renewQuart
146,2022,Afghanistan,AFG,5.330347e+10,41128772.0,0.70,84.337,0.83,1296.01,1,3
635,2022,Albania,ALB,3.617101e+10,2842318.0,7.00,100.000,7.00,12725.88,1,3
758,2022,Algeria,DZA,5.958200e+11,44903228.0,0.68,0.771,88.18,13268.98,3,0
925,2022,Angola,AGO,1.583462e+11,35588996.0,12.71,75.030,16.94,4449.30,2,3
1134,2022,Argentina,ARG,8.549143e+11,45510324.0,43.33,28.728,150.83,18785.06,3,1
...,...,...,...,...,...,...,...,...,...,...,...
20987,2022,Venezuela,VEN,1.496821e+11,28301700.0,65.78,77.699,84.66,5288.80,2,3
21111,2022,Vietnam,VNM,8.338038e+11,98186856.0,130.73,50.285,259.98,8492.01,3,2
21470,2022,Yemen,YEM,6.085299e+10,33696612.0,0.60,20.339,2.95,1805.91,1,1
21686,2022,Zambia,ZMB,6.846960e+10,20017670.0,17.31,88.906,19.47,3420.46,1,3


In [109]:
#create pivot DF for aggregation
renewDF = pd.pivot_table(filteredDF2, values=["gdp", "renewables_electricity", "electricity_generation"], index="gdpQuart", aggfunc="sum")
renewDF["gdpPct"] = renewDF["gdp"] / sum(renewDF["gdp"])
renewDF["renewPct"] = renewDF["renewables_electricity"] / sum(renewDF["renewables_electricity"])
renewDF["renewShare"] = renewDF["renewables_electricity"] / renewDF["electricity_generation"]
renewDF["quart"] = renewDF.index
renewDF = renewDF.reset_index(drop=True)

#rename quartiles for better visualization
qDict = {'quart': [0, 1, 2, 3],
          'Q': ["Q1", "Q2", "Q3", "Q4"]}

qDictDF = pd.DataFrame.from_dict(qDict)

renewDF = renewDF.merge(qDictDF, how="left", left_on="quart", right_on="quart")
renewDF = renewDF.drop(columns={"quart"})
renewDF

Unnamed: 0,electricity_generation,gdp,renewables_electricity,gdpPct,renewPct,renewShare,Q
0,125.02,701558000000.0,74.27,0.00538,0.008783,0.594065,Q1
1,538.58,2555034000000.0,270.65,0.019595,0.032006,0.502525,Q2
2,1846.99,10235470000000.0,715.71,0.078497,0.084638,0.387501,Q3
3,26259.67,116901800000000.0,7395.54,0.896528,0.874573,0.281631,Q4


In [110]:
#reshape DF for better visualization
renewDF1 = renewDF[["gdpPct", "Q"]]
renewDF1["type"] = "% of Global GDP"
renewDF1 = renewDF1.rename(columns={"gdpPct": "value"})

renewDF2 = renewDF[["renewPct", "Q"]]
renewDF2["type"] = "% of Global Renewable Generation"
renewDF2 = renewDF2.rename(columns={"renewPct": "value"})

renewDF3 = renewDF[["renewShare", "Q"]]
renewDF3["type"] = "% of Energy Demand Met by Renewables"
renewDF3 = renewDF3.rename(columns={"renewShare": "value"})

renewDFFull = pd.concat([renewDF1, renewDF2, renewDF3])
renewDFFull



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,value,Q,type
0,0.00538,Q1,% of Global GDP
1,0.019595,Q2,% of Global GDP
2,0.078497,Q3,% of Global GDP
3,0.896528,Q4,% of Global GDP
0,0.008783,Q1,% of Global Renewable Generation
1,0.032006,Q2,% of Global Renewable Generation
2,0.084638,Q3,% of Global Renewable Generation
3,0.874573,Q4,% of Global Renewable Generation
0,0.594065,Q1,% of Energy Demand Met by Renewables
1,0.502525,Q2,% of Energy Demand Met by Renewables


In [86]:
v2Title = "How does the wealth of a country affect renewable energy generation?"

In [114]:
#visual 2 - earnest
fig = px.bar(renewDFFull, x='Q', y='value', color='type', barmode='group', width=1100, height=700, title=v2Title, text_auto=True)
fig.update_layout(legend=dict(yanchor='top', y=0.99, xanchor='right', x=0.35, bgcolor='White', title=None))
fig.update_layout(yaxis_title='Percentage', xaxis_title='Global GDP Quartile', title_x=0.5)
fig.update_layout(yaxis=dict(tickformat='.0%'))
fig.update_traces(textposition="outside")
fig.show()