In [1]:
import pandas as pd

filename = 'owid-energy-data.csv'

data = pd.read_csv(filename)
print(data.head())

         country  year iso_code  population  gdp  biofuel_cons_change_pct  \
0  ASEAN (Ember)  2000      NaN         NaN  NaN                      NaN   
1  ASEAN (Ember)  2001      NaN         NaN  NaN                      NaN   
2  ASEAN (Ember)  2002      NaN         NaN  NaN                      NaN   
3  ASEAN (Ember)  2003      NaN         NaN  NaN                      NaN   
4  ASEAN (Ember)  2004      NaN         NaN  NaN                      NaN   

   biofuel_cons_change_twh  biofuel_cons_per_capita  biofuel_consumption  \
0                      NaN                      NaN                  NaN   
1                      NaN                      NaN                  NaN   
2                      NaN                      NaN                  NaN   
3                      NaN                      NaN                  NaN   
4                      NaN                      NaN                  NaN   

   biofuel_elec_per_capita  ...  solar_share_elec  solar_share_energy  \
0      

In [2]:
# Linechart of US renewable energy consumption vs fossil fuel consumption over time

# clean data to be only columns that we care about
clean_data = data[['country', 'year', 'population', 'primary_energy_consumption', 'renewables_consumption', 'fossil_fuel_consumption']]

import altair as alt
us_data = clean_data[clean_data['country'] == 'United States']

year_min = 1965
print(year_min)
year_max = int(us_data["year"].max())
print(year_max)


renew_line = alt.Chart(us_data).mark_line(color='green').encode(
    alt.X('year', title='Year', scale=alt.Scale(domain=[year_min, year_max]), axis=alt.Axis(format="d")),
    alt.Y('renewables_consumption', title='Fuel Consumption in Terawatt-Hours'),
    tooltip=['year', 'renewables_consumption'],
    
)

fossil_line = alt.Chart(us_data).mark_line(color='brown').encode(
    alt.X('year', title='Year', scale=alt.Scale(domain=[year_min, year_max]), axis=alt.Axis(format="d")),
    alt.Y('fossil_fuel_consumption', title='Fuel Consumption in Terawatt-Hours'),
    tooltip=['year', 'fossil_fuel_consumption'],
)

us_linechart = renew_line + fossil_line

us_linechart = us_linechart.properties(
    title='US Renewable Energy Consumption vs Fossil Fuel Consumption Over Time',
    width = 700, height = 700)

us_linechart.show()

# how to add legend?

1965
2024


## GDP vs renewable energy adoption

In [10]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the data
filename = 'owid-energy-data.csv'
data = pd.read_csv(filename)


year_we_chose = 2022

# copying for specific year
scatter_data = data[data['year'] == year_we_chose].copy()

# columns required
scatter_data = scatter_data[['country', 'iso_code', 'gdp', 'population', 'renewables_share_energy', 'primary_energy_consumption']].copy()

# remove na columns data
scatter_data = scatter_data.dropna(subset=['gdp', 'population', 'renewables_share_energy', 'primary_energy_consumption'])

# gdp per capita calculation
scatter_data['gdp_per_capita'] = scatter_data['gdp'] / scatter_data['population']

# removing countries with no ISO code for the regions
scatter_data = scatter_data[scatter_data['iso_code'].notna()]

# country names -> region
def assign_region(country):
    
    european_countries = ['United Kingdom', 'France', 'Germany', 'Italy', 'Spain', 'Poland', 
                           'Romania', 'Netherlands', 'Belgium', 'Greece', 'Portugal', 'Sweden',
                           'Austria', 'Hungary', 'Switzerland', 'Denmark', 'Finland', 'Norway',
                           'Ireland', 'Croatia', 'Bosnia and Herzegovina', 'Albania', 'Lithuania',
                           'Slovenia', 'Latvia', 'Estonia', 'Luxembourg', 'Malta', 'Iceland',
                           'Montenegro', 'Cyprus', 'Czechia', 'Slovakia', 'Bulgaria', 'Serbia',
                           'North Macedonia', 'Moldova']
    
    asia_countries = ['China', 'India', 'Japan', 'South Korea', 'Indonesia', 'Thailand',
                         'Malaysia', 'Philippines', 'Vietnam', 'Bangladesh', 'Pakistan',
                         'Myanmar', 'Singapore', 'Taiwan', 'Hong Kong', 'Sri Lanka', 'Nepal',
                         'Cambodia', 'Mongolia', 'Laos', 'Brunei', 'Maldives', 'Bhutan',
                         'Timor', 'Afghanistan', 'Kazakhstan', 'Uzbekistan', 'Turkmenistan',
                         'Kyrgyzstan', 'Tajikistan']
    
    middle_east_countries = ['Saudi Arabia', 'United Arab Emirates', 'Iran', 'Iraq', 'Israel',
                            'Turkey', 'Qatar', 'Kuwait', 'Oman', 'Lebanon', 'Jordan',
                            'Bahrain', 'Yemen', 'Syria', 'Palestine', 'Armenia', 'Georgia',
                            'Azerbaijan']
    
    africa_countries = ['South Africa', 'Egypt', 'Nigeria', 'Kenya', 'Ethiopia', 'Ghana',
                       'Morocco', 'Algeria', 'Tunisia', 'Libya', 'Sudan', 'Tanzania',
                       'Uganda', 'Mozambique', 'Madagascar', 'Cameroon', 'Angola', 'Zimbabwe',
                       'Senegal', 'Mali', 'Zambia', 'Rwanda', 'Benin', 'Burkina Faso',
                       'Malawi', 'Mauritius', 'Namibia', 'Botswana', 'Gabon', 'Mauritania',
                       'Togo', 'Chad', 'Niger', 'Congo', 'Democratic Republic of Congo']
    
    north_america_countries = ['United States', 'Canada', 'Mexico', 'Guatemala', 'Cuba',
                              'Haiti', 'Dominican Republic', 'Honduras', 'Nicaragua',
                              'El Salvador', 'Costa Rica', 'Panama', 'Jamaica', 'Trinidad and Tobago']
    
    south_america_countries = ['Brazil', 'Argentina', 'Colombia', 'Chile', 'Peru', 'Venezuela',
                              'Ecuador', 'Bolivia', 'Paraguay', 'Uruguay', 'Guyana', 'Suriname']
    
    oceania_countries = ['Australia', 'New Zealand', 'Papua New Guinea', 'Fiji']
    
    # assingning the region based on above defined lists
    if country in european_countries:
        return 'Europe'
    elif country in asia_countries:
        return 'Asia'
    elif country in middle_east_countries:
        return 'Middle East'
    elif country in africa_countries:
        return 'Africa'
    elif country in north_america_countries:
        return 'North America'
    elif country in south_america_countries:
        return 'South America'
    elif country in oceania_countries:
        return 'Oceania'
    else:
        return 'Other'

#assingment for our data
scatter_data['region'] = scatter_data['country'].apply(assign_region)


# making the bubble chart scatter plot
fig = px.scatter(scatter_data, x='gdp_per_capita', y='renewables_share_energy',size='primary_energy_consumption',color='region',
                 hover_name='country',
                 hover_data={'gdp_per_capita': ':,.0f','renewables_share_energy': ':.1f','primary_energy_consumption': ':.1f','region': True},
                 labels={'gdp_per_capita': 'GDP per Capita (USD)',
                     'renewables_share_energy': 'Renewable Energy Share (%)',
                     'primary_energy_consumption': 'Total Energy Consumption (TWh)','region': 'Region'},
                 title=f'GDP vs Renewable Energy Adoption based on Country ({year_we_chose})',
                 size_max=50,color_discrete_sequence=px.colors.qualitative.Set2)

# changing layout as the fixed one is not as visible
fig.update_layout(
    width=1200,height=700,font=dict(size=12),
    xaxis=dict(title_font=dict(size=16),gridcolor='#D3D3D3',type='log'),
    yaxis=dict(title_font=dict(size=14),gridcolor='#D3D3D3',range=[-5, 105]),
    plot_bgcolor='white',
    hovermode='closest',
    legend=dict(title=dict(text='Region', font=dict(size=13)),font=dict(size=11),bordercolor='lightgray',borderwidth=1))

fig.update_traces(marker=dict(line=dict(width=0.5, color='white'),opacity=0.5))
fig.show()

### Code Drafts

In [4]:
# clean data to be only columns that we care about
clean_data = data[['country', 'year', 'population', 'gdp', 'primary_energy_consumption', 'renewables_consumption', 'fossil_fuel_consumption', 'net_elec_imports_share_demand']]
# clean_data['gdp_per_capita'] = clean_data['gdp'] / clean_data['population']
clean_data['renewables_consumption_pct'] = clean_data['renewables_consumption'] / clean_data['primary_energy_consumption'] * 100
print(clean_data.info)

<bound method DataFrame.info of              country  year  population           gdp  \
0      ASEAN (Ember)  2000         NaN           NaN   
1      ASEAN (Ember)  2001         NaN           NaN   
2      ASEAN (Ember)  2002         NaN           NaN   
3      ASEAN (Ember)  2003         NaN           NaN   
4      ASEAN (Ember)  2004         NaN           NaN   
...              ...   ...         ...           ...   
23190       Zimbabwe  2020  15526837.0  2.317871e+10   
23191       Zimbabwe  2021  15797165.0  2.514009e+10   
23192       Zimbabwe  2022  16069010.0  2.590159e+10   
23193       Zimbabwe  2023  16340778.0           NaN   
23194       Zimbabwe  2024  16634317.0           NaN   

       primary_energy_consumption  renewables_consumption  \
0                             NaN                     NaN   
1                             NaN                     NaN   
2                             NaN                     NaN   
3                             NaN                  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_data['renewables_consumption_pct'] = clean_data['renewables_consumption'] / clean_data['primary_energy_consumption'] * 100


In [None]:
# x = gdp_per_capita
# y = renewables_consumption_pct
# size = primary_energy_consumption

import altair as alt

# Use the latest year available
year = clean_data['year'].max() - 1
most_recent_data = clean_data.query("year == @year").copy()
print(most_recent_data.head(5))

# Figure out how many nulls are in the data
column_nan_count = most_recent_data.isnull().sum()
print(column_nan_count)

# turns out that gdp column is usually null


chart = alt.Chart(most_recent_data).mark_circle().encode(
    x=alt.X('net_elec_imports_share_demand', scale=alt.Scale(type='log')),
    y='renewables_consumption_pct',
    size='primary_energy_consumption',
    tooltip=['country', 'gdp_per_capita', 'renewables_consumption_pct', 'primary_energy_consumption']
)

           country  year    population  gdp  primary_energy_consumption  \
23   ASEAN (Ember)  2023           NaN  NaN                         NaN   
148    Afghanistan  2023  4.145471e+07  NaN                      41.060   
272         Africa  2023  1.480768e+09  NaN                    6050.292   
332    Africa (EI)  2023           NaN  NaN                    6050.292   
377   Africa (EIA)  2023           NaN  NaN                    6051.291   

     renewables_consumption  fossil_fuel_consumption  \
23                      NaN                      NaN   
148                     NaN                      NaN   
272                 554.704                 5444.936   
332                 554.704                 5444.936   
377                     NaN                      NaN   

     net_elec_imports_share_demand  renewables_consumption_pct  
23                             NaN                         NaN  
148                         86.509                         NaN  
272              