In [None]:
!pip install owid-catalog

In [None]:
from owid.catalog import RemoteCatalog
# Locate the default OWID catalog and fetch its index over HTTPS,
# restricted to the three channels used in this notebook.
catalog = RemoteCatalog(
    channels=("garden", "meadow", "open_numbers"),
)

In [None]:
# Distinct namespaces present in the catalog index (one per main source)
catalog.datasets.loc[:, "namespace"].unique()

In [None]:
# Rebuild the catalog-metadata CSV (metadata of the latest version of every dataset).
# This walks the whole remote catalog, one request per dataset, so it is opt-in:
# set REBUILD_CATALOG_CSV = True and re-run this cell to regenerate the file.
from dataclasses import asdict
from pathlib import Path

import pandas as pd

CATALOG_CSV = '../../processed/owid_catalog.csv'
REBUILD_CATALOG_CSV = False


def build_catalog_metadata(catalog, csv_path=CATALOG_CSV, name_pattern=None):
    """Collect dataset-level metadata for every (namespace, dataset) pair and save it as CSV.

    Parameters
    ----------
    catalog
        Object exposing a ``datasets`` DataFrame with ``namespace``, ``dataset`` and
        ``version`` columns, and a ``find_latest(namespace=..., dataset=...)`` method
        whose result carries a dataclass at ``.metadata.dataset``.
    csv_path
        Destination CSV file; missing parent directories are created.
    name_pattern
        Optional regular expression; when given, only datasets whose name matches it
        are fetched (useful for quick test runs).

    Returns
    -------
    pandas.DataFrame
        One row per (dataset, source) pair, with the keys of each source dict spread
        into columns. An empty frame is returned, and nothing is written, when no
        metadata could be fetched.
    """
    # One row per (namespace, dataset), carrying its highest version.
    latest = (
        catalog.datasets
        .groupby(['namespace', 'dataset'])['version']
        .max()
        .reset_index()
    )
    if name_pattern is not None:
        latest = latest[latest['dataset'].str.contains(name_pattern)]

    # Accumulate plain dicts and build the frame once (avoids quadratic concat-in-loop).
    records = []
    for row in latest.itertuples():
        print(f'Getting metadata: index={row.Index} namespace={row.namespace}, dataset={row.dataset}')
        try:
            table = catalog.find_latest(namespace=row.namespace, dataset=row.dataset)
            records.append(asdict(table.metadata.dataset))
        except Exception as exc:
            # Best effort: report the failing dataset and carry on with the others.
            # Only `Exception` is caught, so Ctrl-C still interrupts the loop.
            print(f'ERROR: namespace={row.namespace}, dataset={row.dataset} ({exc!r})')

    meta = pd.DataFrame(records)
    if meta.empty:
        # Do not overwrite an existing CSV with an empty file.
        print('No dataset metadata collected; CSV not written.')
        return meta

    if 'sources' in meta.columns:
        # One row per source; reset_index() keeps the pre-explode row number in an
        # 'index' column, as in the previously generated CSV.
        meta = meta.explode('sources').reset_index()
        # Spread each source dict into its own columns.
        # NOTE(review): source fields such as `description` appear to share names with
        # dataset-level fields, which would yield duplicate column names — confirm
        # whether a prefix is wanted.
        meta = pd.concat(
            [meta.drop(columns='sources'), meta['sources'].apply(pd.Series)],
            axis=1,
        )

    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)
    meta.to_csv(csv_path, index=False)
    return meta


if REBUILD_CATALOG_CSV:
    build_catalog_metadata(catalog)

In [None]:
import pandas as pd

# Load the saved catalog metadata and expose the dataset short name
# under the column name 'dataset'.
df = (
    pd.read_csv('../../processed/owid_catalog.csv')
    .rename(columns={'short_name': 'dataset'})
)
df.head()

In [None]:
# Catalog entries whose dataset name contains "population"
df_pop = df.loc[df["dataset"].str.contains("population")]

# Latest Gapminder population table, sorted by descending year
data_pop = catalog.find_latest(namespace='gapminder', dataset='population')
data_pop.sort_values(by='year', ascending=False).head()


In [None]:

# Greenhouse-gas / CO2 emissions filter: keep catalog entries whose dataset
# name matches any of the keywords below (regular-expression alternation).
df_gh = df.loc[df["dataset"].str.contains("gh|green|house|carbon|co2|emission")]
df_gh


In [None]:
# Energy (NRJ) filter: keep catalog entries whose dataset name matches
# any of the keywords below (regular-expression alternation).
df_nrj = df.loc[df["dataset"].str.contains("energy|consumption|final")]
df_nrj


In [None]:
# WARNING: data from the Shift Data Portal (coverage only up to 2016)
sdp = catalog.find_latest(namespace="shift")
print(sdp.metadata.dataset)
sdp.sort_values(by="year", ascending=False).head()


In [21]:
# Latest EIA energy-consumption table.
# Bound to its own name rather than `df`, so the catalog-metadata frame that the
# filter cells above read from is not overwritten when this cell runs.
eia_energy_consumption = catalog.find_latest(namespace='eia', dataset='energy_consumption')
print(eia_energy_consumption.metadata.dataset)
eia_energy_consumption

DatasetMeta(namespace='eia', short_name='energy_consumption', title='Energy consumption (EIA, 2022)', description='Total energy consumption.', sources=[Source(name='U.S. Energy Information Administration', description=None, url='https://www.eia.gov/opendata/bulkfiles.php', source_data_url='https://api.eia.gov/bulk/INTL.zip', owid_data_url='https://walden.nyc3.digitaloceanspaces.com/eia/2022-07-27/international_energy_data.zip', date_accessed='2022-07-27', publication_date='2022-07-27', publication_year=2022, published_by=None, publisher_source=None)], licenses=[License(name='Public domain', url='https://www.eia.gov/about/copyrights_reuse.php')], is_public=True, additional_info=None, version='2022-07-27', source_checksum='ff47bf59dfb2300ba411d833c6e16f16')


Unnamed: 0_level_0,Unnamed: 1_level_0,members,values
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1980,AFG,28046.779297
Afghanistan,1981,AFG,31598.349609
Afghanistan,1982,AFG,33653.976562
Afghanistan,1983,AFG,41170.183594
Afghanistan,1984,AFG,41360.554688
...,...,...,...
Zimbabwe,2015,ZWE,196743.890625
Zimbabwe,2016,ZWE,167997.359375
Zimbabwe,2017,ZWE,165369.781250
Zimbabwe,2018,ZWE,171007.437500


In [24]:
# Latest BP energy-mix table.
# Bound to its own name rather than `df`, so the catalog-metadata frame that the
# filter cells above read from is not overwritten when this cell runs.
bp_energy_mix = catalog.find_latest(namespace='bp', dataset='energy_mix')
print(bp_energy_mix.metadata.dataset)
bp_energy_mix

DatasetMeta(namespace='bp', short_name='energy_mix', title='Energy mix (BP, 2023)', description='Raw data on energy consumption is sourced from <a href="https://www.bp.com/en/global/corporate/energy-economics/statistical-review-of-world-energy.html">the BP Statistical Review of World Energy</a>.\n\nPrimary energy in exajoules (EJ) has been converted to TWh by Our World in Data based on a conversion factor of 1,000,000 / 3,600 (~277.778).\n\nFor non-fossil based electricity sources (nuclear, hydro, wind, solar, geothermal, biomass in power, and other renewable sources), BP\'s generation (in TWh) corresponds to gross generation and not accounting for cross-border electricity supply.\nAlso, for non-fossil based electricity, there are two ways to define primary energy:\n* One is "direct primary energy", which corresponds to the electricity generation (in TWh).\n* The other is "input-equivalent primary energy" (also called "primary energy using the substitution method").\n  This is the amou

Unnamed: 0_level_0,Unnamed: 1_level_0,country_code,hydro__twh__direct,nuclear__twh__direct,solar__twh__direct,wind__twh__direct,other_renewables__twh__direct,coal__twh,oil__twh,gas__twh,biofuels__twh,...,other_renewables_per_capita__kwh__direct,other_renewables_per_capita__kwh__equivalent,renewables_per_capita__kwh__direct,renewables_per_capita__kwh__equivalent,solar_per_capita__kwh__direct,solar_per_capita__kwh__equivalent,wind_per_capita__kwh__direct,wind_per_capita__kwh__equivalent,solar_and_wind_per_capita__kwh__direct,solar_and_wind_per_capita__kwh__equivalent
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Africa,1965,OWID_AFR,13.905635,0.0,,0.0,,323.496155,341.262787,9.543755,,...,,,43.259445,127.917709,,,0.0,0.0,0.0,0.0
Africa,1966,OWID_AFR,15.510005,0.0,,0.0,,323.122192,369.486572,10.669916,,...,,,47.048717,139.122559,,,0.0,0.0,0.0,0.0
Africa,1967,OWID_AFR,16.190636,0.0,,0.0,,330.291595,368.125244,10.545670,,...,,,47.878628,141.576599,,,0.0,0.0,0.0,0.0
Africa,1968,OWID_AFR,18.938341,0.0,,0.0,,343.512878,389.199829,10.688969,,...,,,54.580433,161.393753,,,0.0,0.0,0.0,0.0
Africa,1969,OWID_AFR,22.100891,0.0,,0.0,,346.642883,396.922852,12.491999,,...,,,62.068840,183.536865,,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,2017,ZWE,,,,,,,,,,...,,,,,,,,,0.0,0.0
Zimbabwe,2018,ZWE,,,,,,,,,,...,,,,,,,,,0.0,0.0
Zimbabwe,2019,ZWE,,,,,,,,,,...,,,,,,,,,0.0,0.0
Zimbabwe,2020,ZWE,,,,,,,,,,...,,,,,,,,,0.0,0.0
