In [1]:
import pandas as pd
import numpy as np

import pycountry

import altair as alt
#import matplotlib.pyplot as plt
#import squarify

### DATA

In [2]:
fsi2021 = pd.read_csv('data/fsi-2021.csv')

In [3]:
#fsi2021.head()

In [4]:
bp2021 = pd.read_csv('data/bp-stats-review-2021-consolidated-dataset-narrow-format.csv')
#bp2021 = bp2021.loc[ bp2021.Var == 'oilreserves_bbl']
bp2021 = bp2021.loc[ bp2021.Year == 2020]

In [5]:
bp2021.rename(columns={
    'ISO3166_alpha3': 'alpha3'
}, inplace=True)

In [6]:
#bp2021.head()

### COUNTRIES AND MERGE

In [7]:
def get_alpha3(country):
    try:
        alpha3 = pycountry.countries.search_fuzzy(country)[0].alpha_3
    except LookupError as e:
        alpha3 = None
        
    return alpha3
        

fsi2021.loc[:, 'alpha3'] = fsi2021.loc[:, 'Country'].apply(
    get_alpha3
)

In [8]:
alpha3_codes = {
    'Congo Democratic Republic': 'COD',
    'Congo Republic': 'COG',
    'Guinea Bissau': 'GNB',
    'North Korea': 'PRK',
    'Laos': 'LAO',
    'South Korea': 'KOR',
    'Nigeria': 'NGA',
    'Niger': 'NER'
}

for country, code in alpha3_codes.items():
    fsi2021.loc[fsi2021.Country == country, 'alpha3'] = code

In [9]:
data = bp2021.merge(
    fsi2021.loc[:, ['Total', 'Rank', 'alpha3']],
    on='alpha3',
    how='left'
)

In [10]:
data = data.loc[
    ~data.Total.isna(),
    ['Country', 'Region', 'SubRegion', 'Var', 'Value', 'Total', 'Rank']
].rename(columns={
    'Total' : 'FSI', 'Rank': 'FSIRank'
})

In [11]:
data.head()

Unnamed: 0,Country,Region,SubRegion,Var,Value,FSI,FSIRank
0,Algeria,Africa,Northern Africa,wind_twh,0.008,73.6,74th
1,Argentina,S. & Cent. America,South America,wind_twh,9.470298,50.1,137th
2,Australia,Asia Pacific,Asia Pacific,wind_twh,22.606892,21.8,170th
3,Austria,Europe,Europe,wind_twh,6.79414,26.1,166th
4,Azerbaijan,CIS,CIS,wind_twh,0.0951,75.1,71st


###

### OIL RESERVES AND FRAGILITY

In [12]:
oil_reserves = data.loc[data.Var == 'oilreserves_bbl']
oil_reserves.loc[:, 'Share'] = oil_reserves.loc[:, 'Value'] / oil_reserves.loc[:, 'Value'].sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [13]:
def fsi_levels(fsi):
    if fsi < 30:
        return 'Very sustainable'
    elif fsi < 40:
        return 'Sustainable'
    
    elif fsi < 50:
        return 'More stable'
    elif fsi < 60:
        return 'Stable'
    elif fsi < 70:
        return 'Less stable'
    
    elif fsi < 80:
        return 'Warning'
    elif fsi < 90:
        return 'Elevated warning'
    elif fsi < 100:
        return 'High warning'
    
    elif fsi < 110:
        return 'Alert'
    elif fsi < 120:
        return 'High alert'
    else:
        return 'Very high alert'
    
    
def get_fsi_label(fsi):
    if fsi < 40:
        return 'Sustainable'
    if fsi < 70:
        return 'Stable'
    if fsi < 100:
        return 'Warning'
    
    return 'Alert'
    
oil_reserves.loc[
    :, 'FSILabel'
] = oil_reserves.FSI.apply(get_fsi_label)

oil_reserves.loc[
    :, 'FSILevel'
] = oil_reserves.FSI.apply(fsi_levels)

In [14]:
oil_reserves.sort_values('Share', ascending=False).head(10)

Unnamed: 0,Country,Region,SubRegion,Var,Value,FSI,FSIRank,Share,FSILabel,FSILevel
1548,Venezuela,S. & Cent. America,South America,oilreserves_bbl,303.805745,92.6,25th,0.176185,Warning,High warning
1525,Saudi Arabia,Middle East,Middle East,oilreserves_bbl,297.527,69.7,93rd,0.172544,Stable,Less stable
1498,Canada,North America,North America,oilreserves_bbl,168.087611,21.7,171st,0.097478,Sustainable,Very sustainable
1509,Iran,Middle East,Middle East,oilreserves_bbl,157.8,84.5,43rd,0.091512,Warning,Elevated warning
1510,Iraq,Middle East,Middle East,oilreserves_bbl,145.019,96.2,20th,0.0841,Warning,High warning
1524,Russian Federation,CIS,CIS,oilreserves_bbl,107.804239,73.6,74th,0.062519,Warning,Warning
1513,Kuwait,Middle East,Middle East,oilreserves_bbl,101.5,52.9,129th,0.058863,Stable,Stable
1545,United Arab Emirates,Middle East,Middle East,oilreserves_bbl,97.8,40.3,151st,0.056717,Stable,More stable
1544,US,North America,North America,oilreserves_bbl,68.757,44.6,143rd,0.039874,Stable,More stable
1514,Libya,Africa,Northern Africa,oilreserves_bbl,48.363,97.0,17th,0.028047,Warning,High warning


In [15]:
oil_reserves.groupby('FSILabel').agg({
    'Share': 'sum',
    'Country': 'count'
})

Unnamed: 0_level_0,Share,Country
FSILabel,Unnamed: 1_level_1,Unnamed: 2_level_1
Alert,0.006959,5
Stable,0.393397,21
Sustainable,0.103695,4
Warning,0.495949,19


In [16]:
colors = list(reversed(['#67001f','#b2182b','#d6604d','#f4a582','#fddbc7','#ffffff','#e0e0e0','#bababa','#878787','#4d4d4d','#1a1a1a']))
levels = [
    'Very sustainable',
    'Sustainable',
    'More stable',
    'Stable',
    'Less stable',
    'Warning',
    'Elevated warning',
    'High warning',
    'Alert',
    'High alert',
    'Very high alert',
]

background_color = '#00796B'
stroke_color = '#fff'

In [20]:
y_ticks = ['Alert', 'Warning', 'Stable', 'Sustainable']

alt.Chart(
    oil_reserves
).mark_bar(strokeWidth=0.5, stroke=background_color).encode(
    x=alt.X(
        'sum(Share)',
        axis=alt.Axis(
            grid=False,
            tickCount=4,
            labelExpr="datum.value*100 +'%'",
            labelColor=stroke_color,
            labelFontSize=12,
            labelFontWeight=400
        )
    ),
    y=alt.Y('FSILabel:O', sort=y_ticks, axis=alt.Axis( grid=False, labelColor=stroke_color, labelFontSize=12,  labelFontWeight=400)),
    color=alt.Color('FSILevel', scale=alt.Scale(
        domain=levels, range=colors
    )),
    detail='Country',
    order=alt.Order('Share'),
    tooltip=['Country', 'Share', 'FSILevel']
).configure_axis(
    domainOpacity=0,
).properties(
    width=1000,
    height=400
).configure(background=background_color)

#### TREEMAP

In [None]:
oil_reserves_agg = oil_reserves.groupby('FSILabel').agg({
    'Share': 'sum' 
}).reset_index()

fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)

squarify.plot(
    sizes=oil_reserves_agg['Share'].to_list(),
    label=oil_reserves_agg['FSILabel'].to_list(),
    color=['red', 'blue', 'green', 'yellow'],
    alpha=.8,
    pad=True,
    ax=ax
)
plt.axis('off')
plt.show()

#### STACKED AREA CHART OVER TIME?

### NATURAL GAS RESERVES AND FRAGILITY

In [21]:
# gasreserves_tcm

gas_reserves = data.loc[data.Var == 'gasreserves_tcm']
gas_reserves.loc[:, 'Share'] = gas_reserves.loc[:, 'Value'] / gas_reserves.loc[:, 'Value'].sum()

gas_reserves.loc[
    :, 'FSILabel'
] = gas_reserves.FSI.apply(get_fsi_label)

gas_reserves.loc[
    :, 'FSILevel'
] = gas_reserves.FSI.apply(fsi_levels)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [22]:
gas_reserves.sort_values('Share', ascending=False).head(10)

Unnamed: 0,Country,Region,SubRegion,Var,Value,FSI,FSIRank,Share,FSILabel,FSILevel
2874,Russian Federation,CIS,CIS,gasreserves_tcm,37.391524,73.6,74th,0.200851,Warning,Warning
2854,Iran,Middle East,Middle East,gasreserves_tcm,32.101379,84.5,43rd,0.172435,Warning,Elevated warning
2872,Qatar,Middle East,Middle East,gasreserves_tcm,24.665471,44.1,144th,0.132492,Stable,More stable
2890,Turkmenistan,CIS,CIS,gasreserves_tcm,13.60125,68.2,97th,0.07306,Stable,Less stable
2891,US,North America,North America,gasreserves_tcm,12.618704,44.6,143rd,0.067782,Stable,More stable
2847,China,Asia Pacific,Asia Pacific,gasreserves_tcm,8.39855,68.9,95th,0.045113,Stable,Less stable
2896,Venezuela,S. & Cent. America,South America,gasreserves_tcm,6.260173,92.6,25th,0.033627,Warning,High warning
2875,Saudi Arabia,Middle East,Middle East,gasreserves_tcm,6.01912,69.7,93rd,0.032332,Stable,Less stable
2893,United Arab Emirates,Middle East,Middle East,gasreserves_tcm,5.938725,40.3,151st,0.0319,Stable,More stable
2865,Nigeria,Africa,Western Africa,gasreserves_tcm,5.472979,98.0,12th,0.029398,Warning,High warning


In [23]:
gas_reserves.groupby('FSILabel').agg({
    'Share': 'sum',
    'Country': 'count'
})

Unnamed: 0_level_0,Share,Country
FSILabel,Unnamed: 1_level_1,Unnamed: 2_level_1
Alert,0.002871,2
Stable,0.439757,23
Sustainable,0.034111,6
Warning,0.523261,20


In [24]:
y_ticks = ['Alert', 'Warning', 'Stable', 'Sustainable']

alt.Chart(
    gas_reserves
).mark_bar(strokeWidth=0.5, stroke=background_color).encode(
    x=alt.X(
        'sum(Share)',
        axis=alt.Axis(
            grid=False,
            tickCount=4,
            labelExpr="datum.value*100 +'%'",
            labelColor=stroke_color,
            labelFontSize=12,
            labelFontWeight=400
        )
    ),
    y=alt.Y('FSILabel:O', sort=y_ticks, axis=alt.Axis( grid=False, labelColor=stroke_color, labelFontSize=12,  labelFontWeight=400)),
    color=alt.Color('FSILevel', scale=alt.Scale(
        domain=levels, range=colors
    )),
    detail='Country',
    order=alt.Order('Share'),
    tooltip=['Country', 'Share', 'FSILevel']
).configure_axis(
    domainOpacity=0,
).properties(
    width=1000,
    height=400
).configure(background=background_color)

In [22]:
alt.Chart(
    gas_reserves
).mark_bar(strokeWidth=1, stroke='#fff').encode(
    x=alt.X('sum(Share)'),
    y=alt.Y('FSILabel'),
    color=alt.Color('FSILabel', scale=alt.Scale(
        domain=['Alert', 'Stable', 'Sustainable', 'Warning'], range=['#D84315', '#78909C', '#455A64', '#FF5722']
    )),
    detail='Country',
    order=alt.Order('Share'),
    tooltip=['Country', 'Share', 'FSILabel']
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).properties(
    width=1000,
    height=400
)