In [27]:
import pandas as pd
import pytimetk as tk
import plotly.express as px

from itables import show


In [52]:
change_ener_consump = pd.read_csv("data/change-energy-consumption.csv")
fossil_fuel_consump = pd.read_csv("data/fossil-fuel-consumption-by-type.csv")
fossil_fuel = pd.read_csv("data/fossil-fuel-primary-energy.csv")
oil_prod_country = pd.read_csv("data/oil-production-by-country.csv")
oil_share = pd.read_csv("data/oil-share-energy.csv")
energy_percapita = pd.read_csv("data/per-capita-energy-use.csv")
share_eletric = pd.read_csv("data/share-electricity-fossil-fuels.csv")


In [4]:
fossil_fuel.glimpse()


<class 'pandas.core.frame.DataFrame'>: 4962 rows of 4 columns
Entity:              object            ['Africa', 'Africa', 'Africa', 'A ...
Code:                object            [nan, nan, nan, nan, nan, nan, na ...
Year:                int64             [1965, 1966, 1967, 1968, 1969, 19 ...
Fossil fuels (TWh):  float64           [675.2018, 704.2703, 710.05896, 7 ...


## Variação Anual no consumo de energia (%)

In [5]:
change_ener_consump.head()


Unnamed: 0,Entity,Code,Year,Annual change in primary energy consumption (%)
0,Afghanistan,AFG,1981,12.663031
1,Afghanistan,AFG,1982,6.505477
2,Afghanistan,AFG,1983,22.33379
3,Afghanistan,AFG,1984,0.462401
4,Afghanistan,AFG,1985,-2.365375


In [11]:
# change_ener_consump.groupby("Entity").max(
#     "Annual change in primary energy consumption (%)"
# ).sort_values(
#     by="Annual change in primary energy consumption (%)", ascending=False
# ).head(
#     30
# )

change_ener_consump["Entity"].unique()


array(['Afghanistan', 'Africa', 'Africa (EI)', 'Africa (EIA)', 'Albania',
       'Algeria', 'American Samoa', 'Angola', 'Antarctica',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Asia & Oceania (EIA)', 'Asia Pacific (EI)', 'Australia',
       'Australia and New Zealand (EIA)', 'Austria', 'Azerbaijan',
       'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
       'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'CIS (EI)', 'Cambodia', 'Cameroon', 'Canada',
       'Cape Verde', 'Cayman Islands', 'Central & South America (EIA)',
       'Central African Republic', 'Central America (EI)', 'Chad',
       'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus',
       'Czechia', 'Czechoslovakia', 'Democratic Republic of

In [5]:
change_ener_consump_gt_2001 = change_ener_consump.query(
    "Entity in ('OECD (EIA)', 'Lower-middle-income countries', 'Asia & Oceania (EIA)', 'Australia and New Zealand (EIA)', \
        'Europe (EIA)', 'Upper-middle-income countries', 'High-income countries','South and Central America (EI)') \
            and Year > 2001"
)
fig = px.line(
    change_ener_consump_gt_2001,
    x="Year",
    y="Annual change in primary energy consumption (%)",
    color="Entity",
)
# Update the layout to set the figure size
fig.update_layout(
    width=1080, height=600  # Adjust the width as needed  # Adjust the height as needed
)
fig.show()


## Uso de combustível fóssil como fonte primária de energia

In [8]:
fossil_fuel_gt_2001 = fossil_fuel.query("Year > 2001")


In [51]:
fig = px.choropleth(
    fossil_fuel_gt_2001,
    locations="Code",
    color="Fossil fuels (TWh)",  # lifeExp is a column of gapminder
    hover_name="Entity",  # column to add to hover information
    animation_frame="Year",
    color_continuous_scale=px.colors.sequential.Plasma,
)
# Update the layout to set the figure size
fig.update_layout(
    width=1000,
    height=600,  # Adjust the width as needed  # Adjust the height as needed
)
fig.update_layout(
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    coloraxis_colorbar=dict(
        orientation="h",  # Define orientação horizontal
        yanchor="bottom",
        xanchor="center",
        x=0.5,
        y=-0.2,
    ),
)
fig.show()


## Consumo de energia per capita

In [10]:
energy_percapita.glimpse()


<class 'pandas.core.frame.DataFrame'>: 10604 rows of 4 columns
Entity:                                              object            [ ...
Code:                                                object            [ ...
Year:                                                int64             [ ...
Primary energy consumption per capita (kWh/person):  float64           [ ...


In [18]:
energy_percapita_gt_2001 = energy_percapita.query("Year > 2001")
fig = px.choropleth(
    energy_percapita_gt_2001,
    locations="Code",
    color="Primary energy consumption per capita (kWh/person)",  # lifeExp is a column of gapminder
    hover_name="Entity",  # column to add to hover information
    animation_frame="Year",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.update_layout(
    width=1000,
    height=600,
    coloraxis_colorbar=dict(
        orientation="h",  # Define orientação horizontal
        yanchor="bottom",
        xanchor="center",
        x=0.5,
        y=-0.2,
    ),
    legend=dict(orientation="h", yanchor="top", y=0.99, xanchor="right", x=0.01),
)
fig.show()


In [28]:
show(
    energy_percapita_gt_2001.groupby("Entity")
    .sum("Primary energy consumption per capita (kWh/person)")
    .sort_values("Primary energy consumption per capita (kWh/person)", ascending=False)
)


Unnamed: 0_level_0,Year,Primary energy consumption per capita (kWh/person)
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1
Loading ITables v2.1.1 from the internet... (need help?),,


## Produção de energia

In [35]:
oil_prod_country_gt_2001 = oil_prod_country.query("Year > 2001")


In [36]:
oil_prod_country_gt_2001.head()


Unnamed: 0,Entity,Code,Year,Oil production (TWh)
22,Afghanistan,AFG,2002,0.0
23,Afghanistan,AFG,2003,0.0
24,Afghanistan,AFG,2004,0.0
25,Afghanistan,AFG,2005,0.0
26,Afghanistan,AFG,2006,0.0


In [42]:
show(
    oil_prod_country_gt_2001.groupby("Entity")
    .sum("Oil production (TWh)")
    .sort_values("Oil production (TWh)", ascending=False)
)

top15 = (
    (
        oil_prod_country_gt_2001.groupby("Entity")
        .sum("Oil production (TWh)")
        .sort_values("Oil production (TWh)", ascending=False)
    )
    .reset_index()["Entity"]
    .head(15)
)


Unnamed: 0_level_0,Year,Oil production (TWh)
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1
Loading ITables v2.1.1 from the internet... (need help?),,


In [49]:
oil_prod_country_gt_2001 = oil_prod_country_gt_2001.query(
    f"Entity in {top15.to_list()}"
)


In [50]:
fig = px.line(
    oil_prod_country_gt_2001,
    x="Year",
    y="Oil production (TWh)",
    color="Entity",
)
# Update the layout to set the figure size
fig.update_layout(
    width=1080, height=600  # Adjust the width as needed  # Adjust the height as needed
)
fig.show()


# Relação entre as tendências de produção e consumo com a série de preços

In [53]:
petr_brent = pd.read_csv("data/petr_brent.csv")
show(petr_brent)


DATE,CODE,RAW DATE,DAY,MONTH,YEAR,VALUE (US$)
Loading ITables v2.1.1 from the internet... (need help?),,,,,,


In [55]:
petr_brent_price = petr_brent[["DATE", "VALUE (US$)"]]
petr_brent_price.head()


Unnamed: 0,DATE,VALUE (US$)
0,2002-01-01,
1,2002-01-02,20.13
2,2002-01-03,20.47
3,2002-01-04,21.2
4,2002-01-05,


In [56]:
petr_brent_price["DATE"] = pd.to_datetime(petr_brent_price["DATE"])
petr_brent_price.set_index("DATE", inplace=True)
petr_brent_price.head()


Unnamed: 0_level_0,VALUE (US$)
DATE,Unnamed: 1_level_1
2002-01-01,
2002-01-02,20.13
2002-01-03,20.47
2002-01-04,21.2
2002-01-05,


In [67]:
petr_brent_price["VALUE (US$)"] = petr_brent_price["VALUE (US$)"].bfill().ffill()
price = petr_brent_price.copy()


In [68]:
px.line(
    price,
    title="Preço do Petróleo do tipo Brent em US$",
    x=price.index,
    y="VALUE (US$)",
)


In [70]:
# série diária para anual
price_y = price.resample("Y").mean()

px.line(
    price_y,
    title="Preço do Petróleo do Tipo Brent em US$ - Média anual",
    labels={"index": "Ano", "value": "Preço (US$)"},
)


In [71]:
# variação percentual anual do preço
price_y_percent_change = price.resample("Y").sum().pct_change() * 100

px.line(
    price_y_percent_change,
    title="Variação anual do preço do Petróleo do tipo Brent (%)",
    labels={"index": "Ano", "value": "Variação (%)"},
)


In [90]:
df_production = oil_prod_country_gt_2001.reset_index()[
    ["Entity", "Year", "Oil production (TWh)"]
].pivot(index="Year", columns="Entity", values="Oil production (TWh)")
# df_production.reset_index()


In [104]:
df_price = price_y.reset_index()
df_price["Year"] = df_price["DATE"].dt.year
df_price = df_price.drop(columns="DATE", axis=1)
df_price.set_index("Year", inplace=True)
df_price.rename(columns={"VALUE (US$)": "Price"}, inplace=True)
df_price


Unnamed: 0_level_0,Price
Year,Unnamed: 1_level_1
2002,24.995397
2003,28.861342
2004,38.137432
2005,54.332959
2006,65.104849
2007,72.296548
2008,97.236721
2009,61.28589
2010,79.455151
2011,111.297096


In [112]:
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go

# Normalizing the production data
scaler = MinMaxScaler()
normalized_production = df_production.copy()
normalized_production.iloc[:, 1:] = scaler.fit_transform(
    df_production.iloc[:, 1:].to_numpy()
)
normalized_production


Entity,Asia,High-income countries,Middle East (EI),Middle East (Shift),Non-OECD (EI),Non-OPEC (EI),North America,North America (EI),OECD (EI),OECD (Shift),OPEC (EI),OPEC (Shift),Persian Gulf (Shift),Upper-middle-income countries,World
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2002,16786.791,0.0,0.0,0.0,0.0,0.0,0.080508,0.07782,0.322042,0.66343,0.0,0.0,0.0,0.0,0.0
2003,17903.973,0.122689,0.190508,0.150973,0.227354,0.0703,0.098189,0.094507,0.294589,0.622459,0.259864,0.144974,0.148733,0.084885,0.16918
2004,19027.342,0.163715,0.360049,0.371856,0.501173,0.136325,0.094688,0.092912,0.251925,0.520175,0.589256,0.412273,0.363161,0.442909,0.368383
2005,19546.465,0.159979,0.427838,0.46865,0.607833,0.122241,0.052433,0.049734,0.146972,0.320158,0.684537,0.556309,0.452739,0.52172,0.404098
2006,19877.346,0.144967,0.458904,0.432072,0.68504,0.140584,0.052278,0.049518,0.095666,0.227853,0.738378,0.52317,0.428033,0.648685,0.441644
2007,19808.943,0.096864,0.418867,0.374812,0.693287,0.155855,0.040308,0.039559,0.063485,0.163261,0.698732,0.503028,0.386647,0.708557,0.432997
2008,20578.13,0.126482,0.531144,0.539905,0.786911,0.139149,0.0,0.0,0.0,0.01901,0.817179,0.651671,0.536539,0.74209,0.477925
2009,19624.822,0.021909,0.340753,0.375617,0.651503,0.171496,0.018839,0.019363,0.003734,0.0,0.546033,0.492932,0.381689,0.729785,0.371183
2010,20187.592,0.058314,0.399883,0.475482,0.744498,0.255086,0.051304,0.052081,0.021291,0.007894,0.604585,0.586557,0.467033,0.824555,0.454031
2011,21359.91,0.200899,0.631097,0.628541,0.775617,0.257191,0.092774,0.093902,0.038125,0.001213,0.671323,0.730027,0.616832,0.697624,0.48695


In [132]:
# Normalizing the price data
normalized_price = df_price.copy().reset_index()
normalized_price.iloc[:, 1:] = scaler.fit_transform(normalized_price.iloc[:, 1:])
normalized_price.set_index("Year", inplace=True)
normalized_price


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    23 non-null     int32  
 1   Price   23 non-null     float64
dtypes: float64(1), int32(1)
memory usage: 408.0 bytes


Unnamed: 0_level_0,Price
Year,Unnamed: 1_level_1
2002,0.0
2003,0.044627
2004,0.151708
2005,0.338664
2006,0.463011
2007,0.54603
2008,0.833931
2009,0.418926
2010,0.628666
2011,0.99624


In [136]:
# Create traces for each country's production values
traces = []
for country in normalized_production.columns[1:]:  # Exclude the 'Year' column
    trace = go.Scatter(
        x=normalized_production.index,
        y=normalized_production[country],
        mode="lines",
        name=f"{country} Production",
        line=dict(dash="solid"),  # Solid lines for production values
        opacity=0.6,
    )
    traces.append(trace)

# Create a trace for the yearly price series, using a secondary y-axis
trace_price = go.Scatter(
    x=normalized_price.index,
    y=normalized_price["Price"],
    mode="lines+markers",
    name="Price",
    yaxis="y2",
    line=dict(dash="dot", color="red"),  # Dotted line and red color for price values
)
traces.append(trace_price)

# Create the layout
layout = go.Layout(
    title="Normalized Production Values by Country and Yearly Price Series",
    xaxis=dict(title="Year"),
    yaxis=dict(title="Normalized Production Values"),
    yaxis2=dict(title="Normalized Price", overlaying="y", side="right"),
    # legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

# Combine the traces into a figure
fig = go.Figure(data=traces, layout=layout)

# Show the plot
fig.show()


In [140]:
# Calculate annual percent change for production data
df_production_pct_change = df_production.copy()
df_production_pct_change.iloc[:, 1:] = df_production.iloc[:, 1:].pct_change() * 100

# Calculate annual percent change for price data
df_price_pct_change = df_price.copy()
df_price_pct_change.iloc[:, 1:] = df_price.iloc[:, 1:].pct_change() * 100


In [141]:
# Reshape the data for plotting with Plotly Express
df_production_melted = df_production_pct_change.reset_index().melt(
    id_vars=["Year"], var_name="Country", value_name="Percent Change"
)
df_price_melted = df_price_pct_change.reset_index().melt(
    id_vars=["Year"], var_name="Type", value_name="Percent Change"
)

# Combine the production and price data for plotting
df_combined = pd.concat([df_production_melted, df_price_melted])
df_combined["Type"] = df_combined["Type"].fillna(df_combined["Country"])
df_combined.drop(columns=["Country"], inplace=True)

# Plot using Plotly Express
fig = px.line(
    df_combined,
    x="Year",
    y="Percent Change",
    color="Type",
    line_dash="Type",
    markers=True,
    title="Annual Percent Change: Production Values by Country and Yearly Price Series",
)

# Customize the opacity for production lines
for trace in fig.data:
    if "Price" not in trace.name:
        trace.opacity = 0.6

# Show the plot
fig.show()


In [144]:
# Calculate annual percent change for production data
df_production_pct_change = df_production.copy()
df_production_pct_change.iloc[:, 1:] = df_production.iloc[:, 1:].pct_change() * 100

# Calculate annual percent change for price data
df_price_pct_change = df_price.copy()
df_price_pct_change.iloc[:, 1:] = df_price.iloc[:, 1:].pct_change() * 100

# Normalizing the percent change data for production
scaler = MinMaxScaler()
normalized_production_pct_change = df_production_pct_change.copy()
normalized_production_pct_change.iloc[:, 1:] = scaler.fit_transform(
    df_production_pct_change.iloc[:, 1:]
)
normalized_production_pct_change.set_index("Year", inplace=True)
# normalized_production_pct_change.reset_index(inplace=True)
# Normalizing the percent change data for price


normalized_price = df_price_pct_change.copy().reset_index()
normalized_price.iloc[:, 1:] = scaler.fit_transform(normalized_price.iloc[:, 1:])
normalized_price_pct_change = df_price_pct_change.copy()
normalized_price_pct_change.iloc[:, 1:] = scaler.fit_transform(
    df_price_pct_change.iloc[:, 1:]
)
normalized_price_pct_change.set_index("Year", inplace=True)


# Create traces for each country's production percent change with adjusted opacity
traces = []
for country in normalized_production_pct_change.columns[
    1:
]:  # Exclude the 'Year' column
    trace = go.Scatter(
        x=normalized_production_pct_change.index,
        y=normalized_production_pct_change[country],
        mode="lines+markers",
        name=f"{country} Production % Change",
        line=dict(dash="solid"),  # Solid lines for production values
        opacity=0.6,  # Adjust opacity here
    )
    traces.append(trace)

# Create a trace for the yearly price percent change, using a secondary y-axis
trace_price = go.Scatter(
    x=normalized_production_pct_change.index,
    y=normalized_price_pct_change["Price"],
    mode="lines+markers",
    name="Price % Change",
    yaxis="y2",
    line=dict(dash="dot", color="red"),  # Dotted line and red color for price values
    opacity=1,  # Full opacity for the price series
)
traces.append(trace_price)

# Create the layout
layout = go.Layout(
    title="Normalized Annual Percent Change: Production Values by Country and Yearly Price Series",
    xaxis=dict(title="Year"),
    yaxis=dict(title="Normalized Production % Change"),
    yaxis2=dict(title="Normalized Price % Change", overlaying="y", side="right"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

# Combine the traces into a figure
fig = go.Figure(data=traces, layout=layout)

# Show the plot
fig.show()


KeyError: 'Year'