In [3]:
import json
import datetime

import pandas as pd 
# import geopandas as gpd 
import numpy as np
import plotly.express as px 
import plotly

import data_processing as dp
import plot_computations as pc 

In [280]:
df = dp.load_dataset()
df

Unnamed: 0,State,Source,Sector,Unit,Year,BTU
0,Alaska,Aviation gasoline blending components,Industrial,Billion BTU,1960,0.0
1,Alaska,Asphalt and road oil,Industrial,Billion BTU,1960,312.0
2,Alaska,Asphalt and road oil,Total,Billion BTU,1960,312.0
3,Alaska,Aviation gasoline,Transportation,Billion BTU,1960,5209.0
4,Alaska,Aviation gasoline,Total,Billion BTU,1960,5209.0
...,...,...,...,...,...,...
463263,Wyoming,Wood and biomass waste,Total,Billion BTU,2018,4901.0
463264,Wyoming,Waxes,Industrial,Billion BTU,2018,0.0
463265,Wyoming,Wind energy,Commercial,Billion BTU,2018,0.0
463266,Wyoming,Wind energy,Industrial,Billion BTU,2018,0.0


# Creating United States choropleth

In [281]:
total_df = dp.data_subset(df, sources=["Total"], sectors=["Total consumption per capita"])

In [282]:
# gdf = gpd.read_file(r"cb_2018_us_state_20m\cb_2018_us_state_20m.shp")
# gdf.to_crs(epsg=4326)

# fig = px.choropleth_mapbox(total_df, locations="State", color="BTU", geojson=json.loads(gdf.to_json()), featureidkey="properties.NAME", color_continuous_scale=plotly.colors.diverging.Temps, range_color=(0,1000), animation_frame="Year", title="Total energy consumption (million BTU) per capita in the US")
# fig.update_layout(mapbox_style="carto-positron",
#                   mapbox_zoom=2.6, mapbox_center={"lat": 38, "lon": -98})
# fig.show()

# Visualizing energy resource usage in the United States 

In [283]:
total_df = dp.data_subset(df, states=["United States"], sectors=["Total", "Total consumption per capita"])
total_df = total_df[total_df["Source"] == "Total"]
total_df

Unnamed: 0,State,Source,Sector,Unit,Year,BTU
6777,United States,Total,Total,Billion BTU,1960,45033606.0
6778,United States,Total,Total consumption per capita,Million BTU,1960,249.3
14629,United States,Total,Total,Billion BTU,1961,45684153.0
14630,United States,Total,Total consumption per capita,Million BTU,1961,248.7
22481,United States,Total,Total,Billion BTU,1962,47773067.0
...,...,...,...,...,...,...
446490,United States,Total,Total consumption per capita,Million BTU,2016,301.3
454341,United States,Total,Total,Billion BTU,2017,97613024.0
454342,United States,Total,Total consumption per capita,Million BTU,2017,300.4
462193,United States,Total,Total,Billion BTU,2018,101083621.0


In [284]:
total_df = total_df.groupby(["Year", "Sector", "Unit", "Source"], as_index=False).sum()
per_cap_mask = total_df["Sector"] == "Total consumption per capita"
per_cap_df = total_df[per_cap_mask]
total_consumption_df = total_df[~per_cap_mask]

# per_cap_df

In [285]:
total_consumption_df["BTU"] = total_consumption_df["BTU"]/1_000_000
total_consumption_df = total_consumption_df.rename(columns={"BTU": "Quadrillion BTU"})
per_cap_df = per_cap_df.rename(columns={"BTU": "Million BTU"})

marker_size = (per_cap_df["Million BTU"]/per_cap_df["Million BTU"].max())**5
per_cap_unit = per_cap_df["Unit"].unique()[0]

fig = px.line(
    total_consumption_df, 
    x="Year",
    y="Quadrillion BTU", 
#     size=marker_size,
#     color_continuous_scale=px.colors.diverging.RdYlGn[::-1],
    hover_name="Year",
    range_x=[datetime.date(1960, 1, 1), datetime.date(2018, 1, 1)],
    height=500,
    title="Total energy consumption in the United States",
    color_discrete_sequence=["black"]
)
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=58,
                     label="1960",
                     step="year",
                     stepmode="backward"),
                dict(count=48,
                     label="1970",
                     step="year",
                     stepmode="backward"),
                dict(count=38,
                     label="1980",
                     step="year",
                     stepmode="backward"),
                dict(count=28,
                     label="1990",
                     step="year",
                     stepmode="backward"),
                dict(count=18,
                     label="2000",
                     step="year",
                     stepmode="backward"),
                dict(count=8,
                     label="2010",
                     step="year",
                     stepmode="backward"),
            ])
        ),
        type="date"
    )
)
fig.update_xaxes(showspikes=True, showgrid=False)
fig.update_yaxes(showspikes=True, showgrid=False)
fig.update_layout(hovermode="x")
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Million BTU per capita",
    )
)
# fig.add_shape(type="rect",
#     xref="x", yref="y",
#     x0="2000-01-01", y0=0,
#     x1="2010-01-01", y1=100,
#     line=dict(
#         color="RoyalBlue",
#         width=3,
#     ),
#     fillcolor="LightSkyBlue",
# )
fig.add_vrect(
    x0="1960-01-01", x1="1961-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1961-01-01", x1="1963-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1963-01-01", x1="1969-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1969-01-01", x1="1974-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1974-01-01", x1="1977-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1977-01-01", x1="1981-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1981-01-01", x1="1989-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1989-01-01", x1="1993-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1993-01-01", x1="2001-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2001-01-01", x1="2009-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2009-01-01", x1="2017-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2017-01-01", x1="2018-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
)

fig.update_xaxes(
    ticktext=["John F. Kennedy", "Lyndon B. Johnson","Richard Nixon","Gerald Ford","Jimmy Carter","Ronald Reagan","George H.W. Bush","Bill Clinton","George W. Bush","Barack Obama","Donald Trump",],
    tickvals=["1961-01-01", "1963-01-01", "1969-01-01", "1974-01-01", "1977-01-01", "1981-01-01", "1989-01-01", "1993-01-01", "2001-01-01", "2009-01-01", "2017-01-01"],
)
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [286]:
# total_df = dp.data_subset(df, states=["United States"], sources=["Renewable energy", "Fossil fuels", "Nuclear electric power"])
# test_df = total_df[total_df["Source"].isin(["Renewable energy", "Fossil fuels", "Nuclear electric power"])]
# test_df = test_df.groupby("Year", as_index=False).sum()
# print(f"{test_df[test_df['Year'] == '2018']['BTU'][58]:,}")

In [287]:
# total_df = dp.data_subset(df, states=["United States"])
# total_df[total_df["Source"].isin(["Fossil fuels"])]

In [288]:
total_df = dp.data_subset(df, states=["United States"])
total_df[total_df["Source"].isin(["Renewable energy", "Fossil fuels", "Nuclear electric power"])]

Unnamed: 0,State,Source,Sector,Unit,Year,BTU
6682,United States,Fossil fuels,Total,Billion BTU,1960,42083902.0
6731,United States,Nuclear electric power,Total,Billion BTU,1960,6026.0
6755,United States,Renewable energy,Total,Billion BTU,1960,2928205.0
14534,United States,Fossil fuels,Total,Billion BTU,1961,42704559.0
14583,United States,Nuclear electric power,Total,Billion BTU,1961,19678.0
...,...,...,...,...,...,...
454295,United States,Nuclear electric power,Total,Billion BTU,2017,8418968.0
454319,United States,Renewable energy,Total,Billion BTU,2017,11046868.0
462098,United States,Fossil fuels,Total,Billion BTU,2018,81212261.0
462147,United States,Nuclear electric power,Total,Billion BTU,2018,8438068.0


In [289]:
total_df

Unnamed: 0,State,Source,Sector,Unit,Year,BTU
6644,United States,Aviation gasoline blending components,Industrial,Billion BTU,1960,0.0
6645,United States,Asphalt and road oil,Industrial,Billion BTU,1960,733782.0
6646,United States,Asphalt and road oil,Total,Billion BTU,1960,733782.0
6647,United States,Aviation gasoline,Transportation,Billion BTU,1960,297903.0
6648,United States,Aviation gasoline,Total,Billion BTU,1960,297903.0
...,...,...,...,...,...,...
462206,United States,Wood and biomass waste,Total,Billion BTU,2018,2747853.0
462207,United States,Waxes,Industrial,Billion BTU,2018,12414.0
462208,United States,Wind energy,Commercial,Billion BTU,2018,1585.0
462209,United States,Wind energy,Industrial,Billion BTU,2018,882.0


In [290]:
total_df = dp.data_subset(df, states=["United States"], sources=["Renewable energy", "Natural gas (excluding supplemental gaseous fuels)", "Coal", "Nuclear electric power", "All petroleum products - excluding biofuels"])
# test_df = total_df[total_df["Source"].isin(["Renewable energy", "Fossil fuels", "Nuclear electric power"])]
test_df = total_df.groupby(["Year", "Source"], as_index=False).sum()
# print(f"{test_df[test_df['Year'] == '2018']['BTU'][58]:,}")
total_df[total_df["Year"] == 1970]

test_df["Source"] = test_df["Source"].replace(
        {
            "Renewable energy": "Renewables",
            "Natural gas (excluding supplemental gaseous fuels)": "Natural gas",
            "Nuclear electric power": "Nuclear",
            "All petroleum products - excluding biofuels": "Petroleum"
        },
        regex = False
    )

In [291]:
test_df = test_df.rename(columns={"BTU": "Quadrillion BTU"})
test_df["Quadrillion BTU"] = test_df["Quadrillion BTU"]/1_000_000
fig = px.line(test_df, x="Year", y="Quadrillion BTU", color="Source", color_discrete_map={
    "Coal": '#525B76',
    "Nuclear": '#F4A259',
    "Petroleum": '#7A89C2',
    # "Gas": '#F9B5AC',
    "Natural gas": '#EE7674',
    "Renewables": '#9DBF9E',
})
fig.update_xaxes(showspikes=True, showgrid=False, title="President")
fig.update_yaxes(showspikes=True, showgrid=False, title="Quadrillion BTU")
fig.update_layout(hovermode="x")
# fig.add_shape(type="rect",
#     xref="x", yref="y",
#     x0="2000-01-01", y0=0,
#     x1="2010-01-01", y1=100,
#     line=dict(
#         color="RoyalBlue",
#         width=3,
#     ),
#     fillcolor="LightSkyBlue",
# )
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=58,
                     label="1960",
                     step="year",
                     stepmode="backward"),
                dict(count=48,
                     label="1970",
                     step="year",
                     stepmode="backward"),
                dict(count=38,
                     label="1980",
                     step="year",
                     stepmode="backward"),
                dict(count=28,
                     label="1990",
                     step="year",
                     stepmode="backward"),
                dict(count=18,
                     label="2000",
                     step="year",
                     stepmode="backward"),
                dict(count=8,
                     label="2010",
                     step="year",
                     stepmode="backward"),
            ])
        ),
        type="date"
    )
)
fig.add_vrect(
    x0="1960-01-01", x1="1961-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1961-01-01", x1="1963-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1963-01-01", x1="1969-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1969-01-01", x1="1974-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1974-01-01", x1="1977-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1977-01-01", x1="1981-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1981-01-01", x1="1989-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1989-01-01", x1="1993-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
),
fig.add_vrect(
    x0="1993-01-01", x1="2001-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2001-01-01", x1="2009-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2009-01-01", x1="2017-01-01",
    fillcolor="SkyBlue", opacity=0.5,
    layer="below", line_width=1,
)
fig.add_vrect(
    x0="2017-01-01", x1="2018-01-01",
    fillcolor="LightCoral", opacity=0.5,
    layer="below", line_width=1,
)

fig.update_xaxes(
    ticktext=["John F. Kennedy", "Lyndon B. Johnson","Richard Nixon","Gerald Ford","Jimmy Carter","Ronald Reagan","George H.W. Bush","Bill Clinton","George W. Bush","Barack Obama","Donald Trump",],
    tickvals=["1961-01-01", "1963-01-01", "1969-01-01", "1974-01-01", "1977-01-01", "1981-01-01", "1989-01-01", "1993-01-01", "2001-01-01", "2009-01-01", "2017-01-01"],
)
fig.show()

In [292]:
z = test_df.groupby("Year", as_index=False).sum()
fig = px.line(z, x="Year", y="BTU")
fig.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['Year', 'Quadrillion BTU'] but received: BTU

In [None]:
pc.plot_us_total(df, dp.load_primary_energy_sources(df))

# df = dp.load_primary_energy_sources(df)
# df = df[(df["State"] == "United States") & (df["Sector"] == "Total")]
# test_df = df.groupby(["Year"], as_index=False).sum()
# fig = px.line(test_df, x="Year", y="BTU")
# fig.show()

In [None]:
b

In [None]:
figs = {}
for year in df["Year"].unique():
    figs[year] = px.bar(test_df[test_df["Year"] == year], x="Source", y="BTU")
    print(year)
    

In [None]:
pc.plot_us_total(df, dp.load_primary_energy_sources(df))

In [21]:
df = dp.load_dataset()
primary_df = dp.load_primary_energy_sources(df)
primary_df = dp.data_subset(primary_df, sectors=["Total"])
per_cap_df = dp.data_subset(df, states = [state for state in df["State"].unique() if state != "United States"], sources=["Total"], sectors=["Total consumption per capita"])
combined_df = pd.concat([primary_df, per_cap_df])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Source"] = df["Source"].replace(


Unnamed: 0,State,Source,Sector,Unit,Year,BTU
17,Alaska,Coal,Total,Billion BTU,1960,7189.0
86,Alaska,Natural gas,Total,Billion BTU,1960,2034.0
87,Alaska,Nuclear,Total,Billion BTU,1960,0.0
101,Alaska,Petroleum,Total,Billion BTU,1960,45410.0
111,Alaska,Renewables,Total,Billion BTU,1960,6800.0
134,Alaska,Total,Total consumption per capita,Million BTU,1960,268.3


In [25]:
primary_df.groupby(["State", "Year"]).sum()


Unnamed: 0_level_0,Unnamed: 1_level_0,BTU
State,Year,Unnamed: 2_level_1
Alabama,1960,934925.0
Alabama,1961,924371.0
Alabama,1962,993378.0
Alabama,1963,1018286.0
Alabama,1964,1083954.0
...,...,...
Wyoming,2014,861315.0
Wyoming,2015,826923.0
Wyoming,2016,804279.0
Wyoming,2017,834337.0
