In [6]:
import camelot
import pandas as pd
from homebrewedFunctions.functions import *
import datetime
# Path to the PDF file
pdf_path = "https://www.dmr.nd.gov/oilgas/stats/historicaloilprodstats.pdf"

# Extract tables from all pages
tables = camelot.read_pdf(pdf_path, pages='all', flavor='lattice')

combine_tables = pd.concat([table.df for table in tables])
header = ["Year", "Month", "BBLS Oil", "Daily Oil", "Wells Producing", "BBLS Per Well", "Daily Oil Per Well"]
combine_tables.rename(columns={col: header[i] for i, col in enumerate(combine_tables.columns)}, inplace = True)
combine_tables["Date"] = pd.to_datetime(combine_tables["Year"].astype(str) + "-" + combine_tables["Month"].astype(str) + "-01")
del combine_tables["Year"]
del combine_tables["Month"]
for key, col in combine_tables.items():
    if key in ["Date"]:
        continue
    combine_tables[key] = pd.to_numeric(combine_tables[key], errors='coerce')



In [33]:
nd_crude_price = "https://www.eia.gov/dnav/pet/hist_xls/F002038__3m.xls"
nd_crude_price = pd.read_excel(nd_crude_price, sheet_name="Data 1", skiprows=2)
nd_crude_price = nd_crude_price.resample("A", on="Date").mean()
nd_crude_price

Unnamed: 0_level_0,North Dakota Crude Oil First Purchase Price (Dollars per Barrel)
Date,Unnamed: 1_level_1
1977-12-31,9.045
1978-12-31,9.718333
1979-12-31,13.368333
1980-12-31,26.435
1981-12-31,35.626667
1982-12-31,31.949167
1983-12-31,29.265
1984-12-31,28.381667
1985-12-31,25.313333
1986-12-31,13.490833


In [8]:
# price_per_barrel = gather_data({"Texas Crude Price": "DCOILWTICO"}, 
#             start = datetime.datetime(1986,1,1),
#             end = datetime.datetime.now(),
#             freq = "A")

In [23]:
mean_tables = combine_tables.set_index("Date").resample("Y").mean()
annual = combine_tables.set_index("Date").resample("Y").sum()
annual["Daily Oil"] = mean_tables["Daily Oil"]
annual["Wells Producing"] = mean_tables["Wells Producing"]
annual["BBLS Per Well"] = annual["BBLS Oil"] / annual["Wells Producing"]
annual["Daily Oil Per Well"] = annual["Daily Oil"] / annual["Wells Producing"]
annual["Price Per Barrel"] = nd_crude_price["North Dakota Crude Oil First Purchase Price (Dollars per Barrel)"]
annual["Approximate Annual Revenue"] = annual["BBLS Oil"] * annual["Price Per Barrel"]

In [31]:
import plotly.express as px
from pandas.api.types import is_numeric_dtype

plot_df = annual.loc["1984":]
last_index = plot_df.index[-1]
# approximate 2024 revenues
names = ["BBLS Oil", "BBLS Per Well", "Approximate Annual Revenue"]
for name in names:
    plot_df.loc[last_index, name] = plot_df.loc[last_index, name] * (12/7)


fig = px.line(plot_df, x=plot_df.index, 
              y=names[-1], 
              title=names[-1])
initial_hovertemplate = f"%{{x}}<br>%{{yaxis.title.text}}: %{{y}}"
fig.update_traces(hovertemplate=initial_hovertemplate)

y_buttons = []
for col in plot_df.columns:
    if is_numeric_dtype(plot_df[col]):
        y_buttons.append(
            dict(
                args=[
                    {"y": [plot_df[col].dropna(axis = 0)]},
                    {"yaxis.title.text": col,
                     "title": col,
                    }                     
                ],
                label=col,
                method="update"
            )
        )
fig.update_layout(
    updatemenus=[
        dict(
            buttons=y_buttons,
            direction="down",
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.15,
            yanchor="top"
        )
    ]
)

fig.write_html("outputs/NDOilProduction/EstimatedOilRevenue.html")
fig.show()


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '437581220.57142854' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

