# Does Oil Well Production Decrease with Age?

In this notebook, we explore the relationship between the age of an oil well and its output.

In [54]:
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely
import zipfile
import datetime
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Opens zip file and creates data directory
# If files get large, will need to check to see if exists, and only replace new ones (or updates)
#zip_ref = zipfile.ZipFile('./data.zip', 'r')
#zip_ref.extractall('./')
#zip_ref.close()

In [3]:
# read up the well data:
WellData_Raw = pd.read_csv("./data/welldata/WellData.csv",low_memory=False)

# get rid of the dup API nummber records by taking first in group
DimWell = WellData_Raw.groupby('API_num').first()

# ditch the wells for which there's no longitude or latitude:
DimWell.dropna(axis=0, how='any', subset=['long','lat'],inplace=True)

In [4]:
def makeapinum(cnty, seq, sidetrack, st='05'):
    APInum = st + '-' + str(cnty).zfill(3) + '-' + str(seq).zfill(5) + '-' + \
                                            str(sidetrack).zfill(2)
    return APInum

In [5]:
# read up the production data
ProductionData_Raw = pd.read_csv("./data/welldata/Production.csv", low_memory=False)

# tack on an API code:
ProductionData_Raw['API_num'] = ProductionData_Raw.apply(lambda c:makeapinum(c.api_county_code, c.api_seq_num, c.sidetrack_num), axis=1)

# sum up the production data for each well:
FactProduction = ProductionData_Raw[['API_num','Prod_days','oil_prod']].groupby('API_num').sum()

# ditch the records for which there's no production data:
FactProduction.dropna(axis=0, how='any',inplace=True)

# join the Well data to the produciton data:
dimcols = ['long','lat', 'formation_code']
oil_df = FactProduction.merge(DimWell[dimcols], left_index=True, right_index=True)

In [6]:
oil_df.head()

Unnamed: 0_level_0,Prod_days,oil_prod,long,lat,formation_code
API_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
05-001-05242-00,145.0,473.0,-103.74812,39.85915,DSND
05-001-05289-00,344.0,5336.0,-103.7482,39.86821,JSND
05-001-05299-00,58.0,58.0,-103.74813,39.87002,DSND
05-001-05534-00,360.0,2167.0,-103.811653,39.978286,DSND
05-001-05542-00,34.0,37.0,-103.818583,39.981528,DSND


In [17]:
DimWell.first_prod_date.isnull().sum()

18902

In [18]:
DimWell.complete_date.isnull().sum()

5511

In [23]:
DimWell.complete_date.values[0:10]

array(['1955-03-18 00:00:00', '1951-09-09 00:00:00', '1961-06-14 00:00:00',
       '1960-08-31 00:00:00', '1959-06-03 00:00:00', '1955-07-06 00:00:00',
       '1959-08-30 00:00:00', '1981-12-15 00:00:00', '1955-02-17 00:00:00',
       '1958-09-16 00:00:00'], dtype=object)

In [33]:
zz = pd.to_datetime(DimWell['complete_date'])

AttributeError: 'Series' object has no attribute 'date'

In [31]:
zz.isnull().sum()

5511

In [32]:
zz.min(),zz.max()

(Timestamp('1900-01-01 00:00:00'), Timestamp('2017-12-21 00:00:00'))

In [34]:
len(zz)

73810

In [35]:
zz.head()

API_num
05-001-05010-00   1955-03-18
05-001-05029-00   1951-09-09
05-001-05040-00   1961-06-14
05-001-05041-00   1960-08-31
05-001-05042-00   1959-06-03
Name: complete_date, dtype: datetime64[ns]

In [38]:
zz[zz>date(2017,12,1)]

API_num
05-073-06730-00   2017-12-21
05-123-39626-00   2017-12-08
05-123-39891-00   2017-12-13
Name: complete_date, dtype: datetime64[ns]

In [37]:
from datetime import date

In [39]:
def months_since(date2, date1):
    return(date2.year - date1.year) * 12 + (date2.month - date1.month)


Timestamp('1955-03-18 00:00:00')

In [58]:
xx=[d.date() for d in zz[0:10]]

In [61]:
yy=np.array(xx)
yy

array([datetime.date(1955, 3, 18), datetime.date(1951, 9, 9),
       datetime.date(1961, 6, 14), datetime.date(1960, 8, 31),
       datetime.date(1959, 6, 3), datetime.date(1955, 7, 6),
       datetime.date(1959, 8, 30), datetime.date(1981, 12, 15),
       datetime.date(1955, 2, 17), datetime.date(1958, 9, 16)], dtype=object)

In [63]:
yy.year()

AttributeError: 'numpy.ndarray' object has no attribute 'year'

In [44]:
np.array([123]).squeeze()

array(123)

In [55]:
np.array([x.date() for x in xx])

AttributeError: 'numpy.datetime64' object has no attribute 'date'

In [47]:
type(xx[0])

numpy.datetime64

In [48]:
xx.to_date()

AttributeError: 'numpy.ndarray' object has no attribute 'to_date'

In [52]:
datetime.datetime.now().date()

AttributeError: type object 'datetime.datetime' has no attribute 'datetime'