In [1]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Wrangling
from acquire_prepare import acquire_oil
import pandas as pd

In [2]:
df = acquire_oil()

In [3]:
df.shape

(17156, 33)

In [4]:
df.columns

Index(['API14', 'Lease Name', 'Well Number', 'County', 'Type', 'Status',
       'Oper', 'Multi Well Lease', 'MajorPhase', 'Formation', 'Proppant PPF',
       'Prod Method', 'Frac Fluid gpf', 'Lateral Len', 'Frac Stages',
       'Frac Fluid Type', 'First Prod', 'Last Prod', 'Oil EUR', 'Gas EUR',
       'Oil Gravity', 'Peak BOEPD', 'Oil Hist', 'Gas Hist', 'GOR Hist',
       'IP90 BOEQPD', 'Landing Depth', 'Sur Lat', 'Sur Long', 'Well Id',
       'Mid Point Lat', 'Mid Point Long', 'Sub_Basin'],
      dtype='object')

#### Create recovery column

In [5]:
df['recovery'] = df['Oil EUR'] + df['Gas EUR']/6

#### Filter for recovery over 700

In [6]:
peak_df = df[df['recovery'] > 700]
peak_df.shape

(575, 34)

In [7]:
peak_df['Type'].value_counts()

Horizontal    570
Vertical        4
Other           1
Name: Type, dtype: int64

#### Filter for vertical wells

In [8]:
vertical = peak_df[peak_df['Type'] == 'Vertical']
vertical.shape

(4, 34)

#### Calculate means for recovery and lateral length

In [9]:
vert_rec_mean = vertical['recovery'].mean()
vert_rec_mean

492109.2526

In [10]:
vert_lat_len_mean = vertical['Lateral Len'].mean()
vert_lat_len_mean

289.5

In [11]:
vert_ratio = vert_rec_mean / vert_lat_len_mean
vert_ratio

1699.8592490500864

#### Filter for 14th lateral_class

In [12]:
# lateral_class are binned lengths per 1000 feet
labels=['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen']

df['lateral_class'] = pd.cut(df['Lateral Len'], [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000,  8000, 9000, 10000, 11000, 12000, 13000, 14000], include_lowest=True)

In [13]:
df['lateral_class'].value_counts()

(-0.001, 1000.0]      5983
(7000.0, 8000.0]      2007
(4000.0, 5000.0]      1801
(1000.0, 2000.0]      1791
(2000.0, 3000.0]      1241
(3000.0, 4000.0]      1027
(9000.0, 10000.0]      801
(6000.0, 7000.0]       773
(5000.0, 6000.0]       564
(8000.0, 9000.0]       490
(10000.0, 11000.0]     473
(11000.0, 12000.0]      36
(12000.0, 13000.0]      18
(13000.0, 14000.0]       8
Name: lateral_class, dtype: int64

In [14]:
df[df['Lateral Len'] > 14000.0].shape

(36, 35)

In [15]:
fourteenth = peak_df[peak_df['Lateral Len'] > 14000.0]
fourteenth.shape

(0, 34)

#### Calculate means for recovery and lateral length

In [16]:
long_rec_mean = fourteenth['recovery'].mean()
long_rec_mean

nan

In [17]:
long_lat_len_mean = fourteenth['Lateral Len'].mean()
long_lat_len_mean

nan

In [18]:
fourteenth_ratio = long_rec_mean / long_lat_len_mean
fourteenth_ratio

nan

In [22]:
print('ratio of recovery over lateral length in vertical wells...................:', vert_ratio)
print('ratio of recovery over lateral length in the wells with the longest length:', fourteenth_ratio)

ratio of recovery over lateral length in vertical wells...................: 1699.8592490500864
ratio of recovery over lateral length in the wells with the longest length: nan
