# NSC 325 Sweet Spot Identification
Elijah Flores, Aditya Holla, Jayant Bhaskaruni, Beshoy Shaker, and Ameera Aslam

The University of Texas at Austin

In [240]:
# Import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [241]:
# Load the dataset from its CSV file into a DataFrame
filename = 'sweetspot_UT_Austin.csv'
df = pd.read_csv(filepath_or_buffer=filename)

In [242]:
# Report the dataset's dimensions, then preview its first 8 rows
print('Data size:', df.shape)
df.head(8)

Data size: (55, 14)


Unnamed: 0,Well Number,Well Name,Bottomhole X,Bottomhole Y,Co [MSTB],Cw (bbl),POROS,KX,KY,TD(MD),Cg (mmcf),FACIES,P_2020-1-6,P_2029-1-1
0,1,PO1_1,5064.04,13746.23,55.1916,0.000161745,0.128029,0.020377,0.020377,8551.7,157.119447,3,3491.971374,2992.403113
1,2,PO1_2,7668.4,13746.23,27.5294,0.000139479,0.101876,0.015088,0.015088,8579.9,69.831076,4,3501.327981,2947.374808
2,3,PO1_3,2459.67,13746.23,48.354,0.000197399,0.11922,0.022396,0.022396,8557.1,134.78194,4,3493.763065,2904.053641
3,4,PO1_4,10272.76,13746.23,0.572057,1.44e-07,0.064132,9.1e-05,9.1e-05,8635.5,1.10121,5,3519.775759,3301.826052
4,5,PO1_5,12877.11,13746.23,0.430394,8.56e-08,0.063078,6.7e-05,6.7e-05,8718.6,0.435085,5,3547.347889,3365.597401
5,6,PO1_6,10272.76,11141.88,0.995444,3.84e-07,0.071007,0.000168,0.000168,8555.5,2.79222,5,3493.232193,3196.74689
6,7,PO1_7,11430.25,11141.88,0.215019,0.603003,0.069382,0.000162,0.000162,8589.1,0.523657,5,3504.380491,3126.40593
7,8,PO1_8,12877.11,11141.88,0.0,1.2446,0.074247,0.00027,0.00027,8638.6,0.0,5,3520.804323,2949.43445


In [243]:
# Count the entries equal to zero in every column (zeros are what this notebook
# refers to as missing values), then list the rows whose oil ("Co [MSTB]") or
# gas ("Cg (mmcf)") column is zero.
print(df.eq(0).sum())
df[df["Co [MSTB]"].eq(0) | df["Cg (mmcf)"].eq(0)]

Well Number     0
Well Name       0
Bottomhole X    0
Bottomhole Y    0
Co [MSTB]       5
Cw (bbl)        0
POROS           0
KX              0
KY              0
TD(MD)          0
Cg (mmcf)       5
FACIES          0
P_2020-1-6      0
P_2029-1-1      0
dtype: int64


Unnamed: 0,Well Number,Well Name,Bottomhole X,Bottomhole Y,Co [MSTB],Cw (bbl),POROS,KX,KY,TD(MD),Cg (mmcf),FACIES,P_2020-1-6,P_2029-1-1
7,8,PO1_8,12877.11,11141.88,0.0,1.2446,0.074247,0.00027,0.00027,8638.6,0.0,5,3520.804323,2949.43445
8,9,PO1_9,14034.61,11141.88,0.0,1.30679,0.07338,0.000284,0.000284,8684.3,0.0,5,3535.967335,2954.485548
26,27,PO1_27,14034.61,8537.52,0.0,30.1566,0.116111,0.03313,0.010039,8645.4,0.0,4,3523.060526,2676.168143
27,28,PO1_28,14034.61,5933.16,0.0,37.7757,0.112872,0.012573,0.012573,8647.7,0.0,4,3523.823653,2694.598799
46,47,PO1_47,14034.61,3328.8,0.0,87.5774,0.143755,0.038,0.038,8691.2,0.0,3,3538.256717,2515.127738


In [245]:
# Well number and name are redundant (can use row index) and can be removed.
# The result is rebound to `df` rather than dropped in place, and labels that are
# already absent are ignored, so re-running this cell is a no-op instead of
# raising a KeyError on the second execution.
df = df.drop(columns=['Well Number', 'Well Name'], errors='ignore')
df.head()

Unnamed: 0,Bottomhole X,Bottomhole Y,Co [MSTB],Cw (bbl),POROS,KX,KY,TD(MD),Cg (mmcf),FACIES,P_2020-1-6,P_2029-1-1
0,5064.04,13746.23,55.1916,0.000161745,0.128029,0.020377,0.020377,8551.7,157.119447,3,3491.971374,2992.403113
1,7668.4,13746.23,27.5294,0.000139479,0.101876,0.015088,0.015088,8579.9,69.831076,4,3501.327981,2947.374808
2,2459.67,13746.23,48.354,0.000197399,0.11922,0.022396,0.022396,8557.1,134.78194,4,3493.763065,2904.053641
3,10272.76,13746.23,0.572057,1.44e-07,0.064132,9.1e-05,9.1e-05,8635.5,1.10121,5,3519.775759,3301.826052
4,12877.11,13746.23,0.430394,8.56e-08,0.063078,6.7e-05,6.7e-05,8718.6,0.435085,5,3547.347889,3365.597401


In [246]:
# Replace the raw CSV headers with short snake_case names that are easier to
# read and type. Keys are the original headers; values are the new names.
mappings = {
    # Bottomhole location
    'Bottomhole X': 'bh_x',            # X-coordinate of the bottomhole
    'Bottomhole Y': 'bh_y',            # Y-coordinate of the bottomhole
    # Production
    'Co [MSTB]': 'oil_prod_mstb',      # cumulative oil, thousand stock tank barrels
    'Cw (bbl)': 'water_prod_bbl',      # cumulative water, barrels
    # Rock properties
    'POROS': 'porosity',               # porosity
    'KX': 'perm_x',                    # permeability in X
    'KY': 'perm_y',                    # permeability in Y
    'TD(MD)': 'total_depth_md',        # total depth (measured depth)
    'Cg (mmcf)': 'gas_prod_mmcf',      # gas production, million cubic feet
    'FACIES': 'facies',                # facies
    # Pressures
    'P_2020-1-6': 'past_pressure',     # pressure on January 6, 2020
    'P_2029-1-1': 'future_pressure',   # predicted pressure on January 1, 2029
}

# Apply the mapping to the column axis in place, then preview the result
df.rename(mappings, axis='columns', inplace=True)
df.head()

Unnamed: 0,bh_x,bh_y,oil_prod_mstb,water_prod_bbl,porosity,perm_x,perm_y,total_depth_md,gas_prod_mmcf,facies,past_pressure,future_pressure
0,5064.04,13746.23,55.1916,0.000161745,0.128029,0.020377,0.020377,8551.7,157.119447,3,3491.971374,2992.403113
1,7668.4,13746.23,27.5294,0.000139479,0.101876,0.015088,0.015088,8579.9,69.831076,4,3501.327981,2947.374808
2,2459.67,13746.23,48.354,0.000197399,0.11922,0.022396,0.022396,8557.1,134.78194,4,3493.763065,2904.053641
3,10272.76,13746.23,0.572057,1.44e-07,0.064132,9.1e-05,9.1e-05,8635.5,1.10121,5,3519.775759,3301.826052
4,12877.11,13746.23,0.430394,8.56e-08,0.063078,6.7e-05,6.7e-05,8718.6,0.435085,5,3547.347889,3365.597401
