### Preprocessing of Cushing, Oklahoma wastewater disposal and seismicity data



In [1]:
# Load relevant packages.
import numpy as np
import h5py
import pandas as pd

### Seismic data 

In [2]:
# Seismic data.
# Read every catalog column into memory while the HDF5 file is still open.
# Datasets are accessed with hdf[name] rather than hdf.get(name): indexing raises
# KeyError when a dataset is missing, whereas get() returns None and
# np.array(None) would silently yield a useless 0-d object array.
# The trailing [()] reads the full dataset into a NumPy array.
with h5py.File('../datasets_Jul_7/cushing_oklahoma/cushingSeismic.hdf5','r') as hdf:
    ls = list(hdf.keys())
    print("List of columns in this file: \n", ls)
    epoch = hdf['epoch'][()]
    latitude = hdf['latitude'][()]
    longitude = hdf['longitude'][()]
    depth = hdf['depth'][()]
    easting = hdf['easting'][()]
    northing = hdf['northing'][()]
    magnitude = hdf['magnitude'][()]

List of columns in this file: 
 ['b_value', 'b_value_epoch', 'depth', 'dpdt', 'easting', 'epoch', 'eventID', 'latitude', 'longitude', 'magnitude', 'northing']


In [3]:
# Assemble the seismic catalog as a DataFrame: one column per array read from
# the HDF5 file, in the order listed here.
catalog_columns = {
    'epoch': epoch,
    'latitude': latitude,
    'longitude': longitude,
    'depth': depth,
    'easting': easting,
    'northing': northing,
    'magnitude': magnitude,
}
seismic = pd.DataFrame(catalog_columns)

In [4]:
# Preview 8 randomly chosen rows; no random_state is set, so the rows shown differ between runs.
seismic.sample(8)

Unnamed: 0,epoch,latitude,longitude,depth,easting,northing,magnitude
891,1459508000.0,35.9357,-96.75237,3.927,702754.187443,3979151.0,1.84
826,1457266000.0,35.92937,-96.73756,4.615,704106.666266,3978480.0,1.81
207,1412900000.0,35.95173,-96.78008,6.458,700213.66359,3980872.0,1.64
210,1412909000.0,35.94608,-96.78452,3.979,699827.395129,3980237.0,1.3
157,1412702000.0,35.94511,-96.75716,5.0,702298.023893,3980185.0,1.42
473,1425364000.0,35.94862,-96.78442,3.892,699830.016245,3980519.0,1.9
911,1459747000.0,35.93637,-96.75243,4.482,702747.06199,3979226.0,2.57
752,1441245000.0,35.94864,-96.77019,4.832,701113.615629,3980550.0,1.42


In [5]:
# Write the seismic catalog to the staging area without the row index.
# index=False is the documented way to omit it (index=None only worked because None is falsy).
seismic.to_csv('../staging/temporal/cushing_oklahoma/seismic.csv', index=False)

### Load well positions, injection data, and pressurization rate data.

In [6]:
def _read_axis(name: str) -> np.ndarray:
    """Read one pressure-table axis and report its length.

    Each axis is stored as `<name>.csv` in the pressure_table directory, with
    no header row; column 0 of the file is returned as a NumPy array.

    Parameters
    ----------
    name : str
        Axis name, which is also the CSV file stem (e.g. 't', 'x', 'y', 'z').

    Returns
    -------
    np.ndarray
        The values found in column 0 of the file.
    """
    path = '../datasets_Jul_7/cushing_oklahoma/pressure_table/%s.csv' % name
    values = np.array(pd.read_csv(path, header=None)[0])
    print('No. of samples along %s = %d' % (name, len(values)))
    return values


# Epoch (s)
t = _read_axis('t')

# Well easting (m)
x = _read_axis('x')

# Well northing (m)
y = _read_axis('y')

# Well depth (m)
z = _read_axis('z')

No. of samples along t = 25000
No. of samples along x = 1
No. of samples along y = 1
No. of samples along z = 1


The x, y and z tables each contain a single sample, so there is only one injection well.

In [7]:
# Pressurization rate (dp/dt) data.
# The CSV has no header row; column 0 is taken as the dp/dt series.
dpdt = np.array(
    pd.read_csv(
        '../datasets_Jul_7/cushing_oklahoma/pressure_table/dpdt.csv',
        header=None,
    )[0]
)
print('No. of dp/dt samples = %d' % len(dpdt))

No. of dp/dt samples = 25000


In [8]:
# Flow rate data
# NOTE(review): the file name says "DPDT" while its data column is named `flow_rate`,
# and in the final catalog preview the `injection` and `dpdt` columns show identical
# values -- confirm this file really holds flow rates rather than a copy of dp/dt.
df_flow = pd.read_csv('../datasets_Jul_7/cushing_oklahoma/cushingClusterCenterDPDT.csv')

In [9]:
# Preview the first five rows of the flow-rate table (columns: epoch, flow_rate).
df_flow.head()

Unnamed: 0,epoch,flow_rate
0,786237800.0,0.0
1,786266900.0,0.0
2,786295900.0,0.0
3,786324900.0,0.0
4,786353900.0,0.0


In [10]:
# Pull the flow_rate column out as a NumPy array; it becomes the `injection` column of the catalog built later.
injection = np.array(df_flow['flow_rate'])

### TODO: Compute pressures using the radial flow model in orion_light.

In [11]:
# Radial flow parameters
# Fixed inputs for the radial flow pressure model.

permeability = 194.5     # mD
viscosity = 1.0          # cP -- NOTE(review): originally annotated "cD"; centipoise assumed, confirm
storativity = 0.0014     # NOTE(review): no unit given -- presumably dimensionless, confirm
reservoir_depth = 100    # m

# TODO: choose a value for min_radius
# min_radius = ?

Until the radial flow model is wired in, a simple workaround is to compute pressures by integrating dp/dt over time t, using orion_light's `PressureTableModel`.

In [12]:
from orion_light.pressure_table import PressureTableModel

In [13]:
# Instantiate orion_light's PressureTableModel with default arguments; the table arrays are supplied via load_array.
model = PressureTableModel()

In [14]:
# Hand dp/dt and its spatial (x, y, z) and time (t) axes to the model.
# Judging by the table_data displayed afterwards, this also populates a 'pressure'
# entry -- presumably dp/dt integrated over t; confirm in orion_light.
model.load_array(dpdt=dpdt, x=x, y=y, z=z, t=t)

In [15]:
# Inspect the model's table: the loaded dp/dt and axes, plus the 'pressure' array.
model.table_data

{'dpdt': array([[[[0.        , 0.        , 0.        , ..., 0.01754773,
           0.017566  , 0.01758427]]]]),
 'x': array([0.]),
 'y': array([0.]),
 'z': array([0.]),
 't': array([7.86237840e+08, 7.86266854e+08, 7.86295869e+08, ...,
        1.51202621e+09, 1.51205523e+09, 1.51208424e+09]),
 'pressure': array([[[[      0.        ,       0.        ,       0.        , ...,
           7153543.25126303, 7154052.6511425 , 7154562.58097576]]]])}

Injection pressure is now computed.

### Set up and write injection data catalog.

In [16]:
# Set up injection catalog.
# The epoch column comes from the pressure-table time axis `t`, whereas the
# flow rates come from a separate file (df_flow) that carries its own epoch
# column. Check that both time axes agree before combining them row by row,
# so that a misaligned input fails loudly instead of producing mismatched rows.
flow_epoch = df_flow['epoch'].to_numpy()
assert len(flow_epoch) == len(t), (
    'flow-rate table has %d rows but the pressure table has %d time samples'
    % (len(flow_epoch), len(t))
)
# Default np.allclose tolerance (rtol=1e-5) absorbs CSV rounding of the epochs.
assert np.allclose(flow_epoch, t), (
    'epochs of the flow-rate table do not match the pressure-table time axis'
)

injection_cat = pd.DataFrame({'epoch': t,
                              'injection': injection,
                              # squeeze() drops the singleton axes of the pressure table,
                              # leaving one pressure value per time sample.
                              'pressure': model.table_data['pressure'].squeeze(),
                              'dpdt': dpdt
                            })

In [17]:
# Preview the last 10 rows of the injection catalog.
injection_cat.tail(10)

Unnamed: 0,epoch,injection,pressure,dpdt
24990,1511823000.0,0.01742,7149992.0,0.01742
24991,1511852000.0,0.017438,7150498.0,0.017438
24992,1511881000.0,0.017456,7151004.0,0.017456
24993,1511910000.0,0.017475,7151511.0,0.017475
24994,1511939000.0,0.017493,7152018.0,0.017493
24995,1511968000.0,0.017511,7152526.0,0.017511
24996,1511997000.0,0.017529,7153034.0,0.017529
24997,1512026000.0,0.017548,7153543.0,0.017548
24998,1512055000.0,0.017566,7154053.0,0.017566
24999,1512084000.0,0.017584,7154563.0,0.017584


In [18]:
# Write the injection catalog to the staging area without the row index.
# index=False is the documented way to omit it (index=None only worked because None is falsy).
injection_cat.to_csv('../staging/temporal/cushing_oklahoma/injection.csv', index=False)