In [1]:
from PVT import *
from wellDistance import *
from well import *
from report import *

In [2]:
import numpy as np
import pyodbc
import pandas as pd
import math
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns; sns.set()
import matplotlib.ticker as mtick
import os
import errno
from fpdf import FPDF
from PIL import Image
import pickle

%matplotlib inline

## Access the Data with SQL

In [3]:
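### Create data with SQL
# NOTE: the SQL queries are not included in this cell. The cells below expect
# df, df2, df3, df4, df5, reservoirDF and gasAnalysisDF to exist already, so on
# a fresh kernel either restore the queries here or start from the
# "Load in Binary Pkl Files" section.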

# Only Select Particular Wells in the Formation we Choose

In [4]:
# Restrict both frames to a single play. df is matched against the upper-case
# spelling of the play name and df2 against the title-case spelling, hence the
# two constants.
selectedPlay = "MARCELLUS"
selectedPlay2 = "Marcellus"

df = df.loc[df['PLAY'] == selectedPlay]
df2 = df2.loc[df2['PLAY'] == selectedPlay2]

In [5]:
# selectedPlay = "UTICA"
# selectedPlay2 = "Utica"

# df = df[df.PLAY == selectedPlay]
# df2 = df2[df2.PLAY == selectedPlay2]

## Serialize the Data as Binary pkl files

In [6]:
# Write every working frame to its own pickle file in the current directory,
# in the same order as the frames are later read back.
for pkl_name, frame in (
    ('df.pkl', df),
    ('df2.pkl', df2),
    ('df3.pkl', df3),
    ('df4.pkl', df4),
    ('df5.pkl', df5),
    ('reservoirDF.pkl', reservoirDF),
    ('gasAnalysisDF.pkl', gasAnalysisDF),
):
    frame.to_pickle(pkl_name)

## Load in Binary Pkl Files

In [7]:
# Restore the working frames from the pickle files in the current directory.
# Only load pickle files you produced yourself: unpickling runs arbitrary code.
(df, df2, df3, df4, df5, reservoirDF, gasAnalysisDF) = [
    pd.read_pickle(pkl_name)
    for pkl_name in (
        'df.pkl',
        'df2.pkl',
        'df3.pkl',
        'df4.pkl',
        'df5.pkl',
        'reservoirDF.pkl',
        'gasAnalysisDF.pkl',
    )
]

## Prepare Data & Impute Nearest Values

In [8]:
# Heel of each wellbore. Every item handed to get_heel is the (index, row)
# tuple yielded by DataFrame.iterrows(), and get_heel returns a (lat, lon) pair.
heel_points = [get_heel(row) for row in df2.iterrows()]
heel_lat_list = [lat for lat, lon in heel_points]
heel_lon_list = [lon for lat, lon in heel_points]

df2['heel_lat'] = heel_lat_list
df2['heel_lon'] = heel_lon_list

# Midpoint of each wellbore. This pass runs after the heel columns have been
# added, so the rows passed to get_midpoint already carry heel_lat / heel_lon.
mid_points = [get_midpoint(row) for row in df2.iterrows()]
mid_lat_list = [lat for lat, lon in mid_points]
mid_lon_list = [lon for lat, lon in mid_points]

df2['mid_lat'] = mid_lat_list
df2['mid_lon'] = mid_lon_list



In [9]:
# Collapse the completion frame to one row per WELLID: the highest STAGE value
# and the per-well totals of the fluid, sand and perf-cluster columns.
completion_aggs = {
    'STAGE': 'max',
    'FLUID_TOT': 'sum',
    'FRESH_VOLUME': 'sum',
    'TOTAL_SAND': 'sum',
    'PERF_CLUSTERS_CNT': 'sum',
}
df4 = df4.groupby('WELLID').agg(completion_aggs).reset_index()

In [10]:
# Collapse the geology frame to one row per WELLID by averaging each of the
# *_AVG columns over that well's rows.
geology_columns = ['TVD_AVG', 'PEF_AVG', 'GASFILLEDPHI_AVG', 'SUWI_AVG', 'WSM1_AVG']
geology_aggs = {column: 'mean' for column in geology_columns}
df5 = df5.groupby('WELLID').agg(geology_aggs).reset_index()

In [11]:
# Keep only the rows that have a PRODUCTION_DAY_GAS_COUNTER value; rows where
# it is missing are the days prior to production.
has_gas_counter = df['PRODUCTION_DAY_GAS_COUNTER'].notna()
df = df[has_gas_counter]
#Remove all zero producing days
# NOTE(review): no statement in this cell removes zero-producing days - confirm
# whether that filter is intentionally absent.

In [12]:
# Attach reservoir, gas-analysis and well-input columns to df2 and fill each
# attached column with FillValues (project helper; presumably imputes missing
# entries from nearby wells - confirm against its definition).
# Each column is imputed exactly once, in the order listed.

# Reservoir properties come from the Fekete Harmony datasets.
df2 = pd.merge(df2, reservoirDF, how='left', left_on=['WELLID'], right_on=['WELL_KEY'])

reservoir_columns = (
    'POROSITY',
    'INITIAL_GAS_SATURATION',
    'INITIAL_RESERVOIR_PRESSURE',
    'FORMATION_TEMPERATURE',
    'INITIAL_WATER_SATURATION',
    'INITIAL_OIL_SATURATION',
)
for column in reservoir_columns:
    df2[column] = FillValues(df2, column)

# Gas analysis: keep only the identifying columns and the composition columns.
gasAnalysisDF = gasAnalysisDF[['WELL_KEY', 'DATE_TIME', 'GAS_GRAVITY', 'N2', 'CO2', 'H2S', 'C1', 'C2', 'C3']]
# NOTE(review): df2 already carries WELL_KEY from the reservoir merge, so this
# merge yields suffixed WELL_KEY_x / WELL_KEY_y columns. If gasAnalysisDF holds
# more than one row per WELL_KEY (e.g. several DATE_TIME samples), the left
# join also repeats the df2 row for that well - confirm this is intended.
df2 = pd.merge(df2, gasAnalysisDF, how='left', left_on=['WELLID'], right_on=['WELL_KEY'])

gas_columns = ('GAS_GRAVITY', 'N2', 'CO2', 'H2S', 'C1', 'C2', 'C3')
for column in gas_columns:
    df2[column] = FillValues(df2, column)

# Well inputs: GRADIENT has to be imputed as well.
df2 = pd.merge(df2, df3, how='left', left_on=['WELLID'], right_on=['WELLID'])
df2['GRADIENT'] = FillValues(df2, 'GRADIENT')

In [13]:
# Keep only rows with a strictly positive average casing pressure; rows where
# the value is zero, negative or missing are dropped.
positive_casing_pressure = df['CASING_PRESSURE_AVG'] > 0
df = df[positive_casing_pressure]

In [14]:
# Overwrite the pickle files with the pre-processed frames so the RTA section
# can start from them without repeating the preparation steps.
processed_frames = {
    'df.pkl': df,
    'df2.pkl': df2,
    'df3.pkl': df3,
    'df4.pkl': df4,
    'df5.pkl': df5,
    'reservoirDF.pkl': reservoirDF,
    'gasAnalysisDF.pkl': gasAnalysisDF,
}
for pkl_name, frame in processed_frames.items():
    frame.to_pickle(pkl_name)

## RTA Analysis

### Use Serialized & Pre-processed Data

In [15]:
# Reload the pre-processed frames from the pickle files in the current
# directory. Only load pickle files you produced yourself: unpickling runs
# arbitrary code.
processed_pkl_names = (
    'df.pkl',
    'df2.pkl',
    'df3.pkl',
    'df4.pkl',
    'df5.pkl',
    'reservoirDF.pkl',
    'gasAnalysisDF.pkl',
)
df, df2, df3, df4, df5, reservoirDF, gasAnalysisDF = map(pd.read_pickle, processed_pkl_names)

### Create the Well List for Reports that We Want to Generate

In [16]:
# Distinct FILENUM values present in the production frame.
wellList = df.loc[:, 'FILENUM'].unique()

In [17]:
# Wells left out because of missing information. The list was found
# empirically: every well was run once and the ones that raised errors were
# collected here.
# NOTE(review): the identifiers are strings - confirm FILENUM is stored as
# strings too, otherwise nothing is excluded.
excludeList = [
    '50543',
    '50134',
    '53069',
]
# setdiff1d returns the sorted, unique values of wellList that are not in
# excludeList.
wellList = np.setdiff1d(ar1=wellList, ar2=excludeList)

### Generate the wells

In [18]:
# Build one Well object per entry of wellList, printing a progress line on
# every tenth well.
wells = []
last_well_index = len(wellList) - 1
for position, well_id in enumerate(wellList):
    if not position % 10:
        print("Creating well #: ", position, " of ", last_well_index)
    new_well = Well(well_id, df, df2, df3, df4, df5)
    wells.append(new_well)

Creating well #:  0  of  454


  lintime = (t_n ** 0.5) * q_init / q_n
  dp2 = (nQ - Qc)
  dp1 = (Qc - pQ)
  dpdx = (dpdx1 * dx2 + dpdx2 * dx1) / (dx1 + dx2)
  test = abs((rho - rhoold) / rho)
  Z = 0.27 * pr / rho / tr
  lintime = (t_n ** 0.5) * q_init / q_n


Creating well #:  10  of  454
Creating well #:  20  of  454


  v_t = 1.593 * wellDict['surface_tension'] ** 0.25 * (dens_liq - dens_gas) ** 0.25 / dens_gas ** 0.5


Creating well #:  30  of  454


  bourdetDerivative[i] = (1 / dpdx)


Creating well #:  40  of  454
Creating well #:  50  of  454
Creating well #:  60  of  454
Creating well #:  70  of  454
Creating well #:  80  of  454
Creating well #:  90  of  454
Creating well #:  100  of  454
Creating well #:  110  of  454
Creating well #:  120  of  454
Creating well #:  130  of  454
Creating well #:  140  of  454
Creating well #:  150  of  454
Creating well #:  160  of  454
Creating well #:  170  of  454
Creating well #:  180  of  454
Creating well #:  190  of  454
Creating well #:  200  of  454
Creating well #:  210  of  454
Creating well #:  220  of  454
Creating well #:  230  of  454
Creating well #:  240  of  454
Creating well #:  250  of  454
Creating well #:  260  of  454
Creating well #:  270  of  454
Creating well #:  280  of  454
Creating well #:  290  of  454
Creating well #:  300  of  454
Creating well #:  310  of  454
Creating well #:  320  of  454
Creating well #:  330  of  454
Creating well #:  340  of  454
Creating well #:  350  of  454
Creating well 

In [19]:
# Serialize the list of Well objects so later sections can reload it instead
# of rebuilding every well.
with open('marcellus_wells.pkl', 'wb') as wells_file:
    pickle.dump(wells, wells_file)

# Utica variant of the same step:
# with open('utica_wells.pkl', 'wb') as wells_file:
#     pickle.dump(wells, wells_file)

### Use Serialized & Pre-processed wells rather than recomputing

In [20]:
# Reload the serialized Well objects. Only unpickle files produced by this
# notebook: pickle.load executes arbitrary code from the file.
with open('marcellus_wells.pkl', 'rb') as wells_file:
    wells = pickle.load(wells_file)

# Utica variant of the same step:
# with open('utica_wells.pkl', 'rb') as wells_file:
#     wells = pickle.load(wells_file)

# Extract Each Well Dict and Write to a CSV

In [21]:
# Turn every well's wellDict into a one-row frame and stack them into a single
# table. The frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copied the whole table on every iteration.
well_rows = [pd.Series(well.wellDict).to_frame().T for well in wells]

# pd.concat raises on an empty list, so fall back to an empty frame when there
# are no wells. ignore_index=False keeps each row's original index label.
wellcsv = pd.concat(well_rows, ignore_index=False) if well_rows else pd.DataFrame()

#Write to CSV
# wellcsv.to_csv('utica.csv')
wellcsv.to_csv('marcellus.csv')

## Generate All PDFs & Reporting Graphs & Extract calculated Parameters

In [22]:
# Report object (project class from the `report` module) whose GeneratePlots
# method is called once per well in the next cell.
myReport = Report()

In [23]:
%%capture

# Generate the plots for every well. This cell runs under %%capture, so the
# progress lines printed below are captured rather than displayed.
last_report_index = len(wells) - 1
for report_no, well in enumerate(wells):
    if not report_no % 10:
        print("Creating Report #: ", report_no, " of ", last_report_index)
    myReport.GeneratePlots(well)