In [35]:
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine

import os 
# environment variables for postgresql connection


# Create a connection to the PostgreSQL database
engine = create_engine('postgresql://postgres:MandyLinkToby3@192.168.0.74:5432/texas_oil')

# Load the datasets
well_data = pd.read_sql("SELECT * FROM well_data", engine)
well_shapes = gpd.read_postgis("SELECT * FROM well_shapes", engine, geom_col='geometry')
orphaned_wells_shape = gpd.read_postgis("SELECT * FROM orphaned_wells_shape", engine, geom_col='geom')
well_inspections = pd.read_sql("SELECT * FROM well_inspections", engine)
well_violations = pd.read_sql("SELECT * FROM well_violations", engine)


In [36]:
# Standardize API numbers to 8 digits in orphaned_wells_shape
orphaned_wells_shape['api_no'] = orphaned_wells_shape['well_ident'].str[5:13]

# Confirm the changes
print(orphaned_wells_shape['api_no'].str.len().value_counts())


api_no
8    5854
Name: count, dtype: int64


In [37]:
# Merge well_data with well_shapes
well_data_merged = well_data.merge(well_shapes, left_on='api_num', right_on='api')

# Confirm the merge
print(well_data_merged.shape)


(953232, 24)


In [38]:
# Merge orphaned wells with well data
orphan_wells_detailed = orphaned_wells_shape.merge(well_data_merged, left_on='api_no', right_on='api_num')

# Confirm the merge
print(orphan_wells_detailed.shape)


(95, 50)


In [39]:
# Convert api_well_id to string in well_inspections and well_violations
well_inspections['api_well_id'] = well_inspections['api_well_id'].astype(str).str.zfill(8)
well_violations['api_well_id'] = well_violations['api_well_id'].astype(str).str.zfill(8)


In [40]:
# Check columns of all DataFrames before merging
print(orphan_wells_detailed.columns)
print(well_inspections.columns)
print(well_violations.columns)


Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file', 'well_info', 'location_n', 'other_note', 'geom',
       'api_no', 'quad_num', 'api_num', 'survey', 'block', 'section_y',
       'abstract', 'operator', 'total_depth', 'well_number', 'lease_name',
       'permit_num', 'gas_rrcid', 'oil_lease_num', 'filler_1', 'field_name',
       'completion_date', 'plug_date', 'refer_to_api', 'on_off_schedule',
       'oil_gas_code', 'filler_2', 'api', 'geometry', 'properties'],
      dtype='object')
Index(['id', 'inspection_id', 'api_well_id'], dtype='object')
Index(['id', 'violation_id', 'api_well_id'], dtype='object')


In [41]:
# Merge orphan_wells_detailed with inspection data
orphan_wells_full = orphan_wells_detailed.merge(well_inspections, left_on='api_no', right_on='api_well_id', how='left', suffixes=('', '_inspection'))

# Confirm columns after first merge
print(orphan_wells_full.columns)

# Merge orphan_wells_full with violation data
orphan_wells_full = orphan_wells_full.merge(well_violations, left_on='api_no', right_on='api_well_id', how='left', suffixes=('', '_violation'))

# Confirm columns after second merge
print(orphan_wells_full.columns)


Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file', 'well_info', 'location_n', 'other_note', 'geom',
       'api_no', 'quad_num', 'api_num', 'survey', 'block', 'section_y',
       'abstract', 'operator', 'total_depth', 'well_number', 'lease_name',
       'permit_num', 'gas_rrcid', 'oil_lease_num', 'filler_1', 'field_name',
       'completion_date', 'plug_date', 'refer_to_api', 'on_off_schedule',
       'oil_gas_code', 'filler_2', 'api', 'geometry', 'properties', 'id',
       'inspection_id', 'api_well_id'],
      dtype='object')
Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file',

In [42]:
# Count of orphan wells by operator
orphan_well_count_by_operator = orphan_wells_full['operator'].value_counts()
print(orphan_well_count_by_operator)

# Distribution of plug dates
plug_date_distribution = orphan_wells_full['plug_date'].value_counts().sort_index()
print(plug_date_distribution)

# Latest inspection and violation dates
if 'inspection_date' in orphan_wells_full.columns:
    latest_inspection_date = orphan_wells_full['inspection_date'].max()
else:
    latest_inspection_date = "No inspection_date column found"
    
if 'violation_date' in orphan_wells_full.columns:
    latest_violation_date = orphan_wells_full['violation_date'].max()
else:
    latest_violation_date = "No violation_date column found"

print(f"Latest inspection date: {latest_inspection_date}")
print(f"Latest violation date: {latest_violation_date}")


operator
P1 ENERGY, LLC                      14
TEXACO INC.                         13
LARGE OPERATING LLC                  8
APACHE CORPORATION                   6
3-T EXPLORATION, INC.                4
REPUBLIC MINERAL CORPORATION         3
AMOCO PRODUCTION COMPANY             3
WEAVER, W. CARLTON                   3
LIVE OAK/OPERATIONS, INC.            2
INDEPENDENCE RESOURCES MGMT, LLC     2
BASA RESOURCES, INC.                 2
RILEY, BILLY OPERATING ACCOUNT       2
CHEVRON U. S. A. INC.                2
HARTOIL CORPORATION                  2
SIEGFRIED, R. H., INC.               2
COMSTOCK OIL & GAS, LLC              2
MEADCO PROPERTIES                    1
ANADARKO PETROLEUM CORP.             1
ARES ENERGY, LTD.                    1
JONCO                                1
SINGLETON, W.                        1
O-CAL ENERGY                         1
EXXON CORP.                          1
VENUS OIL COMPANY                    1
REMORA OPERATING, LLC                1
KEYSTONE PETROLE

In [45]:
import matplotlib.pyplot as plt
import geopandas as gpd

# Ensure the GeoDataFrame is correctly formatted
orphan_wells_gdf = gpd.GeoDataFrame(orphan_wells_full, geometry='geometry')

# Plotting with enhancements
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
orphan_wells_gdf.plot(column='operator', legend=True, ax=ax, legend_kwds={'bbox_to_anchor': (1, 1), 'loc': 'upper left'})

# Add title and labels
plt.title('Orphan Wells in Texas by Operator', fontsize=20)
plt.xlabel('Longitude', fontsize=15)
plt.ylabel('Latitude', fontsize=15)

# Improve legend
leg = ax.get_legend()
leg.set_bbox_to_anchor((1, 0.5))
leg.set_title('Operators', prop={'size': 15})
for lh in leg.legendHandles:
    lh.set_alpha(1)
    lh._sizes = [100]

plt.show()


  for lh in leg.legendHandles:


KeyboardInterrupt: 

In [46]:
# Filter for wells that have not been plugged
unplugged_wells = orphan_wells_detailed[orphan_wells_detailed['plug_date'].isna()]

# Confirm the filter
print(unplugged_wells.shape)


(46, 50)


In [47]:
# Convert api_well_id to string in well_inspections and well_violations
well_inspections['api_well_id'] = well_inspections['api_well_id'].astype(str).str.zfill(8)
well_violations['api_well_id'] = well_violations['api_well_id'].astype(str).str.zfill(8)


In [48]:
# Check columns of all DataFrames before merging
print(unplugged_wells.columns)
print(well_inspections.columns)
print(well_violations.columns)


Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file', 'well_info', 'location_n', 'other_note', 'geom',
       'api_no', 'quad_num', 'api_num', 'survey', 'block', 'section_y',
       'abstract', 'operator', 'total_depth', 'well_number', 'lease_name',
       'permit_num', 'gas_rrcid', 'oil_lease_num', 'filler_1', 'field_name',
       'completion_date', 'plug_date', 'refer_to_api', 'on_off_schedule',
       'oil_gas_code', 'filler_2', 'api', 'geometry', 'properties'],
      dtype='object')
Index(['id', 'inspection_id', 'api_well_id'], dtype='object')
Index(['id', 'violation_id', 'api_well_id'], dtype='object')


In [49]:
# Ensure the api_well_id columns are properly named
well_inspections = well_inspections.rename(columns={'api_well_id': 'api_no'})
well_violations = well_violations.rename(columns={'api_well_id': 'api_no'})


In [50]:
# Merge unplugged_wells with inspection data
unplugged_wells_full = unplugged_wells.merge(well_inspections, on='api_no', how='left', suffixes=('', '_inspection'))

# Confirm columns after first merge
print(unplugged_wells_full.columns)

# Merge unplugged_wells_full with violation data
unplugged_wells_full = unplugged_wells_full.merge(well_violations, on='api_no', how='left', suffixes=('', '_violation'))

# Confirm columns after second merge
print(unplugged_wells_full.columns)


Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file', 'well_info', 'location_n', 'other_note', 'geom',
       'api_no', 'quad_num', 'api_num', 'survey', 'block', 'section_y',
       'abstract', 'operator', 'total_depth', 'well_number', 'lease_name',
       'permit_num', 'gas_rrcid', 'oil_lease_num', 'filler_1', 'field_name',
       'completion_date', 'plug_date', 'refer_to_api', 'on_off_schedule',
       'oil_gas_code', 'filler_2', 'api', 'geometry', 'properties', 'id',
       'inspection_id'],
      dtype='object')
Index(['gid', 'well_ident', 'state', 'county', 'well_name', 'well_numbe',
       'type', 'status', 'latitude', 'longitude', 'prime_meri', 'township',
       't_dir', 'range', 'r_dir', 'section_x', 'qtr', 'qtr_qtr', 'qtr_qtr_qt',
       'source', 'data_file', 'well_info', '

In [51]:
# Count of unplugged orphan wells by operator
unplugged_well_count_by_operator = unplugged_wells_full['operator'].value_counts()
print(unplugged_well_count_by_operator)

# Latest inspection and violation dates
if 'inspection_date' in unplugged_wells_full.columns:
    latest_inspection_date = unplugged_wells_full['inspection_date'].max()
else:
    latest_inspection_date = "No inspection_date column found"
    
if 'violation_date' in unplugged_wells_full.columns:
    latest_violation_date = unplugged_wells_full['violation_date'].max()
else:
    latest_violation_date = "No violation_date column found"

print(f"Latest inspection date: {latest_inspection_date}")
print(f"Latest violation date: {latest_violation_date}")


operator
P1 ENERGY, LLC                      14
TEXACO INC.                          6
REPUBLIC MINERAL CORPORATION         3
APACHE CORPORATION                   3
COMSTOCK OIL & GAS, LLC              2
SIEGFRIED, R. H., INC.               2
HARTOIL CORPORATION                  2
INDEPENDENCE RESOURCES MGMT, LLC     2
KEYSTONE PETROLEUM NM LLC            1
O-CAL ENERGY                         1
SINGLETON, W.                        1
JONCO                                1
BASA RESOURCES, INC.                 1
ARES ENERGY, LTD.                    1
SHELBY OPERATING COMPANY             1
HIGH DESERT RESOURCES, L.L.C.        1
ATLAS OPERATING LLC                  1
VALENCE OPERATING COMPANY            1
ENCANA OIL & GAS(USA) INC.           1
REMORA OPERATING, LLC                1
Name: count, dtype: int64
Latest inspection date: No inspection_date column found
Latest violation date: No violation_date column found
