In [11]:
import os
import geopandas as gpd
from pyproj import Proj, transform
import pandas as pd
from shapely.wkt import loads
from pyproj import Transformer
import csv

# Directory that holds the flowline data. Every later cell reads and writes
# files relative to it, so the working directory is switched once here.
# Set the COGCC_DATA_DIR environment variable to run the notebook on another
# machine without editing this cell; the default is the original location.
DATA_DIR = os.environ.get(
    'COGCC_DATA_DIR',
    '/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data',
)
os.chdir(DATA_DIR)

# Load the flowline layer from the file geodatabase. read_file already returns
# a GeoDataFrame, so it does not need to be wrapped in GeoDataFrame(...) again.
gdf = gpd.read_file("ECMC_Flowline_Data_Access/COGCC_Form44_Crude_Oil_Produced_Water_Transfer_Flowlines_Approved_CONFIDENTIAL.gdb")

In [12]:
# Dump the flowline table to CSV. The index is written on purpose (the default):
# it becomes the unnamed first column that later cells drop by position.
gdf.to_csv("crudeOilProduced.csv", index=True)

In [17]:
# Reload the exported flowline table as a plain pandas DataFrame; the
# 'geometry' column is parsed from WKT text further down.
crudeOilProduced = pd.read_csv(filepath_or_buffer="crudeOilProduced.csv")

In [19]:
# Build the reprojection used for every flowline vertex.
# NOTE(review): EPSG:26913 is assumed to be the CRS of the stored geometries --
# confirm against gdf.crs. EPSG:4326 is the geographic lon/lat target.
# always_xy=True keeps the axis order as (x, y) in and (longitude, latitude) out.
source_crs = "epsg:26913"
target_crs = "epsg:4326"
transformer = Transformer.from_crs(source_crs, target_crs, always_xy=True)

In [20]:
# Explode every flowline geometry into one CSV row per vertex, reprojected with
# `transformer`. 'Line' is the 1-based label of the source row in
# crudeOilProduced; later cells use it as the key to join attributes back on.
with open('crudeOilProduced_transformed_coordinates.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Write header
    csvwriter.writerow(['Line', 'Longitude', 'Latitude'])

    # Only the WKT column is needed, so iterate over it directly instead of
    # building a full row Series per record with iterrows().
    for index, wkt in crudeOilProduced['geometry'].items():
        # Parse the WKT to a Shapely geometry
        geometry = loads(wkt)

        if geometry.geom_type == 'MultiLineString':
            parts = geometry.geoms
        elif geometry.geom_type == 'LineString':
            # A single-part flowline carries the same information; treat it as
            # a one-element multi-line instead of dropping its coordinates.
            parts = [geometry]
        else:
            print(f"Geometry at index {index} is not a MultiLineString")
            continue

        for line in parts:
            if line.is_empty:
                # Nothing to write for an empty part.
                continue
            # Transform all vertices of the line in one call rather than one
            # call per point; .xy yields only x and y, ignoring any z value.
            xs, ys = line.xy
            lons, lats = transformer.transform(xs, ys)
            csvwriter.writerows((index + 1, lon, lat) for lon, lat in zip(lons, lats))

print("Coordinate transformation and CSV export done")

Coordinate transformation and CSV export done


In [21]:
# Load the per-vertex coordinate table written by the export cell
# (columns: Line, Longitude, Latitude).
crudeOilProduced_transformed_coordinates = pd.read_csv(
    filepath_or_buffer="crudeOilProduced_transformed_coordinates.csv"
)

In [22]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() only appends a stray "None" to the output.
crudeOilProduced.info()
print(crudeOilProduced.shape)  # (rows, columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106993 entries, 0 to 106992
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Unnamed: 0    106993 non-null  int64  
 1   Operator      106993 non-null  object 
 2   Fluid         83751 non-null   object 
 3   Material      83233 non-null   object 
 4   Diam_in       83754 non-null   float64
 5   Status        49853 non-null   object 
 6   Length_ft     106993 non-null  float64
 7   Doc_Num       106993 non-null  int64  
 8   SHAPE_Length  106993 non-null  float64
 9   geometry      106993 non-null  object 
dtypes: float64(3), int64(2), object(5)
memory usage: 8.2+ MB
None
(106993, 10)


In [23]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() only appends a stray "None" to the output.
crudeOilProduced_transformed_coordinates.info()
print(crudeOilProduced_transformed_coordinates.shape)  # (rows, columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2125829 entries, 0 to 2125828
Data columns (total 3 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Line       int64  
 1   Longitude  float64
 2   Latitude   float64
dtypes: float64(2), int64(1)
memory usage: 48.7 MB
None
(2125829, 3)


In [45]:
# Preview the first five vertex rows of the transformed coordinate table.
crudeOilProduced_transformed_coordinates.head(n=5)

Unnamed: 0,Line,Longitude,Latitude
0,1,-107.923534,37.038603
1,1,-107.923501,37.038623
2,1,-107.923383,37.038719
3,1,-107.923301,37.038779
4,1,-107.923249,37.038806


### TEST


In [59]:
# Small trial frame: the first five flowlines, attributes only, keyed by a
# 1-based 'Line' column that matches the numbering in the coordinate table.
test_df = (
    crudeOilProduced.iloc[:5, 1:]                  # first 5 rows, minus the leading unnamed index column
    .drop(columns=['geometry'])                    # WKT is not needed once coordinates are exported
    .rename(index=lambda position: position + 1)   # shift row labels from 0-based to 1-based
    .reset_index()                                 # turn those labels into a regular column
    .rename(columns={'index': 'Line'})
)
test_df


Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length
0,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176
1,2,SIMCOE LLC,Produced Water,Fiberglass,3.0,Active,1916.17,402870670,584.211077
2,3,SIMCOE LLC,Produced Water,Fiberglass,3.0,,4.56,402852005,1.389643
3,4,SIMCOE LLC,Produced Water,Fiberglass,3.0,Active,5.23,402902791,1.594278
4,5,NOBLE ENERGY INC,Produced Water,Flexsteel,8.625,Active,122.93,403368004,37.456167


In [62]:
# Attach every transformed vertex to its flowline's attributes for the trial
# frame; a left join keeps all five lines and repeats their attributes per vertex.
combined_df = test_df.merge(
    crudeOilProduced_transformed_coordinates,
    how='left',
    on='Line',
)
combined_df.head()

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length,Longitude,Latitude
0,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923534,37.038603
1,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923501,37.038623
2,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923383,37.038719
3,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923301,37.038779
4,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923249,37.038806


### Combine original CSV and transformed coordinates

In [67]:
# Attribute-only copy of the full flowline table, keyed by a 1-based 'Line'
# column that matches the numbering in the coordinate table.
NEW_crudeOilProduced = (
    crudeOilProduced.iloc[:, 1:]                   # drop the leading unnamed index column
    .drop(columns=['geometry'])                    # WKT is not needed once coordinates are exported
    .rename(index=lambda position: position + 1)   # shift row labels from 0-based to 1-based
    .reset_index()                                 # turn those labels into a regular column
    .rename(columns={'index': 'Line'})
)
NEW_crudeOilProduced

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length
0,1,SIMCOE LLC,Produced Water,Carbon Steel,2.000,Active,531.01,402902791,161.921176
1,2,SIMCOE LLC,Produced Water,Fiberglass,3.000,Active,1916.17,402870670,584.211077
2,3,SIMCOE LLC,Produced Water,Fiberglass,3.000,,4.56,402852005,1.389643
3,4,SIMCOE LLC,Produced Water,Fiberglass,3.000,Active,5.23,402902791,1.594278
4,5,NOBLE ENERGY INC,Produced Water,Flexsteel,8.625,Active,122.93,403368004,37.456167
...,...,...,...,...,...,...,...,...,...
106988,106989,SIMCOE LLC,Produced Water,Fiberglass,3.000,Active,578.71,402902791,176.444916
106989,106990,SIMCOE LLC,Produced Water,Carbon Steel,2.000,Active,3003.09,402870639,915.614526
106990,106991,SIMCOE LLC,Produced Water,Polyethylene,3.000,,1.27,402852005,0.388421
106991,106992,SIMCOE LLC,Produced Water,Fiberglass,4.000,Active,3222.54,402870639,982.492610


In [69]:
# Attach every transformed vertex to its flowline's attributes for the whole
# dataset; a left join keeps every line and repeats its attributes per vertex.
combined_df = NEW_crudeOilProduced.merge(
    crudeOilProduced_transformed_coordinates,
    how='left',
    on='Line',
)
combined_df

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length,Longitude,Latitude
0,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923534,37.038603
1,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923501,37.038623
2,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923383,37.038719
3,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923301,37.038779
4,1,SIMCOE LLC,Produced Water,Carbon Steel,2.0,Active,531.01,402902791,161.921176,-107.923249,37.038806
...,...,...,...,...,...,...,...,...,...,...,...
2125824,106993,ANADARKO WATTENBERG OIL COMPLEX LLC,OIL (RAW),STEEL,8.0,IDLE,549.82,402775410,167.519361,-104.866055,40.186654
2125825,106993,ANADARKO WATTENBERG OIL COMPLEX LLC,OIL (RAW),STEEL,8.0,IDLE,549.82,402775410,167.519361,-104.866056,40.186778
2125826,106993,ANADARKO WATTENBERG OIL COMPLEX LLC,OIL (RAW),STEEL,8.0,IDLE,549.82,402775410,167.519361,-104.866027,40.186778
2125827,106993,ANADARKO WATTENBERG OIL COMPLEX LLC,OIL (RAW),STEEL,8.0,IDLE,549.82,402775410,167.519361,-104.866012,40.186777


In [70]:
# Persist the merged attribute + coordinate table; the row index is left out
# because 'Line' already identifies each flowline.
combined_df.to_csv(path_or_buf='FINAL_CrudeOilProduced.csv', index=False)