In [3]:
import os
import geopandas as gpd
from pyproj import Proj, transform
import pandas as pd
from shapely.wkt import loads
from pyproj import Transformer
import csv

# Directory holding the ECMC/COGCC data. Defaults to the original author's
# local checkout; set the COGCC_DATA_DIR environment variable to run elsewhere.
DATA_DIR = os.environ.get(
    'COGCC_DATA_DIR',
    '/Users/ichittumuri/Desktop/MINES/COGCC-Risk-Analysis/Data',
)
# Later cells read and write files using paths relative to this directory.
os.chdir(DATA_DIR)
# gpd.read_file already returns a GeoDataFrame, so no extra wrapping is needed.
gdf = gpd.read_file("ECMC_Flowline_Data_Access/COGCC_Form44_Off_Location_Flowlines_Approved_CONFIDENTIAL.gdb")

In [4]:
# Dump the flowline table to CSV. The index is written on purpose (the default):
# it becomes the first, unnamed column that later cells strip off by position.
gdf.to_csv("Off_Location.csv", index=True)

In [5]:
# Load the exported flowline table back as a plain pandas DataFrame;
# the geometry column comes back as text and is parsed later with shapely's WKT loader.
Off_Location = pd.read_csv(filepath_or_buffer="Off_Location.csv")

In [7]:
# Build a reusable PyProj transformer from the projected source CRS to
# geographic WGS 84 (EPSG:4326).
# NOTE(review): the source CRS is hard-coded as EPSG:26913 — confirm it matches gdf.crs.
# always_xy=True fixes the axis order to (x, y) in and (longitude, latitude) out.
transformer = Transformer.from_crs(
    crs_from="epsg:26913",
    crs_to="epsg:4326",
    always_xy=True,
)

In [8]:
# Convert every flowline vertex to longitude/latitude and stream the result to
# a CSV with one row per vertex: (Line, Longitude, Latitude).
with open('Off_Location_transformed_coordinates.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Write header
    csvwriter.writerow(['Line', 'Longitude', 'Latitude'])

    # Only the geometry column is needed, so iterate over it directly instead
    # of building a full row Series per record with iterrows().
    for index, wkt in Off_Location['geometry'].items():
        # Parse the WKT to a Shapely geometry
        geometry = loads(wkt)

        if geometry.geom_type == 'MultiLineString':
            parts = geometry.geoms
        elif geometry.geom_type == 'LineString':
            # A plain line is treated as a one-part multi-line instead of being dropped
            parts = [geometry]
        else:
            print(f"Geometry at index {index} is not a MultiLineString")
            continue

        # 'Line' is the record's index label plus one (1-based for a RangeIndex)
        line_id = index + 1
        for line in parts:
            coords = list(line.coords)
            if not coords:
                # Nothing to transform or write for an empty line
                continue
            # Keep only x/y; any z value is ignored
            xs = [point[0] for point in coords]
            ys = [point[1] for point in coords]
            # One batched transform per line instead of one call per vertex
            lons, lats = transformer.transform(xs, ys)
            csvwriter.writerows(
                (line_id, lon, lat) for lon, lat in zip(lons, lats)
            )

print("Coordinate transformation and CSV export done")

Coordinate transformation and CSV export done


In [9]:
# Load the per-vertex longitude/latitude table written by the previous cell.
Off_Location_transformed_coordinates = pd.read_csv(
    filepath_or_buffer="Off_Location_transformed_coordinates.csv"
)

In [10]:
# DataFrame.info() prints its report itself and returns None, so call it
# directly; wrapping it in print() appends a stray "None" line to the output.
Off_Location.info()  # Descriptive info about the DataFrame
print(Off_Location.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152986 entries, 0 to 152985
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Unnamed: 0    152986 non-null  int64  
 1   Operator      152986 non-null  object 
 2   Fluid         146654 non-null  object 
 3   Material      143622 non-null  object 
 4   Diam_in       91776 non-null   float64
 5   Status        115544 non-null  object 
 6   Length_ft     152986 non-null  float64
 7   Doc_Num       152986 non-null  int64  
 8   SHAPE_Length  152986 non-null  float64
 9   geometry      152986 non-null  object 
dtypes: float64(3), int64(2), object(5)
memory usage: 11.7+ MB
None
(152986, 10)


In [11]:
# DataFrame.info() prints its report itself and returns None, so call it
# directly; wrapping it in print() appends a stray "None" line to the output.
Off_Location_transformed_coordinates.info()
print(Off_Location_transformed_coordinates.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7433310 entries, 0 to 7433309
Data columns (total 3 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Line       int64  
 1   Longitude  float64
 2   Latitude   float64
dtypes: float64(2), int64(1)
memory usage: 170.1 MB
None
(7433310, 3)


In [12]:
# Preview the first five transformed vertices.
Off_Location_transformed_coordinates.head(n=5)

Unnamed: 0,Line,Longitude,Latitude
0,1,-104.722313,37.19994
1,1,-104.722391,37.199734
2,1,-104.722313,37.199535
3,1,-104.722291,37.199483
4,1,-104.722289,37.199479


### TEST


In [13]:
# Small trial table: the first five flowlines, keyed by a 1-based 'Line' id
# so they can be joined to the per-vertex coordinate table.
test_df = (
    Off_Location
    .iloc[:5, 1:]                        # first 5 rows, without the leading saved-index column
    .drop(columns=['geometry'])          # coordinates come from the transformed table instead
    .rename(index=lambda label: label + 1)  # shift index labels to start at 1
    .reset_index()                       # move the index labels into a column named 'index'
    .rename(columns={'index': 'Line'})
)
test_df


Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length
0,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162
1,2,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Abandoned,651.58,402501867,198.525215
2,3,PDC ENERGY INC,,,,Partial Removed see comment,1902.59,402839937,579.687012
3,4,NOBLE ENERGY INC,Gas,Carbon Steel,3.0,Active,205.62,402501867,62.64984
4,5,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Active,2069.9,402263524,630.658768


In [15]:
# Left-join the trial rows to Off_Location_transformed_coordinates on 'Line',
# giving one output row per (flowline, vertex) pair.
combined_df = test_df.merge(
    Off_Location_transformed_coordinates,
    how='left',
    on='Line',
)
combined_df.head()

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length,Longitude,Latitude
0,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722313,37.19994
1,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722391,37.199734
2,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722313,37.199535
3,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722291,37.199483
4,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722289,37.199479


### Combine original CSV and transformed coordinates

In [16]:
# Attribute table for every flowline, keyed by a 1-based 'Line' id so it can
# be joined to the per-vertex coordinate table.
NEW_Off_Location = (
    Off_Location
    .iloc[:, 1:]                         # drop the leading saved-index column
    .drop(columns=['geometry'])          # coordinates come from the transformed table instead
    .rename(index=lambda label: label + 1)  # shift index labels to start at 1
    .reset_index()                       # move the index labels into a column named 'index'
    .rename(columns={'index': 'Line'})
)
NEW_Off_Location

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length
0,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162
1,2,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Abandoned,651.58,402501867,198.525215
2,3,PDC ENERGY INC,,,,Partial Removed see comment,1902.59,402839937,579.687012
3,4,NOBLE ENERGY INC,Gas,Carbon Steel,3.0,Active,205.62,402501867,62.649840
4,5,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Active,2069.90,402263524,630.658768
...,...,...,...,...,...,...,...,...,...
152981,152982,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Active,1981.71,402199760,603.792317
152982,152983,NOBLE ENERGY INC,Multiphase,Carbon Steel,2.0,Active,1205.17,402159842,367.195194
152983,152984,TEP ROCKY MOUNTAIN LLC,Multiphase,Coated Steel,2.0,Active,543.96,402178130,165.847635
152984,152985,TEP ROCKY MOUNTAIN LLC,Multiphase,Coated Steel,2.0,Active,494.62,402585737,150.834519


In [17]:
# Left-join every flowline's attributes to Off_Location_transformed_coordinates
# on 'Line', giving one output row per (flowline, vertex) pair.
combined_df = NEW_Off_Location.merge(
    Off_Location_transformed_coordinates,
    how='left',
    on='Line',
)
combined_df

Unnamed: 0,Line,Operator,Fluid,Material,Diam_in,Status,Length_ft,Doc_Num,SHAPE_Length,Longitude,Latitude
0,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722313,37.199940
1,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722391,37.199734
2,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722313,37.199535
3,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722291,37.199483
4,1,EVERGREEN NATURAL RESOURCES LLC,Gas,polly,4.0,Active,2277.71,403322934,693.972162,-104.722289,37.199479
...,...,...,...,...,...,...,...,...,...,...,...
7433305,152985,TEP ROCKY MOUNTAIN LLC,Multiphase,Coated Steel,2.0,Active,494.62,402585737,150.834519,-108.123138,39.495532
7433306,152985,TEP ROCKY MOUNTAIN LLC,Multiphase,Coated Steel,2.0,Active,494.62,402585737,150.834519,-108.123100,39.495529
7433307,152985,TEP ROCKY MOUNTAIN LLC,Multiphase,Coated Steel,2.0,Active,494.62,402585737,150.834519,-108.122990,39.495526
7433308,152986,NOBLE ENERGY INC,Multiphase,Carbon Steel,3.0,Abandoned,792.39,402501867,241.425830,-104.721080,40.359905


In [18]:
# Write the joined attribute + coordinate table to disk, without the row index.
combined_df.to_csv(path_or_buf='FINAL_Off_Location.csv', index=False)