In [19]:
import os
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco
import warnings
warnings.filterwarnings("ignore")

Data Sources:
- Cameron Roberts data collection and compiling

In [20]:
## Get current working directory
print("Current working directory:", os.getcwd())

## Find path to raw data (for the raw files)
raw_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'raw_data'))

## Find path to folder for saving cleaned csv
cleaned_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'cleaned_data'))

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [21]:
# Find and read the CSV file from the raw_data folder
target_file = 'Integrated_Pipeline_Data.xlsx'
target_file_path = os.path.join(raw_data_path, target_file)
print(target_file_path)

/Users/jennagreene/Documents/GitHub/HATCH_data/raw_data/Integrated_Pipeline_Data.xlsx


In [22]:
pipelines_df = pd.read_excel(target_file_path, sheet_name= 'Pipeline.Data')
pipelines_df

Unnamed: 0,Fuel,Year,Country,Cumulative.or.Annual,Measure,Value
0,Gas,1904.0,Afghanistan,Annual,Capacity,0.0
1,Gas,1905.0,Afghanistan,Annual,Capacity,0.0
2,Gas,1906.0,Afghanistan,Annual,Capacity,0.0
3,Gas,1907.0,Afghanistan,Annual,Capacity,0.0
4,Gas,1908.0,Afghanistan,Annual,Capacity,0.0
...,...,...,...,...,...,...
218377,Oil,2019.0,Zambia,Cumulative,Length,0.0
218378,Oil,2020.0,Zambia,Cumulative,Length,0.0
218379,Oil,2021.0,Zambia,Cumulative,Length,0.0
218380,Oil,2022.0,Zambia,Cumulative,Length,0.0


Filter to only include length of pipeline 

In [23]:

# Filtering
pipelines_df = pipelines_df[(pipelines_df['Measure'] == 'Length') & 
                            (pipelines_df['Cumulative.or.Annual'] == 'Cumulative')]

pipelines_df

Unnamed: 0,Fuel,Year,Country,Cumulative.or.Annual,Measure,Value
600,Gas,1904.0,Afghanistan,Cumulative,Length,0.0
601,Gas,1905.0,Afghanistan,Cumulative,Length,0.0
602,Gas,1906.0,Afghanistan,Cumulative,Length,0.0
603,Gas,1907.0,Afghanistan,Cumulative,Length,0.0
604,Gas,1908.0,Afghanistan,Cumulative,Length,0.0
...,...,...,...,...,...,...
218377,Oil,2019.0,Zambia,Cumulative,Length,0.0
218378,Oil,2020.0,Zambia,Cumulative,Length,0.0
218379,Oil,2021.0,Zambia,Cumulative,Length,0.0
218380,Oil,2022.0,Zambia,Cumulative,Length,0.0


In [24]:
# Create separate dataframes for different types of fuel

gas_df = pipelines_df[(pipelines_df['Fuel'] == 'Gas')]


In [25]:
# Create separate dataframes for different types of fuel

oil_df = pipelines_df[(pipelines_df['Fuel'] == 'Oil')]


In [26]:
co2_pipelines_df = pd.read_excel(target_file_path, sheet_name= 'CO2 Pipelines Length Tally')


In [27]:
gas_df_pivot = gas_df.pivot_table(index = 'Country', columns = 'Year', values = 'Value', fill_value = 0)
gas_df_pivot.columns = gas_df_pivot.columns.astype(int)
gas_df_pivot = gas_df_pivot.reset_index()

gas_df_pivot['Country Code'] = coco.convert(names=gas_df_pivot['Country'], to='iso2')
gas_df_pivot = gas_df_pivot[gas_df_pivot['Country Code'] != 'not found']



Joint Petroleum Development Area not found in regex
TÃ¼rkiye not found in regex
World not found in regex


In [28]:
gas_df_pivot['Technology Name'] = 'Natural Gas Pipeline'
gas_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
gas_df_pivot['Unit'] = 'km'
gas_df_pivot['Metric'] = 'Total Length'
gas_df_pivot['Spatial Scale'] = 'National'
gas_df_pivot['ID'] = gas_df_pivot['Technology Name'] + '_' + gas_df_pivot['Metric'] + '_' + gas_df_pivot['Country Code']
gas_df_pivot = gas_df_pivot.rename(columns = {'Country': 'Country Name'})
gas_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1904,1905,1906,1907,1908,1909,1910,1911,1912,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Natural Gas Pipeline_Total Length_AF,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AF,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_AF
Natural Gas Pipeline_Total Length_AL,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AL,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_AL
Natural Gas Pipeline_Total Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9436.00,9545.00,9545.00,DZ,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_DZ
Natural Gas Pipeline_Total Length_AO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,500.00,500.00,500.00,AO,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_AO
Natural Gas Pipeline_Total Length_AR,Argentina,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14243.40,14243.40,14243.40,AR,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Natural Gas Pipeline_Total Length_UZ,Uzbekistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,827.00,852.00,992.00,UZ,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_UZ
Natural Gas Pipeline_Total Length_VE,Venezuela,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2721.00,2721.00,2721.00,VE,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_VE
Natural Gas Pipeline_Total Length_VN,Vietnam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,882.43,1036.43,1036.43,VN,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_VN
Natural Gas Pipeline_Total Length_YE,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,YE,Natural Gas Pipeline,Roberts et al (in preparation),km,Total Length,National,Natural Gas Pipeline_Total Length_YE


In [29]:
oil_df_pivot = oil_df.pivot_table(index = 'Country', columns = 'Year', values = 'Value', fill_value = 0)

oil_df_pivot.columns = oil_df_pivot.columns.astype(int)
oil_df_pivot = oil_df_pivot.reset_index()

oil_df_pivot['Country Code'] = coco.convert(names=oil_df_pivot['Country'], to='iso2')
oil_df_pivot = oil_df_pivot[oil_df_pivot['Country Code'] != 'not found']



Joint Petroleum Development Area not found in regex
TÃ¼rkiye not found in regex
World not found in regex


In [30]:
oil_df_pivot['Technology Name'] = 'Oil Pipeline'
oil_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
oil_df_pivot['Unit'] = 'km'
oil_df_pivot['Metric'] = 'Total Length'
oil_df_pivot['Spatial Scale'] = 'National'
oil_df_pivot['ID'] = oil_df_pivot['Technology Name'] + '_' + oil_df_pivot['Metric'] + '_' + oil_df_pivot['Country Code']
oil_df_pivot = oil_df_pivot.rename(columns = {'Country': 'Country Name'})
oil_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1904,1905,1906,1907,1908,1909,1910,1911,1912,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Oil Pipeline_Total Length_AF,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AF,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_AF
Oil Pipeline_Total Length_AL,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AL,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_AL
Oil Pipeline_Total Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3879.00,3879.00,3879.00,DZ,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_DZ
Oil Pipeline_Total Length_AO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,99.78,99.78,99.78,AO,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_AO
Oil Pipeline_Total Length_AR,Argentina,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,762.00,762.00,762.00,AR,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oil Pipeline_Total Length_UZ,Uzbekistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,UZ,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_UZ
Oil Pipeline_Total Length_VE,Venezuela,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,VE,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_VE
Oil Pipeline_Total Length_VN,Vietnam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,VN,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_VN
Oil Pipeline_Total Length_YE,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,797.00,797.00,797.00,YE,Oil Pipeline,Roberts et al (in preparation),km,Total Length,National,Oil Pipeline_Total Length_YE


In [31]:
co2_pipelines_df = co2_pipelines_df.drop(['USA and Canada'], axis = 1)

In [32]:
co2_pipelines_df_pivot = co2_pipelines_df.pivot_table(columns = 'Year',  fill_value = 0)
co2_pipelines_df_pivot.columns = co2_pipelines_df_pivot.columns.astype(int)
co2_pipelines_df_pivot = co2_pipelines_df_pivot.reset_index()
co2_pipelines_df_pivot = co2_pipelines_df_pivot.rename(columns = {'Year': 'Index', 'index': 'Country'})
co2_pipelines_df_pivot['Country Code'] = coco.convert(names=co2_pipelines_df_pivot['Country'], to='iso2')


In [33]:
co2_pipelines_df_pivot['Technology Name'] = 'CO2 Pipeline'
co2_pipelines_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
co2_pipelines_df_pivot['Unit'] = 'km'
co2_pipelines_df_pivot['Metric'] = 'Total Length'
co2_pipelines_df_pivot['Spatial Scale'] = 'National'
co2_pipelines_df_pivot['ID'] = co2_pipelines_df_pivot['Technology Name'] + '_' + co2_pipelines_df_pivot['Metric'] + '_' + co2_pipelines_df_pivot['Country Code']
co2_pipelines_df_pivot = co2_pipelines_df_pivot.rename(columns = {'Country': 'Country Name'})
co2_pipelines_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1971,1972,1973,1974,1975,1976,1977,1978,1979,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CO2 Pipeline_Total Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,300.0,300.0,300.0,DZ,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_DZ
CO2 Pipeline_Total Length_AU,Australia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,70.0,70.0,70.0,AU,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_AU
CO2 Pipeline_Total Length_CA,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,390.0,390.0,390.0,CA,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_CA
CO2 Pipeline_Total Length_CN,China,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,117.0,117.0,187.0,CN,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_CN
CO2 Pipeline_Total Length_NO,Norway,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,NO,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_NO
CO2 Pipeline_Total Length_QA,Qatar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,100.0,100.0,100.0,QA,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_QA
CO2 Pipeline_Total Length_SA,Saudi Arabia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,85.0,85.0,85.0,SA,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_SA
CO2 Pipeline_Total Length_TR,Turkey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,90.0,90.0,90.0,TR,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_TR
CO2 Pipeline_Total Length_US,USA,0.0,273.53,273.53,273.53,273.53,930.002,930.002,930.002,930.002,...,7908.95905,7936.31205,7936.31205,US,CO2 Pipeline,Roberts et al (in preparation),km,Total Length,National,CO2 Pipeline_Total Length_US


Save data and files


In [34]:
output_file = 'gas_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

gas_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/gas_pipelines.csv


In [35]:
output_file = 'oil_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

oil_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/oil_pipelines.csv


In [36]:
output_file = 'co2_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

co2_pipelines_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/co2_pipelines.csv
