In [79]:
import os
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco
import warnings
warnings.filterwarnings("ignore")

Data Sources:
- Cameron Roberts data collection and compiling

In [80]:
## Get current working directory
print("Current working directory:", os.getcwd())

## Find path to raw data (for the raw files)
raw_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'raw_data'))

## Find path to folder for saving cleaned csv
cleaned_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'cleaned_data'))

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [81]:
# Find and read the CSV file from the raw_data folder
target_file = 'Integrated_Pipeline_Data.xlsx'
target_file_path = os.path.join(raw_data_path, target_file)
print(target_file_path)

/Users/jennagreene/Documents/GitHub/HATCH_data/raw_data/Integrated_Pipeline_Data.xlsx


In [82]:
pipelines_df = pd.read_excel(target_file_path, sheet_name= 'Pipeline.Data')
pipelines_df

Unnamed: 0,Fuel,Year,Country,Cumulative.or.Annual,Measure,Value
0,Gas,1904.0,Afghanistan,Annual,Capacity,0.0
1,Gas,1905.0,Afghanistan,Annual,Capacity,0.0
2,Gas,1906.0,Afghanistan,Annual,Capacity,0.0
3,Gas,1907.0,Afghanistan,Annual,Capacity,0.0
4,Gas,1908.0,Afghanistan,Annual,Capacity,0.0
...,...,...,...,...,...,...
218377,Oil,2019.0,Zambia,Cumulative,Length,0.0
218378,Oil,2020.0,Zambia,Cumulative,Length,0.0
218379,Oil,2021.0,Zambia,Cumulative,Length,0.0
218380,Oil,2022.0,Zambia,Cumulative,Length,0.0


Filter to only include length of pipeline 

In [83]:

# Filtering
pipelines_df = pipelines_df[(pipelines_df['Measure'] == 'Length')]
pipelines_df

Unnamed: 0,Fuel,Year,Country,Cumulative.or.Annual,Measure,Value
240,Gas,1904.0,Afghanistan,Annual,Length,0.0
241,Gas,1905.0,Afghanistan,Annual,Length,0.0
242,Gas,1906.0,Afghanistan,Annual,Length,0.0
243,Gas,1907.0,Afghanistan,Annual,Length,0.0
244,Gas,1908.0,Afghanistan,Annual,Length,0.0
...,...,...,...,...,...,...
218377,Oil,2019.0,Zambia,Cumulative,Length,0.0
218378,Oil,2020.0,Zambia,Cumulative,Length,0.0
218379,Oil,2021.0,Zambia,Cumulative,Length,0.0
218380,Oil,2022.0,Zambia,Cumulative,Length,0.0


In [84]:
# Create separate dataframes for different types of fuel

gas_df = pipelines_df[(pipelines_df['Fuel'] == 'Gas')]


In [85]:
# Create separate dataframes for different types of fuel

oil_df = pipelines_df[(pipelines_df['Fuel'] == 'Oil')]


In [86]:
co2_pipelines_df = pd.read_excel(target_file_path, sheet_name= 'CO2 Pipelines Length Tally')


In [87]:
gas_df_pivot = gas_df.pivot_table(index = 'Country', columns = 'Year', values = 'Value', fill_value = 0)
gas_df_pivot.columns = gas_df_pivot.columns.astype(int)
gas_df_pivot = gas_df_pivot.reset_index()

gas_df_pivot['Country Code'] = coco.convert(names=gas_df_pivot['Country'], to='iso2')
gas_df_pivot = gas_df_pivot[gas_df_pivot['Country Code'] != 'not found']



Joint Petroleum Development Area not found in regex
TÃ¼rkiye not found in regex
World not found in regex


In [88]:
gas_df_pivot['Technology Name'] = 'Natural Gas Pipelines'
gas_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
gas_df_pivot['Unit'] = 'km'
gas_df_pivot['Metric'] = 'Cumulative Length'
gas_df_pivot['Spatial Scale'] = 'National'
gas_df_pivot['ID'] = gas_df_pivot['Technology Name'] + '_' + gas_df_pivot['Metric'] + '_' + gas_df_pivot['Country Code']
gas_df_pivot = gas_df_pivot.rename(columns = {'Country': 'Country Name'})
gas_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1904,1905,1906,1907,1908,1909,1910,1911,1912,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Natural Gas Pipelines_Cumulative Length_AF,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000,0.000,0.000,AF,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_AF
Natural Gas Pipelines_Cumulative Length_AL,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000,0.000,0.000,AL,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_AL
Natural Gas Pipelines_Cumulative Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4816.500,4827.000,4772.500,DZ,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_DZ
Natural Gas Pipelines_Cumulative Length_AO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,250.000,250.000,250.000,AO,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_AO
Natural Gas Pipelines_Cumulative Length_AR,Argentina,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7121.700,7121.700,7121.700,AR,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Natural Gas Pipelines_Cumulative Length_UZ,Uzbekistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,413.500,438.500,566.000,UZ,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_UZ
Natural Gas Pipelines_Cumulative Length_VE,Venezuela,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1360.500,1360.500,1360.500,VE,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_VE
Natural Gas Pipelines_Cumulative Length_VN,Vietnam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,441.215,595.215,518.215,VN,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_VN
Natural Gas Pipelines_Cumulative Length_YE,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000,0.000,0.000,YE,Natural Gas Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Natural Gas Pipelines_Cumulative Length_YE


In [89]:
oil_df_pivot = oil_df.pivot_table(index = 'Country', columns = 'Year', values = 'Value', fill_value = 0)

oil_df_pivot.columns = oil_df_pivot.columns.astype(int)
oil_df_pivot = oil_df_pivot.reset_index()

oil_df_pivot['Country Code'] = coco.convert(names=oil_df_pivot['Country'], to='iso2')
oil_df_pivot = oil_df_pivot[oil_df_pivot['Country Code'] != 'not found']



Joint Petroleum Development Area not found in regex
TÃ¼rkiye not found in regex
World not found in regex


In [90]:
oil_df_pivot['Technology Name'] = 'Oil Pipelines'
oil_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
oil_df_pivot['Unit'] = 'km'
oil_df_pivot['Metric'] = 'Cumulative Length'
oil_df_pivot['Spatial Scale'] = 'National'
oil_df_pivot['ID'] = oil_df_pivot['Technology Name'] + '_' + oil_df_pivot['Metric'] + '_' + oil_df_pivot['Country Code']
oil_df_pivot = oil_df_pivot.rename(columns = {'Country': 'Country Name'})
oil_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1904,1905,1906,1907,1908,1909,1910,1911,1912,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Oil Pipelines_Cumulative Length_AF,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AF,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_AF
Oil Pipelines_Cumulative Length_AL,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,AL,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_AL
Oil Pipelines_Cumulative Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1939.50,1939.50,1939.50,DZ,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_DZ
Oil Pipelines_Cumulative Length_AO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,49.89,49.89,49.89,AO,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_AO
Oil Pipelines_Cumulative Length_AR,Argentina,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,381.00,381.00,381.00,AR,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oil Pipelines_Cumulative Length_UZ,Uzbekistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,UZ,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_UZ
Oil Pipelines_Cumulative Length_VE,Venezuela,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,VE,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_VE
Oil Pipelines_Cumulative Length_VN,Vietnam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,VN,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_VN
Oil Pipelines_Cumulative Length_YE,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,398.50,398.50,398.50,YE,Oil Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,Oil Pipelines_Cumulative Length_YE


In [91]:
co2_pipelines_df = co2_pipelines_df.drop(['USA and Canada'], axis = 1)

In [92]:
co2_pipelines_df_pivot = co2_pipelines_df.pivot_table(columns = 'Year',  fill_value = 0)
co2_pipelines_df_pivot.columns = co2_pipelines_df_pivot.columns.astype(int)
co2_pipelines_df_pivot = co2_pipelines_df_pivot.reset_index()
co2_pipelines_df_pivot = co2_pipelines_df_pivot.rename(columns = {'Year': 'Index', 'index': 'Country'})
co2_pipelines_df_pivot['Country Code'] = coco.convert(names=co2_pipelines_df_pivot['Country'], to='iso2')


In [93]:
co2_pipelines_df_pivot['Technology Name'] = 'CO2 Pipelines'
co2_pipelines_df_pivot['Data Source'] = 'Roberts et al (in preparation)'
co2_pipelines_df_pivot['Unit'] = 'km'
co2_pipelines_df_pivot['Metric'] = 'Cumulative Length'
co2_pipelines_df_pivot['Spatial Scale'] = 'National'
co2_pipelines_df_pivot['ID'] = co2_pipelines_df_pivot['Technology Name'] + '_' + co2_pipelines_df_pivot['Metric'] + '_' + co2_pipelines_df_pivot['Country Code']
co2_pipelines_df_pivot = co2_pipelines_df_pivot.rename(columns = {'Country': 'Country Name'})
co2_pipelines_df_pivot.set_index('ID', inplace=False, drop = False)

Year,Country Name,1971,1972,1973,1974,1975,1976,1977,1978,1979,...,2021,2022,2023,Country Code,Technology Name,Data Source,Unit,Metric,Spatial Scale,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CO2 Pipelines_Cumulative Length_DZ,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,300.0,300.0,300.0,DZ,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_DZ
CO2 Pipelines_Cumulative Length_AU,Australia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,70.0,70.0,70.0,AU,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_AU
CO2 Pipelines_Cumulative Length_CA,Canada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,390.0,390.0,390.0,CA,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_CA
CO2 Pipelines_Cumulative Length_CN,China,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,117.0,117.0,187.0,CN,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_CN
CO2 Pipelines_Cumulative Length_NO,Norway,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,NO,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_NO
CO2 Pipelines_Cumulative Length_QA,Qatar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,100.0,100.0,100.0,QA,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_QA
CO2 Pipelines_Cumulative Length_SA,Saudi Arabia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,85.0,85.0,85.0,SA,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_SA
CO2 Pipelines_Cumulative Length_TR,Turkey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,90.0,90.0,90.0,TR,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_TR
CO2 Pipelines_Cumulative Length_US,USA,0.0,273.53,273.53,273.53,273.53,930.002,930.002,930.002,930.002,...,7908.95905,7936.31205,7936.31205,US,CO2 Pipelines,Roberts et al (in preparation),km,Cumulative Length,National,CO2 Pipelines_Cumulative Length_US


Save data and files


In [94]:
output_file = 'gas_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

gas_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/gas_pipelines.csv


In [95]:
output_file = 'oil_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

oil_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/oil_pipelines.csv


In [96]:
output_file = 'co2_pipelines.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

co2_pipelines_df_pivot.to_csv(output_file_path, index = False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/co2_pipelines.csv
