In [5]:
# Path handling (os), tabular processing (pandas) and country-name -> ISO code
# conversion (country_converter) used by the cells below.
import os
import pandas as pd
# NOTE(review): Series and DataFrame are not referenced in the cells visible here.
from pandas import Series, DataFrame
import country_converter as coco
import warnings
# Suppress every warning for the rest of the session; this also hides pandas
# deprecation and chained-assignment warnings raised by later cells.
warnings.filterwarnings("ignore")

Data source: Statista
Files (one Excel workbook per country, processed in this order):
- Austria
- Czechia
- Germany
- Netherlands
- Norway
- Poland
- Switzerland

In [6]:
## Show where the notebook is running from; both data folders below are
## resolved relative to this directory.
print("Current working directory:", os.getcwd())

## Parent of the working directory (absolute, normalised)
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))

## Raw input workbooks live in <parent>/raw_data/ashp
raw_data_path = os.path.join(parent_dir, 'raw_data', 'ashp')

## Cleaned CSV files are written to <parent>/cleaned_data
cleaned_data_path = os.path.join(parent_dir, 'cleaned_data')

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


Austria

In [7]:
# Austria workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'austria.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
austria = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [8]:

# Sum heat pumps: total = domestic hot water units + space heating units.
austria['Total ASHPs'] = austria['Heat pumps for domestic hot water'] + austria['Heat pumps for space heating']

# Only include ASHPs for hot water heating or space heating: drop the unnamed
# leading spreadsheet column, the ventilation and industrial categories, and the
# two components that were just summed. A list (not a set) keeps the labels ordered.
austria = austria.drop(columns=[
    'Unnamed: 0',
    'Heat pumps for ventilation',
    'Industrial heat pumps',
    'Heat pumps for domestic hot water',
    'Heat pumps for space heating',
])

# Transpose: the remaining category becomes the single row and the former row
# index becomes the column labels.
austria = austria.T

# Multiply value by 1000.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
austria = austria.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)

In [9]:

# Constant descriptor columns, identical for every row of the Austria table.
austria = austria.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Austria',
})

# Convert the country name to its ISO2 code and discard rows without a code.
# NOTE(review): dropna only removes rows if the converter yields NaN for unknown
# names - confirm against country_converter's behaviour for unmatched input.
austria['Country Code'] = coco.convert(names=austria['Country Name'], to='iso2')
austria = austria.dropna(subset=['Country Code'])

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
austria['ID'] = (
    austria['Technology Name']
    + '_' + austria['Metric']
    + '_' + austria['Country Code']
)
austria = austria.set_index('ID', drop=False)



In [10]:
# Destination: <cleaned_data_path>/austria_ashp.csv
output_file = 'austria_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
austria.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/austria_ashp.csv


Czechia

In [11]:
# Czechia workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'czechia.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
czechia = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [12]:
# Drop the unnamed leading spreadsheet column and the 'Ground-water',
# 'Air-exhaust water' and 'Water-water' categories; whatever columns remain are
# kept (presumably the air-source categories - confirm against the workbook).
# A list (not a set) keeps the labels ordered.
czechia = czechia.drop(columns=['Unnamed: 0', 'Ground-water', 'Air-exhaust water', 'Water-water'])

# Transpose: the remaining categories become rows and the former row index
# becomes the column labels.
czechia = czechia.T

# Multiply value by 1000.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
czechia = czechia.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)

In [13]:
# Constant descriptor columns, identical for every row of the Czechia table.
czechia['Data Source'] = 'Statista'
czechia['Unit'] = '-'
czechia['Technology Name'] = 'Air-Source Heat Pumps'
czechia['Spatial Scale'] = 'National'
czechia['Metric'] = 'Annual Production'
czechia['Country Name'] = 'Czechia'

# Convert the country name to its ISO2 code and discard rows without a code.
# NOTE(review): dropna only removes rows if the converter yields NaN for unknown
# names - confirm against country_converter's behaviour for unmatched input.
czechia['Country Code'] = coco.convert(names=czechia['Country Name'], to='iso2')
czechia = czechia.dropna(subset=['Country Code'])

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
czechia['ID'] = czechia['Technology Name'] + '_' + czechia['Metric'] + '_' + czechia['Country Code']
# Reassign instead of inplace=True: the frame was just produced by dropna, and
# plain reassignment matches how every other country in this notebook sets its index.
czechia = czechia.set_index('ID', drop=False)



In [14]:
# Destination: <cleaned_data_path>/czechia_ashp.csv
output_file = 'czechia_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
czechia.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/czechia_ashp.csv


Germany

In [15]:
# Germany workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'germany.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
germany = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [16]:
# Remove the unnamed leading spreadsheet column, then transpose so the former
# row index becomes the column labels.
germany = germany.drop(columns='Unnamed: 0')
germany = germany.T
# Convert because the data is in 1000s.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
germany = germany.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)

In [17]:

# Constant descriptor columns, identical for every row of the Germany table.
germany = germany.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Germany',
})

# ISO2 country code derived from the country name.
germany['Country Code'] = coco.convert(names=germany['Country Name'], to='iso2')

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
germany['ID'] = (
    germany['Technology Name']
    + '_' + germany['Metric']
    + '_' + germany['Country Code']
)
germany = germany.set_index('ID', drop=False)

# Display the finished table as the cell output.
germany


Unnamed: 0_level_0,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,...,2022,2023,Data Source,Unit,Technology Name,Spatial Scale,Metric,Country Name,Country Code,ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Air-Source Heat Pumps_Annual Production_DE,15000.0,14000.0,15500.0,18500.0,25500.0,57500.0,57500.0,78000.0,67500.0,60000.0,...,281000.0,438500.0,Statista,-,Air-Source Heat Pumps,National,Annual Production,Germany,DE,Air-Source Heat Pumps_Annual Production_DE


In [18]:
# Destination: <cleaned_data_path>/germany_ashp.csv
output_file = 'germany_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
germany.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)



Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/germany_ashp.csv


Netherlands

In [19]:
# Netherlands workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'netherlands.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
netherlands = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [20]:
# Remove the unnamed leading spreadsheet column, then transpose so the former
# row index becomes the column labels.
netherlands = netherlands.drop(columns='Unnamed: 0')
netherlands = netherlands.T
# Convert because the data is in 1000s.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
netherlands = netherlands.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)

In [21]:
# Constant descriptor columns, identical for every row of the Netherlands table.
netherlands = netherlands.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Netherlands',
})

# ISO2 country code derived from the country name.
netherlands['Country Code'] = coco.convert(names=netherlands['Country Name'], to='iso2')

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
netherlands['ID'] = (
    netherlands['Technology Name']
    + '_' + netherlands['Metric']
    + '_' + netherlands['Country Code']
)
netherlands = netherlands.set_index('ID', drop=False)

In [22]:
# Destination: <cleaned_data_path>/netherlands_ashp.csv
output_file = 'netherlands_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
netherlands.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/netherlands_ashp.csv


Norway

In [23]:
# Norway workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'norway.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
norway = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [24]:
# Remove the unnamed leading spreadsheet column, then transpose so the former
# row index becomes the column labels.
norway = norway.drop(columns='Unnamed: 0')
norway = norway.T
# Convert because the data is in 1000s.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
norway = norway.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)

In [25]:
# Constant descriptor columns, identical for every row of the Norway table.
norway = norway.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Norway',
})

# ISO2 country code derived from the country name.
norway['Country Code'] = coco.convert(names=norway['Country Name'], to='iso2')

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
norway['ID'] = (
    norway['Technology Name']
    + '_' + norway['Metric']
    + '_' + norway['Country Code']
)
norway = norway.set_index('ID', drop=False)

In [26]:
# Destination: <cleaned_data_path>/norway_ashp.csv
output_file = 'norway_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
norway.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/norway_ashp.csv


Poland

In [27]:
# Poland workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'poland.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
poland = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [28]:
# Total = air/water heating units + air/water hot-water units.
poland['Total ASHPs'] = poland['air/water (heating)'] + poland['air/water (hot water)']

# Keep only the total: drop the unnamed leading spreadsheet column, the
# brine/water category and the two summed components, then transpose so the
# former row index becomes the column labels.
# NOTE(review): unlike the other countries in this notebook, no x1000 scaling is
# applied here - confirm the Poland workbook reports absolute units.
poland = poland.drop(
    columns=[
        'Unnamed: 0',
        'brine/water (heating)',
        'air/water (heating)',
        'air/water (hot water)',
    ]
).T

In [29]:
# Constant descriptor columns, identical for every row of the Poland table.
poland = poland.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Poland',
})

# ISO2 country code derived from the country name.
poland['Country Code'] = coco.convert(names=poland['Country Name'], to='iso2')

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
poland['ID'] = (
    poland['Technology Name']
    + '_' + poland['Metric']
    + '_' + poland['Country Code']
)
poland = poland.set_index('ID', drop=False)

In [30]:
# Destination: <cleaned_data_path>/poland_ashp.csv
output_file = 'poland_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
poland.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/poland_ashp.csv


Switzerland

In [31]:
# Switzerland workbook inside the raw ASHP data folder.
file_path = os.path.join(raw_data_path, 'switzerland.xlsx')

# Read the 'Data' sheet: column labels are taken from row index 4 (zero-based)
# and the second spreadsheet column (index_col=1) becomes the row index.
switzerland = pd.read_excel(
    io=file_path,
    sheet_name='Data',
    header=4,
    index_col=1,
)

In [32]:
# Total = space heating units + water heating units.
switzerland['Total ASHPs'] = switzerland['For space heating'] + switzerland['For water heating']

# Keep only the total: drop the unnamed leading spreadsheet column and the two
# summed components (a list, not a set, keeps the labels ordered), then
# transpose so the former row index becomes the column labels.
switzerland = switzerland.drop(columns=['Unnamed: 0', 'For space heating', 'For water heating'])
switzerland = switzerland.T

# Multiply value by 1000.
# The check accepts any numeric dtype: a column that pandas parsed as integer
# (all whole numbers) would be skipped by a strict `dtype == 'float64'` test and
# silently left unscaled.
switzerland = switzerland.apply(lambda col: col * 1000 if pd.api.types.is_numeric_dtype(col) else col)


In [33]:
# Constant descriptor columns, identical for every row of the Switzerland table.
switzerland = switzerland.assign(**{
    'Data Source': 'Statista',
    'Unit': '-',
    'Technology Name': 'Air-Source Heat Pumps',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Country Name': 'Switzerland',
})

# ISO2 country code derived from the country name.
switzerland['Country Code'] = coco.convert(names=switzerland['Country Name'], to='iso2')

# ID = "<Technology Name>_<Metric>_<Country Code>"; it is used as the index and
# also kept as a regular column (drop=False).
switzerland['ID'] = (
    switzerland['Technology Name']
    + '_' + switzerland['Metric']
    + '_' + switzerland['Country Code']
)
switzerland = switzerland.set_index('ID', drop=False)

In [34]:
# Destination: <cleaned_data_path>/switzerland_ashp.csv
output_file = 'switzerland_ashp.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

# index=False leaves the DataFrame index out of the written file.
switzerland.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/switzerland_ashp.csv
