In [1]:
import os
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco
import warnings
warnings.filterwarnings("ignore")

Data sources:

Brazil: Energy Research Company (Empresa de Pesquisa Energética, EPE). (2022). Historical Series and Matrices, Chapter 2, Table 2.30. Retrieved from https://www.epe.gov.br/pt/publicacoes-dados-abertos/publicacoes/BEN-Series-Historicas-Completas


USA: USDA Economic Research Service. (2022). U.S. Bioenergy Statistics, Table 2. Retrieved from https://www.ers.usda.gov/data-products/u-s-bioenergy-statistics/u-s-bioenergy-statistics/
                

In [2]:
## Show where the notebook is being executed from
print("Current working directory:", os.getcwd())

## Resolve the two sibling folders of the working directory:
##   raw_data     -> raw input files
##   cleaned_data -> folder where the cleaned csv files are saved
raw_data_path, cleaned_data_path = (
    os.path.abspath(os.path.join(os.pardir, folder_name))
    for folder_name in ('raw_data', 'cleaned_data')
)

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


Brazil Ethanol

In [438]:
## Load the Brazil sheet of the ethanol workbook.
## header=1 takes the column labels from the second row of the sheet and
## skipfooter=1 leaves out the last row of the sheet.
ethanol_brazil_filepath = os.path.join(raw_data_path, 'Ethanol_TimeSeries.xlsx')
ethanol_brazil = pd.read_excel(
    ethanol_brazil_filepath,
    sheet_name='Brazil Ethanol',
    header=1,
    skipfooter=1,
)

In [439]:
## Keep only the year and production columns, then transpose so that every
## year becomes its own column of a single-row frame.
columns_to_keep = ['Year', 'Production (liters)']
ethanol_brazil = (
    ethanol_brazil.loc[:, columns_to_keep]
    .set_index('Year')
    .T
    .reset_index(drop=True)
)
## Swap any tuple column label for its first element; other labels are kept.
ethanol_brazil.columns = [
    label[0] if isinstance(label, tuple) else label
    for label in ethanol_brazil.columns
]

In [440]:
## Attach the constant metadata columns describing this series
ethanol_brazil = ethanol_brazil.assign(**{
    'Data Source': 'Energy Research Company Brazil',
    'Technology Name': 'Ethanol',
    'Country Name': 'Brazil',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Unit': 'Liters',
})
## Convert the country name into its two-letter ISO code
ethanol_brazil['Country Code'] = coco.convert(
    names=ethanol_brazil['Country Name'],
    to='iso2',
)


In [441]:
## Build the record identifier as <Technology Name>_<Metric>_<Country Code>
ethanol_brazil['ID'] = (
    ethanol_brazil['Technology Name']
    + '_' + ethanol_brazil['Metric']
    + '_' + ethanol_brazil['Country Code']
)
## 'ID' is deliberately kept as a regular column (not the index) so that it is
## still written out by the later to_csv(..., index=False).
ethanol_brazil

Unnamed: 0,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,...,2020,2021,Data Source,Technology Name,Country Name,Spatial Scale,Metric,Unit,Country Code,ID
0,625000000.0,624000000.0,684000000.0,652000000.0,615000000.0,580000000.0,642000000.0,1388000000.0,2248000000.0,2854000000.0,...,32598860000.0,29897910000.0,Energy Research Company Brazil,Ethanol,Brazil,National,Annual Production,Liters,BR,Ethanol_Annual Production_BR


In [442]:
## Save the cleaned Brazil series as csv in the cleaned-data folder
## (index=False: the row index is not written to the file)
output_file = 'brazil_ethanol.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

ethanol_brazil.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/brazil_ethanol.csv


USA Ethanol

In [454]:
## Load the US sheet of the ethanol workbook.
## The workbook is read through this section's own path variable so the cell
## does not rely on the Brazil section having been run first.
## skipfooter=1 leaves out the last row of the sheet.
ethanol_usa_filepath = os.path.join(raw_data_path, 'Ethanol_TimeSeries.xlsx')
ethanol_usa = pd.read_excel(
    ethanol_usa_filepath,
    sheet_name='US Ethanol',
    skipfooter=1,
)

In [456]:
## Keep only the year and production columns, then transpose so that every
## year becomes its own column of a single-row frame.
columns_to_keep = ['Year', 'Production (Liters)']
ethanol_usa = (
    ethanol_usa.loc[:, columns_to_keep]
    .set_index('Year')
    .T
    .reset_index(drop=True)
)
## Swap any tuple column label for its first element; other labels are kept.
ethanol_usa.columns = [
    label[0] if isinstance(label, tuple) else label
    for label in ethanol_usa.columns
]

In [457]:
## Attach the constant metadata columns describing this series
ethanol_usa = ethanol_usa.assign(**{
    'Data Source': 'USDA Economic Research Service',
    'Technology Name': 'Ethanol',
    'Country Name': 'United States',
    'Spatial Scale': 'National',
    'Metric': 'Annual Production',
    'Unit': 'Liters',
})
## Convert the country name into its two-letter ISO code
ethanol_usa['Country Code'] = coco.convert(
    names=ethanol_usa['Country Name'],
    to='iso2',
)


In [458]:
## Build the record identifier as <Technology Name>_<Metric>_<Country Code>
ethanol_usa['ID'] = (
    ethanol_usa['Technology Name']
    + '_' + ethanol_usa['Metric']
    + '_' + ethanol_usa['Country Code']
)
## 'ID' is deliberately kept as a regular column (not the index) so that it is
## still written out by the later to_csv(..., index=False); the frame is
## displayed exactly as it will be saved.
ethanol_usa


Unnamed: 0,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,...,2020,2021,Data Source,Technology Name,Country Name,Spatial Scale,Metric,Unit,Country Code,ID
0,314794695.6,853443400.0,1572543000.0,1931695000.0,2336317000.0,2695310000.0,3100251000.0,3144131000.0,3189761000.0,2830290000.0,...,52772470000.0,56838250000.0,USDA Economic Research Service,Ethanol,United States,National,Annual Production,Liters,US,Ethanol_Annual Production_US


In [459]:
## Save the cleaned US series as csv in the cleaned-data folder
## (index=False: the row index is not written to the file)
output_file = 'usa_ethanol.csv'
output_file_path = os.path.join(cleaned_data_path, output_file)

ethanol_usa.to_csv(path_or_buf=output_file_path, index=False)
print("Data saved to:", output_file_path)

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/usa_ethanol.csv
