In [1]:
import os
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco
import warnings
warnings.filterwarnings("ignore")

Data sources

- https://data.nber.org/data-appendix/w15319/
- https://data.nber.org/data-appendix/w15319/FinalCHAT_72909.csv
- https://www.nber.org/system/files/working_papers/w15319/w15319.pdf


In [2]:
## Get current working directory
print("Current working directory:", os.getcwd())

## Find path to raw data (for the raw files)
raw_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'raw_data'))

## Find path to folder for saving cleaned csv
cleaned_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'cleaned_data'))

## Find path to folder for inflation
inflation_data_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'inflation'))

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [3]:
# Find and read the CSV file from the raw_data folder
target_file = 'FinalCHAT_72909.csv'
target_file_path = os.path.join(raw_data_path, target_file)

target_dict = 'chat_dictionary.xlsx'
target_dict_path = os.path.join(raw_data_path, target_dict)

In [4]:
chat = pd.read_csv(target_file_path)


In [5]:
chat_dict = pd.read_excel(target_dict_path, skipfooter=2)
chat_dict

Unnamed: 0,VARIABLE NAME,DEFINITION,SOURCE
0,ag_harvester,Number of self‐propelled machines that reap an...,FAOSTAT (2004)
1,ag_milkingmachine,Number of installations consisting of several ...,FAOSTAT (2004)
2,ag_tractor,Number of wheel and crawler tractors (excludin...,FAOSTAT (2004)
3,atm,Number of electromechanical devices that permi...,BIS (various years) and ECB (various years)
4,aviationpkm,Civil aviation passenger‐KM traveled on schedu...,Mitchell (1998)
...,...,...,...
106,pctivprimeenroll,Primary school enrollment rate,Mitchell and Banks
107,pctivsecenroll,Secondary school enrollment rate,Mitchell and Banks
108,pctivprivateinv,Private investment as a share of GDP,Penn World Table (2007)
109,pctivpublicinv,Public investment as a share of GDP,Penn World Table (2007)


Options to include

In [6]:
var_names = list(chat.columns[2:])
var_names

['ag_harvester',
 'ag_milkingmachine',
 'ag_tractor',
 'atm',
 'aviationpkm',
 'aviationtkm',
 'bed_acute',
 'bed_hosp',
 'bed_longterm',
 'cabletv',
 'cellphone',
 'cheque',
 'computer',
 'creditdebit',
 'eft',
 'elecprod',
 'fert_total',
 'internetuser',
 'irrigatedarea',
 'kidney_dialpat',
 'kidney_homedialpat',
 'loom_auto',
 'loom_total',
 'mail',
 'med_catscanner',
 'med_lithotriptor',
 'med_mammograph',
 'med_mriunit',
 'med_radiationequip',
 'newspaper',
 'pctdaysurg_cataract',
 'pctdaysurg_cholecyst',
 'pctdaysurg_hernia',
 'pctdaysurg_lapcholecyst',
 'pctdaysurg_tonsil',
 'pctdaysurg_varicosevein',
 'pcthomedialysis',
 'pctimmunizdpt',
 'pctimmunizmeas',
 'pctirrigated',
 'pctmvbyarea',
 'pest_total',
 'pos',
 'radio',
 'railline',
 'railp',
 'railpkm',
 'railt',
 'railtkm',
 'ship_all',
 'ship_motor',
 'ship_sail',
 'ship_steam',
 'ship_steammotor',
 'shipton_all',
 'shipton_motor',
 'shipton_sail',
 'shipton_steam',
 'shipton_steammotor',
 'spindle_mule',
 'spindle_ring',
 

Read CHAT Data based on specific technology name selected

In [7]:
def read_chat(col_num, tech_name, unit='-', metric='Cumulative total capacity'):
    # Read data from CSV file, specifying column types and handling missing values
    chat = pd.read_csv(target_file_path, usecols=[0,1,col_num], dtype={col_num:float}, na_values=' ')
    
    # Replace misspelled country names
    chat.replace('Venezuala', 'Venezuela', inplace=True)
    
    # Remove rows corresponding to 'Indochina' from the DataFrame
    chat = chat[chat['country_name']!='Indochina']
    
    # Extract unique country names from the DataFrame
    country_list = list(set(chat['country_name']))
    
    # Initialize an empty list to store DataFrames for each country
    chat_df = []
    
    # Iterate through each country in the country list
    for country in country_list:
        # Select rows corresponding to the current country
        df = chat[chat['country_name']==country]
        
        # Set the year column as the index
        df.set_index('year', drop=True, inplace=True)
        
        # Transpose the DataFrame
        df = df.transpose()
        
        # Add columns for country name and country code
        df['Country Name'] = country
        if country == 'North Vietnam':
            country_code = 'VD'
        elif country == 'South Vietnam':
            country_code = 'VNM'
        elif country == 'Czechoslovakia':
            country_code = 'CSK'
        else:
            country_code = coco.convert(names=country, to='iso2')
        df['Country Code'] = country_code
        
        # Drop the 'country_name' index
        df.drop(index='country_name', inplace=True)
        
        # Append the modified DataFrame to the chat_df list
        chat_df.append(df)
    
    # Concatenate all DataFrames in chat_df into a single DataFrame
    chat_all = pd.concat(chat_df)
    
    # Add metadata columns
    chat_all['Data Source'] = 'CHAT'
    chat_all['Spatial Scale'] = 'National'
    chat_all['Unit'] = unit
    chat_all['Metric'] = metric
    chat_all['Technology Name'] = tech_name
    
    # Combine columns to create unique IDs
    chat_all['ID'] = chat_all['Technology Name'] + '_' + chat_all['Metric'] + '_' + chat_all['Country Code']
    
    # Set the ID column as the index
    chat_all.set_index('ID', drop=True, inplace=True)
    
    # Remove non-integer columns
    omit = []
    for col in chat_all.columns:
        if type(col) != int:
            omit.append(col)
    empty_rows = chat_all.drop(columns=omit)
    
    # Drop rows with all NaN values
    empty_rows.dropna(how='all', inplace=True)
    
    # Remove rows from the original DataFrame where the index label is not present in empty_rows
    na_idx = []
    for country in chat_all.index:
        if country not in empty_rows.index:
            na_idx.append(country)
    chat_all.drop(na_idx, inplace=True)
    
    # Define the path and file name for saving the processed DataFrame
    output_file = 'chat_' + tech_name + '.csv'
    output_file_path = os.path.join(cleaned_data_path, output_file)
    
    # Save the processed DataFrame to a CSV file
    chat_all.to_csv(output_file_path)
    print("Data saved to:", output_file_path)
    
    # Return the processed DataFrame
    return chat_all

Ag Harvesters

In [8]:
ag_harvester = read_chat(2, 'Crop Harvester', metric='Total Number')
ag_harvester

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Crop Harvester.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Crop Harvester_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_AE,,,,,,,,,,,...,,,,United Arab Emirates,AE,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_ET,,,,,,,,,,,...,,,,Ethiopia,ET,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_PH,,,,,,,,,,,...,,,,Philippines,PH,CHAT,National,-,Total Number,Crop Harvester
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Crop Harvester_Total Number_SN,,,,,,,,,,,...,,,,Senegal,SN,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Crop Harvester
Crop Harvester_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Crop Harvester


Steamships

In [9]:
steam_ships = read_chat(54, 'Steamships', metric='Total Number')
steam_ships

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Steamships.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Steamships_Total Number_IE,,,,,,,,,,,...,,,,Ireland,IE,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_DK,,,,,,,,,,,...,,,,Denmark,DK,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_GR,,,,,,,,,,,...,,,,Greece,GR,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_CL,,,,,,,,,,,...,,,,Chile,CL,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_AT,,,,,,,,,,,...,,,,Austria,AT,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_ES,,,,,,,,,,,...,,,,Spain,ES,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_DE,,,,,,,,,,,...,,,,Germany,DE,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_CU,,,,,,,,,,,...,,,,Cuba,CU,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_NL,,,,,,,,,,,...,,,,Netherlands,NL,CHAT,National,-,Total Number,Steamships
Steamships_Total Number_BE,,,,,,,,,,,...,,,,Belgium,BE,CHAT,National,-,Total Number,Steamships


Telephones

In [10]:
telephones = read_chat(91, 'Telephones', metric='Total Number')
telephones

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Telephones.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Telephones_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_AE,,,,,,,,,,,...,,,,United Arab Emirates,AE,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Telephones
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Telephones_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_NP,,,,,,,,,,,...,,,,Nepal,NP,CHAT,National,-,Total Number,Telephones
Telephones_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Telephones


Railroads

In [11]:
rail = read_chat(46, 'Railroad', unit='kilometers',metric='Total Length')
rail

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Railroad.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Railroad_Total Length_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_ET,,,,,,,,,,,...,,,,Ethiopia,ET,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_PH,,,,,,,,,,,...,,,,Philippines,PH,CHAT,National,kilometers,Total Length,Railroad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Railroad_Total Length_SN,,,,,,,,,,,...,,,,Senegal,SN,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,kilometers,Total Length,Railroad
Railroad_Total Length_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,kilometers,Total Length,Railroad


TVs

In [12]:
tv = read_chat(97, 'Television', metric='Total Number')
tv

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Television.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Television_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Television
Television_Total Number_AE,,,,,,,,,,,...,,,,United Arab Emirates,AE,CHAT,National,-,Total Number,Television
Television_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Television
Television_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Television
Television_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Television
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Television_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Television
Television_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Television
Television_Total Number_NP,,,,,,,,,,,...,,,,Nepal,NP,CHAT,National,-,Total Number,Television
Television_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Television


Radios

In [13]:
# Radio	CHAT database (radio)
radio = read_chat(45, 'Radio', metric='Total Number')
radio

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Radio.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Radio_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Radio
Radio_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Radio
Radio_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Radio
Radio_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Radio
Radio_Total Number_ET,,,,,,,,,,,...,,,,Ethiopia,ET,CHAT,National,-,Total Number,Radio
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Radio_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Radio
Radio_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Radio
Radio_Total Number_NP,,,,,,,,,,,...,,,,Nepal,NP,CHAT,National,-,Total Number,Radio
Radio_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Radio


Cars

In [14]:
# Passenger Cars	CHAT (vehicle_car)
cars = read_chat(102, 'Passenger Vehicles', metric='Total Number')
cars

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Passenger Vehicles.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Passenger Vehicles_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_AE,,,,,,,,,,,...,,,,United Arab Emirates,AE,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Passenger Vehicles
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Passenger Vehicles_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_NP,,,,,,,,,,,...,,,,Nepal,NP,CHAT,National,-,Total Number,Passenger Vehicles
Passenger Vehicles_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Passenger Vehicles


Telegrams

In [15]:
# Telegraph Traffic	CHAT (telegram)

telegram = read_chat(90, 'Telegraph Traffic', metric='Total Number')
telegram

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Telegraph Traffic.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Telegraph Traffic_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_PH,,,,,,,,,,,...,,,,Philippines,PH,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_TW,,,,,,,,,,,...,,,,Taiwan,TW,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_LK,,,,,,,,,,,...,,,,Sri Lanka,LK,CHAT,National,-,Total Number,Telegraph Traffic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Telegraph Traffic_Total Number_SD,,,,,,,,,,,...,,,,Sudan,SD,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_NG,,,,,,,,,,,...,,,,Nigeria,NG,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_CN,,,,,,,,,,,...,,,,China,CN,CHAT,National,-,Total Number,Telegraph Traffic
Telegraph Traffic_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Telegraph Traffic


Mail

In [16]:
# Postal traffic	CHAT (mail)

mail = read_chat(25, 'Postal Traffic', metric='Total Number')
mail

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Postal Traffic.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Postal Traffic_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_PH,,,,,,,,,,,...,,,,Philippines,PH,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_TW,,,,,,,,,,,...,,,,Taiwan,TW,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_LK,,,,,,,,,,,...,,,,Sri Lanka,LK,CHAT,National,-,Total Number,Postal Traffic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Postal Traffic_Total Number_NG,,,,,,,,,,,...,,,,Nigeria,NG,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_CN,,,,,,,,,,,...,,,,China,CN,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Postal Traffic
Postal Traffic_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Postal Traffic


Cellphones

In [17]:
# Cellphones	CHAT (cellphone)

cells = read_chat(12, 'Cellphones', metric='Total Number')
cells

Data saved to: /Users/jennagreene/Documents/GitHub/HATCH_data/cleaned_data/chat_Cellphones.csv


year,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2006,2007,2008,Country Name,Country Code,Data Source,Spatial Scale,Unit,Metric,Technology Name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cellphones_Total Number_KG,,,,,,,,,,,...,,,,Kyrgyzstan,KG,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_AE,,,,,,,,,,,...,,,,United Arab Emirates,AE,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_CD,,,,,,,,,,,...,,,,Democratic Republic of the Congo,CD,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_GM,,,,,,,,,,,...,,,,Gambia,GM,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_MY,,,,,,,,,,,...,,,,Malaysia,MY,CHAT,National,-,Total Number,Cellphones
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cellphones_Total Number_LY,,,,,,,,,,,...,,,,Libya,LY,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_NO,,,,,,,,,,,...,,,,Norway,NO,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_NP,,,,,,,,,,,...,,,,Nepal,NP,CHAT,National,-,Total Number,Cellphones
Cellphones_Total Number_PL,,,,,,,,,,,...,,,,Poland,PL,CHAT,National,-,Total Number,Cellphones


Test variable definitions

In [18]:
for var_def in chat_dict[chat_dict[chat_dict.columns[0]]=='telephone'][chat_dict.columns[1]]:
    print(var_def)

Number of mainline telephone lines connecting a customer's equipment to the public switched telephone network as of year end


In [19]:
for var_idx in chat_dict[chat_dict[chat_dict.columns[0]]=='ag_harvester'].index:
    print(var_idx)

0


In [20]:
chat_dict.iloc[89]

VARIABLE NAME                                             telephone
DEFINITION        Number of mainline telephone lines connecting ...
SOURCE                           Mitchell (1998), WORLD BANK (2007)
Name: 89, dtype: object