In [1]:
import os
import pandas as pd
from pandas import Series, DataFrame
#import string
import country_converter as coco

In [2]:
os.chdir('/Users/ariana/desktop/historical_tech/raw data/cost data/unisd')

unisd_files = []
for file in os.listdir():
    if '.csv' in file:
        unisd_files.append(file)
unisd_files

['Television receivers.csv',
 'Radio receivers.csv',
 'Potassic fertilizers.csv',
 'Caustic soda.csv',
 'Bicycles and other cycles.csv',
 'Electric water heaters and immersion heaters.csv',
 'Turbo-jets of a thrust not exceeding 25 kN.csv',
 'Household washing and drying machines.csv',
 'Telephones and videophones.csv',
 'Motorcycles and scooters.csv',
 'Synthetic filament tow and staple fibers.csv',
 'Household refrigerators and freezers.csv',
 'Microwave ovens.csv',
 'Turbo-jets of a thrust exceeding 25 kN.csv',
 'Beer.csv',
 'Combined refrigerators-freezers.csv',
 'Refined copper.csv',
 'Air conditioning machines.csv',
 'Vacuum cleaners.csv',
 'Cars.csv',
 'Synthetic filament yarn.csv',
 'Nitrogenous fertilizers.csv']

In [3]:
# http://data.un.org/Default.aspx

In [4]:
def read_unisd(file):
    # load data and omit footnotes
    df = pd.read_csv(file)
    end_idx = None
    for idx in range(len(df)):
        val = df.iloc[idx]['Unit']
        if val == 'Footnote':
            end_idx = idx
            break
    df = df[:end_idx]

    # drop footnote columns (alternating columns with data in '2000' and notes in '2000 Footnotes')
    drop_cols = []
    for col in df.columns:
        if 'Footnotes' in col:
            drop_cols.append(col)
    df.drop(columns=drop_cols, inplace=True)

    # drop rows where country doesn't have both mil. USD and thousand metric tons data
    all_countries = list(set(df['Country or Area']))
    countries_lt2_units = []
    for country in all_countries:
        country_units = list(set(df[df['Country or Area']==country]['Unit']))
        if len(country_units) != 2:
            countries_lt2_units.append(country)
    idx_lt2_units = []
    for idx in range(len(df)):
        val = df.iloc[idx]['Country or Area']
        if val in countries_lt2_units:
            idx_lt2_units.append(idx)
    df.drop(index=idx_lt2_units, inplace=True)
    
    # pull units before omitting col (need rows of just float in order to divide entire rows)
    df['country x unit'] = df['Country or Area'] + ' | ' + df['Unit']
    num = df['Unit'].iloc[0]
    denom = df['Unit'].iloc[1]
    unit = num + '/' + denom
    if unit=='Mil. USD/Units':
        unit='million USD/unit'
    elif unit=='Mil. USD/Thousand hectolitres':
        unit='thousand USD/hectolitre'
    elif unit=='Mil. USD/Thousand metric tons':
        unit='thousand USD/metric ton'
    elif unit=='Mil. USD/Thousand units':
        unit='thousand USD/unit'

    # calculate USD per metric ton and omit extra rows
    df.drop(columns=['Country or Area', 'Unit'], inplace=True)
    df.set_index('country x unit', drop=True, inplace=True)
    drop_rows = []
    i = 0 
    while i < len(df):
        new_row = df.iloc[i] / df.iloc[i + 1]
        df.iloc[i] = new_row
        drop_rows.append(df.iloc[i + 1].name)
        i += 2
    df.drop(index=drop_rows, inplace=True)
    df.dropna(how='all', inplace=True)
        
    # input summary data
    df['Unit'] = unit
    df['Data Source'] = 'UN Industrial Commodity Statistics Database'
    df['Metric'] = 'Price'
    df['Spatial Scale'] = 'National'
    if file=='Electric water heaters and immersion heaters.csv':
        df['Technology Name'] = 'Electric Water Heaters and Immersion Heaters'
    elif file=='Bicycles and other cycles.csv':
        df['Technology Name'] = 'Bicycles and Other Cycles'
    elif file=='Household washing and drying machines.csv':
        df['Technology Name'] = 'Household Washing and Drying Machines'
    elif file=='Telephones and videophones.csv':
        df['Technology Name'] = 'Telephones and Videophones'
    elif file=='Motorcycles and scooters.csv':
        df['Technology Name'] = 'Motorcycles and Scooters'
    elif file=='Synthetic filament tow and staple fibers.csv':
        df['Technology Name'] = 'Synthetic Filament Tow and Staple Fibers'
    elif file=='Household refrigerators and freezers.csv':
        df['Technology Name'] = 'Household Refrigerators and Freezers'
    elif file=='Turbo-jets of a thrust not exceeding 25 kN.csv':
        df['Technology Name'] = 'Turbo-Jets of a Thrust Not Exceeding 25 kN'
    else:
        df['Technology Name'] = file[:-4].title()
    country_list = []
    iso_list = []
    for idx in df.index:
        cutoff = idx.index(' |')
        name = idx[:cutoff]
        code = coco.convert(names=name, to='iso2')
        country_list.append(name)
        iso_list.append(code)
    df['Country Name'] = country_list
    df['Country Code'] = iso_list
    df['ID'] = df['Technology Name'] + '_' + df['Metric'] + '_' + df['Country Code']
    df.set_index('ID', drop=True, inplace=True)
    return df

In [5]:
unisd_df = []
for file in unisd_files:
    df = read_unisd(file)
    unisd_df.append(df)

In [6]:
all_unisd = pd.concat(unisd_df)
all_unisd

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2014,2015,2016,Unit,Data Source,Metric,Spatial Scale,Technology Name,Country Name,Country Code
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Television Receivers_Price_AZ,,,,,,0.000055,,,,,...,0.000042,0.000037,,million USD/unit,UN Industrial Commodity Statistics Database,Price,National,Television Receivers,Azerbaijan,AZ
Television Receivers_Price_BD,,,,,,,0.000153,0.000135,0.000137,0.000135,...,,,,million USD/unit,UN Industrial Commodity Statistics Database,Price,National,Television Receivers,Bangladesh,BD
Television Receivers_Price_BR,,,,,,0.000171,0.000127,0.000133,0.000119,0.000121,...,0.000217,0.000166,,million USD/unit,UN Industrial Commodity Statistics Database,Price,National,Television Receivers,Brazil,BR
Television Receivers_Price_BG,,,,0.000168,0.0,0.000000,,,,,...,,,,million USD/unit,UN Industrial Commodity Statistics Database,Price,National,Television Receivers,Bulgaria,BG
Television Receivers_Price_CZ,,,,,,,,,,,...,,0.000383,,million USD/unit,UN Industrial Commodity Statistics Database,Price,National,Television Receivers,Czechia,CZ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Nitrogenous Fertilizers_Price_TR,,,,,,,,,,,...,0.317629,0.281149,0.232374,thousand USD/metric ton,UN Industrial Commodity Statistics Database,Price,National,Nitrogenous Fertilizers,Turkey,TR
Nitrogenous Fertilizers_Price_GB,,,,,,,,,,,...,,,,thousand USD/metric ton,UN Industrial Commodity Statistics Database,Price,National,Nitrogenous Fertilizers,United Kingdom,GB
Nitrogenous Fertilizers_Price_UY,,,,,,,,,,,...,,,,thousand USD/metric ton,UN Industrial Commodity Statistics Database,Price,National,Nitrogenous Fertilizers,Uruguay,UY
Nitrogenous Fertilizers_Price_UZ,,,,,,,,,,,...,,,0.441300,thousand USD/metric ton,UN Industrial Commodity Statistics Database,Price,National,Nitrogenous Fertilizers,Uzbekistan,UZ


In [7]:
set(all_unisd['Unit'])

{'million USD/unit',
 'thousand USD/hectolitre',
 'thousand USD/metric ton',
 'thousand USD/unit'}

In [8]:
os.chdir('/Users/ariana/desktop/historical_tech/cleaned data')
all_unisd.to_csv('unisd.csv')