In [15]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import os.path
from countrycode import countrycode

from matplotlib.pyplot import cm

# Notebook-wide noise suppression.
# NOTE(review): this hides *every* Python warning, including deprecation and
# dtype warnings raised while reading the CSVs; narrow the filter when debugging.
warnings.filterwarnings('ignore')
# Turn off pandas' chained-assignment (SettingWithCopy) check as well.
pd.options.mode.chained_assignment = None


In [16]:
# Read in CSVs

# Repository root. Defaults to the original author's checkout; set the
# COUNTRY_TECH_GROWTH_DIR environment variable to run the notebook from a
# different location without editing this cell.
project_root = os.environ.get('COUNTRY_TECH_GROWTH_DIR',
                              '/Users/jennagreene/Documents/GitHub/country-tech-growth')

# Folder holding one CSV per technology characteristic.
new_directory = os.path.join(project_root, 'Tech_Characteristics')
os.chdir(new_directory)

# Raw directory listing; it can contain non-CSV entries (e.g. '.DS_Store'),
# which are filtered out when the files are loaded.
all_files = os.listdir(new_directory)

all_files



['MaterialUse.csv',
 '.DS_Store',
 'Tech_FirstYrCommercial.csv',
 'All_Technology_Characteristics.csv',
 'Granularity.csv',
 'TechReplacement.csv',
 'TechLifetime.csv',
 'Feed.csv',
 'Technology Categories.csv']

In [17]:
# Load every CSV found in the directory listing into its own DataFrame.
# The order of `dfs` follows the order of `all_files`; non-CSV entries are skipped.
csv_paths = [os.path.join(new_directory, file) for file in all_files if file.endswith('.csv')]
dfs = [pd.read_csv(csv_path) for csv_path in csv_paths]


In [18]:
# Technologies that appear in the 'Technology' column of every loaded CSV:
# start from the first file and narrow the set with each remaining file.
common_technologies = set(dfs[0]['Technology'])
for df in dfs[1:]:
    common_technologies &= set(df['Technology'])

print(common_technologies)
print(len(common_technologies))


{'Cyclohexane', 'High Speed Rail', 'Synthetic Filaments', 'Amazon Prime Users', 'Air-Source Heat Pumps', 'Nitrogen Fertilizer', 'Nuclear Weapons', 'Lead-Acid Battery Storage', 'BCG Vaccine', 'Laundry Dryers', 'Tin', 'Crop Harvester', 'Jet Aircraft', 'Microprocessor Clock Speed', 'Liquefied Natural Gas', 'Computing Growth', 'Sulphuric Acid', 'Sodium-Based Battery Storage', 'Natural Gas Power', 'Sodium Chlorate', 'Vinyl Acetate', 'Microcomputers', 'Aquaculture Production', 'Caprolactam', 'Titanium Sponge', 'Crude Oil', 'Pentaerythritol', 'Postal Traffic', 'All Biofuels', 'Herbicide-Tolerant Corn', 'Carbon Capture and Sequestration', 'Lead', 'Solid Biofuels', 'Random Access Memory', 'Acrylic Fiber', 'Coal Production', 'Motorcycles', 'Renewable Power', 'Microwaves', 'Paraxylene', 'Shotgun Sanger DNA Sequencing', 'Sand and Gravel|Construction', 'Marine Energy', 'Insect-Resistant Cotton', 'Vinyl Chloride', 'Beer Production', 'Primary Bauxite Production', 'Ethanol', 'Primary Magnesium', 'Elec

In [19]:
# Join all characteristic tables into one frame keyed on 'Technology',
# keeping only the technologies shared by every file.
combined_data = dfs[0]
for df in dfs[1:]:
    # Restrict the right-hand table to the shared technologies before joining.
    shared_rows = df.loc[df['Technology'].isin(common_technologies)]
    combined_data = combined_data.merge(shared_rows, on='Technology', how='inner')


combined_data['Technology'].unique()

array(['Acrylic Fiber', 'Acrylonitrile', 'Air-Source Heat Pumps',
       'All Biofuels', 'Amazon Prime Users', 'Ammonia Synthesis',
       'Aniline', 'Aquaculture Production', 'Automatic Transmission',
       'BCG Vaccine', 'Beer Production', 'Benzene', 'Bicycles', 'Biogas',
       'BisphenolA', 'Cable TV', 'Cadmium Refining', 'Canals',
       'Cane Sugar', 'Caprolactam', 'Capture Fisheries',
       'Carbon Capture and Sequestration', 'Caustic Soda', 'Cellphones',
       'Cement', 'Central Heating', 'Coal Power', 'Coal Production',
       'Cobalt', 'Compact Fluorescent Light Bulbs', 'Computing Growth',
       'Copper|Mining', 'Copper|Refining', 'Crop Harvester', 'Crude Oil',
       'Cyclohexane', 'Desalination Capacity', 'Dishwashers',
       'Disk Brakes', 'DTP1 Vaccine', 'DTP3 Vaccine', 'Ebook Reader',
       'Electric Bicycles', 'Electric Range', 'Electricity',
       'Electronic Ignition', 'Ethanol', 'Ethanolamine', 'Ethylene',
       'Ethylene Glycol', 'Flow Battery Storage',
    

In this step, we keep only the columns of interest: the categorical classification of each technology characteristic and, where one exists, the corresponding numerical column.

In [20]:


# Columns carried forward from the merged table, in output order.
charac_of_interest = [
    'Technology',
    # material use
    'Kg per dollar',
    'Final Material Use',
    # timeline
    'Year of Invention',
    'Year of First Embodiment of Tech',
    'Year of First Commercialization',
    # adoption characteristics
    'Need for Customization',
    'Complexity',
    'Type of Adopter',
    # granularity
    'Unit Price / Investment Size at t0 (09 dollars)',
    'Granularity Category',
    # lifetime
    'Average lifetime (years)',
    'Technology Lifetime (Final)',
    # replacement / feedstock
    'Strict replacement',
    'Broad Replacement?',
    'Requires a Feedstock?',
    # categorization
    'Other Category Type',
    'Patent Category Name',
]

# Select exactly those columns (raises KeyError if any of them is missing).
combined_data = combined_data.loc[:, charac_of_interest]

combined_data


Unnamed: 0,Technology,Kg per dollar,Final Material Use,Year of Invention,Year of First Embodiment of Tech,Year of First Commercialization,Need for Customization,Complexity,Type of Adopter,Unit Price / Investment Size at t0 (09 dollars),Granularity Category,Average lifetime (years),Technology Lifetime (Final),Strict replacement,Broad Replacement?,Requires a Feedstock?,Other Category Type,Patent Category Name
0,Acrylic Fiber,1.321877,High Material Use,1948,,1950,Mass-customized [2],Complex [3],Firms [1],43000000,Low,,Decades,Yes,Yes,,Chemicals and Industrial,Textiles; Paper
1,Acrylonitrile,0.793651,Medium Material Use,1893,,1930,Mass-customized [2],Complex [3],Firms [1],15050000,Low,30.0,Decades,No,Yes,,Chemicals and Industrial,Chemistry; Metallurgy
2,Air-Source Heat Pumps,0.014898,Medium Material Use,,,1938,Standardized [1],Simple [1],Individuals [2],9800,Medium,15.0,Years,Yes,Yes,,Appliances,Mechanical Engineering; Lighting; Heating; Wea...
3,All Biofuels,,Medium Material Use,n/d,,n/d,Mass-Customized [2],Complex [3],Firms [1],,Low,3.0,Years,No,No,Yes,Energy Supply,General Tagging of New Technological Developme...
4,Amazon Prime Users,,,,,2005,Standardized [1],Design-Intensive [2],Individuals [2],139,High,,,Yes,Yes,,Digitalization,Physics
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,Washing Machines,0.179997,Medium Material Use,1875,,1908,Standardized [1],Simple [1],Individuals [2],1766.488794,Medium,12.0,Years,Yes,Yes,,Appliances,Textiles; Paper
194,Water Heater,0.047249,Low Material Use,1868,1889,1906,Standardized [1],Simple [1],Individuals [2],,Medium,9.0,Years,No,Yes,,Appliances,Human Necessities
195,Wet Flue Gas Desulfurization Systems,0.000000,Medium Material Use,,,1931,Mass-customized [2],Design-Intensive [2],Firms [1],2253160800,Low,,Decades,No,No,No,Energy Supply,General Tagging of New Technological Developme...
196,YFV Vaccine,0.000042,Low Material Use,,,1937,Standardized [1],Simple [1],Individuals [2],16.7,High,3.0,Years,No,No,No,Food and Health,Human Necessities


In [21]:
# Rename columns

# Explicit old -> new mapping: each output label is tied to its source column
# by name, so reordering the selected columns can never silently mislabel data
# (a positional `combined_data.columns = [...]` assignment would).
# Columns that keep their name are not listed.
column_renames = {
    'Technology': 'technology',
    'Kg per dollar': 'Material Use Numerical',
    'Year of First Commercialization': 'FirstCommercialYr',
    'Unit Price / Investment Size at t0 (09 dollars)': 'Granularity Numerical',
    'Granularity Category': 'Granularity',
    'Average lifetime (years)': 'Average lifetime',
    'Technology Lifetime (Final)': 'Technology Lifetime',
    'Broad Replacement?': 'Broad replacement',
    'Requires a Feedstock?': 'Feedstock',
    'Other Category Type': 'Category Type',
}

# Expected final column layout of the exported table.
new_names = ['technology',
             'Material Use Numerical',
             'Final Material Use',
             'Year of Invention',
             'Year of First Embodiment of Tech',
             'FirstCommercialYr',
             'Need for Customization',
             'Complexity',
             'Type of Adopter',
             'Granularity Numerical',
             'Granularity',
             'Average lifetime',
             'Technology Lifetime',
             'Strict replacement',
             'Broad replacement',
             'Feedstock',
             'Category Type',
             'Patent Category Name']

# `rename` returns a new frame and is a no-op for already-renamed columns,
# so this cell can safely be re-run.
combined_data = combined_data.rename(columns=column_renames)

# Fail loudly instead of exporting a table with missing or misordered columns.
if list(combined_data.columns) != new_names:
    raise ValueError(
        'Unexpected columns after renaming: {}'.format(list(combined_data.columns))
    )


# Output location. Defaults to the original author's checkout; set the
# COUNTRY_TECH_GROWTH_DIR environment variable to write somewhere else.
new_directory = os.environ.get('COUNTRY_TECH_GROWTH_DIR',
                               '/Users/jennagreene/Documents/GitHub/country-tech-growth/')
os.chdir(new_directory)

combined_data.to_csv('combined_tech_characteristics_Updated.csv', index=False)
