In [1]:
import os
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco

Data sources:
- https://ourworldindata.org/grapher/technology-adoption-by-households-in-the-united-states

In [2]:
## Report where the notebook is running; every data folder below is located
## relative to this directory's parent
working_dir = os.getcwd()
print("Current working directory:", working_dir)

## Folder holding the raw input files
raw_data_path = os.path.abspath(os.path.join(working_dir, os.pardir, 'raw_data'))

## Folder that receives the cleaned csv files
cleaned_data_path = os.path.abspath(os.path.join(working_dir, os.pardir, 'cleaned_data'))

## Folder holding the inflation data
inflation_data_path = os.path.abspath(os.path.join(working_dir, os.pardir, 'inflation'))

Current working directory: /Users/jennagreene/Documents/GitHub/HATCH_data/reading_files


In [3]:
# The file name carries its own leading "/" so it can be appended directly
# to the raw-data folder path.
target_file_name = "/technology-adoption-by-households-in-the-united-states.csv"
target_filepath = f"{raw_data_path}{target_file_name}"

In [4]:
# Load the raw table once to discover which technologies ('Entity' values) it contains.
owd = pd.read_csv(target_filepath)

# Technologies that are excluded from this export.
drop_list = ['Automobile','Cellular phone','Dryer','Electric power','Electricity access','Household refrigerator',
             'Landline','Refrigerator','Washer','Washing machine', 'Shipping container port infrastructure',
            'Radio','Television']

# Keep the remaining technologies in order of first appearance in the file.
# Filtering (rather than building a set and calling list.remove) gives the same
# order on every kernel restart and does not fail when a drop_list entry is
# absent from the file.
excluded = set(drop_list)
tech_list = [name for name in owd['Entity'].unique() if name not in excluded]
tech_list

['Iron',
 'Households with only mobile phones (no landlines)',
 'Home air conditioning',
 'Internet',
 'Power steering',
 'Videocassette recorder',
 'Automatic transmission',
 'Stove',
 'Colour TV',
 'Vacuum',
 'Central heating',
 'Freezer',
 'Radial tires',
 'Dishwasher',
 'Computer',
 'RTGS adoption',
 'Cable TV',
 'Social media usage',
 'Nox pollution controls (boilers)',
 'Podcasting',
 'Electric Range',
 'Microwave',
 'Tablet',
 'Ebook reader',
 'Running water',
 'Amazon Prime users',
 'Disk brakes',
 'Microcomputer',
 'Smartphone usage',
 'Water Heater',
 'Electronic ignition',
 'Flush toilet']

In [5]:
# Technologies whose 'Metric' label is something other than 'Share of Households'.
_OWD_MARKET_SHARE_TECHS = frozenset([
    'Automatic transmission', 'Disk brakes', 'Electronic ignition',
    'Power steering', 'Radial tires', 'RTGS adoption',
])
_OWD_POPULATION_SHARE_TECHS = frozenset([
    'Ebook reader', 'Podcasting', 'Smartphone usage', 'Social media usage', 'Tablet',
])

# Title-cased technology names mapped to the names written to the cleaned files.
_OWD_DISPLAY_NAMES = {
    'Rtgs Adoption': 'Real-Time Gross Settlement Adoption',
    'Dishwasher': 'Dishwashers',
    'Microwave': 'Microwaves',
    'Vacuum': 'Vacuums',
    'Computer': 'Home Computers',
    'Internet': 'Household Internet Access',
    'Microcomputer': 'Microcomputers',
    'Cable Tv': 'Cable TV',
    'Colour Tv': 'Colour TV',
}


def _owd_metric(tech_name):
    """Return the 'Metric' label used for the given raw technology name."""
    if tech_name in _OWD_MARKET_SHARE_TECHS:
        return 'Share of Market'
    if tech_name in _OWD_POPULATION_SHARE_TECHS:
        return 'Share of Population'
    if tech_name == 'Nox pollution controls (boilers)':
        return 'Share of Boilers'
    return 'Share of Households'


def read_owd(technology, source_path=None, output_dir=None):
    """Reshape one technology's series into a single wide row and save it as CSV.

    The raw CSV is read, the 'Code' column is dropped, and the rows whose
    'Entity' equals ``technology`` are kept. The third remaining column is
    treated as the value column; its values are divided by 100 and laid out
    with one column per 'Year'. Metadata columns are appended and the row is
    indexed by ``<Technology Name>_<Metric>_<Country Code>``.

    Parameters
    ----------
    technology : str
        Value of the 'Entity' column to extract. Also used (unchanged) as the
        output file name, ``<technology>.csv``.
    source_path : str, optional
        Path of the raw CSV. Defaults to the notebook-level ``target_filepath``.
    output_dir : str, optional
        Folder the CSV is written to. Defaults to the notebook-level
        ``cleaned_data_path``.

    Returns
    -------
    pandas.DataFrame
        One row indexed by 'ID': the year columns (float values) followed by
        'Country Name', 'Country Code', 'Spatial Scale', 'Unit',
        'Technology Name', 'Metric' and 'Data Source'.

    Raises
    ------
    ValueError
        If the raw CSV has no rows for ``technology``.
    """
    if source_path is None:
        source_path = target_filepath
    if output_dir is None:
        output_dir = cleaned_data_path

    raw = pd.read_csv(source_path).drop(columns='Code')
    # With 'Code' removed, the value column is the third one.
    value_column = raw.columns[2]

    rows = raw.loc[raw['Entity'] == technology]
    if rows.empty:
        raise ValueError(f"No rows found for technology {technology!r} in {source_path}")

    # NOTE(review): values are divided by 100 (stored as fractions) while 'Unit'
    # below is still '%' - confirm this is the convention the cleaned data expects.
    years = rows['Year'].tolist()
    fractions = (rows[value_column] / 100).tolist()

    # Build the wide row positionally. Relabelling columns one at a time by
    # their old row label breaks when a row label equals a year that was
    # already assigned: every such column gets renamed again.
    wide = pd.DataFrame([fractions], columns=years)

    display_name = technology.title()
    display_name = _OWD_DISPLAY_NAMES.get(display_name, display_name)
    metric = _owd_metric(technology)
    country_code = 'US'

    # Metadata columns, in the order they appear in the cleaned files.
    wide['Country Name'] = 'United States'
    wide['Country Code'] = country_code
    wide['Spatial Scale'] = 'National'
    wide['Unit'] = '%'
    wide['Technology Name'] = display_name
    wide['Metric'] = metric
    wide['Data Source'] = 'OWID'

    # Unique row identifier, used as the index (and first CSV column).
    wide.index = pd.Index([display_name + '_' + metric + '_' + country_code], name='ID')

    file_path = os.path.join(output_dir, technology + '.csv')
    wide.to_csv(file_path)

    return wide

In [6]:
# Run the export for every technology left in tech_list. read_owd writes one
# CSV per technology; the DataFrames it returns are not kept here.
for tech in tech_list:
    read_owd(tech)