In [1]:
import os
import pandas as pd
import requests
import shutil
import time
import zipfile

from config import download_files_folder, extract_files_folder

In [2]:
def download_file(url, filepath=None, timeout=60, chunk_size=1024 * 1024):
    """Stream the resource at `url` to a file on disk.

    Parameters
    ----------
    url : str
        Address of the file to download.
    filepath : str, optional
        Where to save the file. If None, the last path segment of `url`
        is used as the filename (relative to the working directory).
    timeout : float or tuple, optional
        Passed to `requests.get`; limits how long to wait for the server
        to connect / send data so a stalled server cannot hang the run.
    chunk_size : int, optional
        Number of bytes read from the response per write.

    Returns
    -------
    str
        The path the file was written to.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check
        an error page would be saved to disk as if it were the file.
    """
    # if no filepath is provided, save as the filename from url
    if filepath is None:
        filepath = url.split('/')[-1]

    # stream in the file; the context manager releases the connection
    # even if writing to disk fails part-way through
    with requests.get(url, stream=True, timeout=timeout) as response:

        # fail loudly on HTTP errors instead of saving the error body
        response.raise_for_status()

        # save the streamed response to disk chunk by chunk
        with open(filepath, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    file.write(chunk)

    return filepath

In [3]:
# Load the table of per-state download URLs. The fips column is read as
# text rather than parsed as a number -- presumably so the codes stay
# exactly as written in the file (TODO confirm).
df = pd.read_csv(
    'data/urls.csv',
    dtype=dict(fips=str),
)
df.head()

Unnamed: 0,fips,name,state,url
0,1,Alabama,AL,https://usbuildingdata.blob.core.windows.net/u...
1,2,Alaska,AK,https://usbuildingdata.blob.core.windows.net/u...
2,4,Arizona,AZ,https://usbuildingdata.blob.core.windows.net/u...
3,5,Arkansas,AR,https://usbuildingdata.blob.core.windows.net/u...
4,6,California,CA,https://usbuildingdata.blob.core.windows.net/u...


In [4]:
# For every row of df: download the state's zip file, extract its single
# member and rename it to <fips>_<state>.json. States whose renamed file
# already exists are skipped, so the cell can be re-run after a failure.
# The state code and the seconds spent on it are printed on one line each.

# make sure the target folders exist before anything is written to them
os.makedirs(download_files_folder, exist_ok=True)
os.makedirs(extract_files_folder, exist_ok=True)

for _, row in df.iterrows():

    print(row['state'], end=' ')
    start_time = time.time()
    url = row['url']

    rename_to = '{}/{}_{}.json'.format(extract_files_folder, row['fips'], row['state'])

    if not os.path.exists(rename_to):

        # download the zip file
        download_filepath = '{}/{}_{}.zip'.format(download_files_folder, row['fips'], row['state'])
        download_file(url, download_filepath)

        # inspect and extract the zip file while the archive is still open
        with zipfile.ZipFile(download_filepath, 'r') as zf:

            # validate the contents before unpacking, so an unexpected
            # archive does not litter the extract folder
            members = zf.namelist()
            assert len(members) == 1, \
                'expected exactly 1 file in {}, found {}'.format(download_filepath, len(members))
            unzipped_filename = members[0]

            zf.extractall(extract_files_folder)

        # rename the unzipped file to fips_state.json
        rename_from = '{}/{}'.format(extract_files_folder, unzipped_filename)
        os.rename(rename_from, rename_to)

    # report the seconds spent on this state; the original passed end='\n'
    # to str.format() by mistake, where it was silently ignored
    elapsed = time.time() - start_time
    print('{:.1f}'.format(elapsed))

AL 24.0
AK 5.1
AZ 25.2
AR 15.1
CA 117.3
CO 19.5
CT 12.3
DE 3.7
DC 0.7
FL 64.7
GA 38.6
HI 2.9
ID 8.8
IL 43.6
IN 29.6
IA 18.9
KS 13.9
KY 26.8
LA 20.8
ME 8.0
MD 16.4
MA 21.3
MI 54.6
MN 27.5
MS 16.1
MO 32.5
MT 8.3
NE 11.2
NV 8.1
NH 6.7
NJ 25.3
NM 10.0
NY 63.2
NC 61.0
ND 7.9
OH 69.8
OK 20.0
OR 18.0
PA 55.7
RI 4.4
SC 23.1
SD 6.4
TN 34.8
TX 122.2
UT 9.4
VT 3.8
VA 31.0
WA 30.5
WV 10.7
WI 33.2
WY 4.2
