In [1]:
import boto3
import s3fs
from sagemaker import get_execution_role
import pickle
import pandas as pd
import geopandas as gpd
import sagemaker
import os
import requests
import zipfile
import io
import glob

In [2]:
def list_s3_files(bucket, directory_path):
    """Print the key of every S3 object in `bucket` whose key starts with `directory_path`.

    Relies on the module-level boto3 S3 client `conn`, so the cell that creates
    `conn` must have been run before this function is called.

    Args:
        bucket: Name of the S3 bucket to list.
        directory_path: Key prefix used to filter the listing.

    Returns:
        None. Keys are printed one per line; nothing is printed when no object
        matches the prefix.
    """
    # A single list call returns at most 1000 keys, so walk every page of results.
    paginator = conn.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=directory_path):
        # S3 omits "Contents" entirely when a page holds no objects.
        for obj in page.get("Contents", []):
            print(obj["Key"])

In [3]:
# Set variables
# SageMaker session (used below via sess.upload_data) and the notebook's execution role.
# NOTE(review): `role` is not referenced by any other cell visible in this notebook.
sess = sagemaker.Session()
role = get_execution_role()
# Module-level S3 client; list_s3_files reads this global, so run this cell before calling it.
conn = boto3.client("s3")
# Destination bucket and the key prefix under which the shape-file data is listed and written.
bucket = "w210-poverty-mapper"
subfolder = "shape_files"

# Set urls
# Zip archive of the IPUMS world shapes (downloaded and extracted in a later cell).
shape_url = "https://international.ipums.org/international/resources/gis/IPUMSI_world_release2020.zip"
# CSV of the OCHA country bounding boxes (read directly with pandas in a later cell).
box_url = "https://data.humdata.org/dataset/6992403a-d9dc-4962-b97e-c30abd1feefc/resource/aec5d77d-095a-4d42-8a13-5193ec18a6a9/download/country-boundingboxes.csv"

In [4]:
# Download IPUMS world shapes
# Bound the wait so a stalled connection cannot hang the kernel, and fail fast on an
# HTTP error; otherwise an error page would surface below as a confusing BadZipFile.
shape_r = requests.get(shape_url, timeout=60)
shape_r.raise_for_status()
# Unpack the in-memory archive into /root; the context manager closes the archive afterwards.
with zipfile.ZipFile(io.BytesIO(shape_r.content)) as shape_z:
    shape_z.extractall("/root")

In [5]:
# Push every extracted IPUMS world-shape file to s3, deleting each local copy once uploaded
ipums_key_prefix = "shape_files/raw_data/ipums_world_shapes"
local_shape_paths = glob.glob("/root/world_countries*")
for local_path in local_shape_paths:
    sess.upload_data(local_path, bucket=bucket, key_prefix=ipums_key_prefix)
    os.remove(local_path)

In [6]:
# Show every key currently stored under the shape-file prefix of the bucket
list_s3_files(bucket=bucket, directory_path=subfolder)

shape_files/raw_data/ipums_world_shapes/world_countries_2020.CPG
shape_files/raw_data/ipums_world_shapes/world_countries_2020.dbf
shape_files/raw_data/ipums_world_shapes/world_countries_2020.prj
shape_files/raw_data/ipums_world_shapes/world_countries_2020.sbn
shape_files/raw_data/ipums_world_shapes/world_countries_2020.sbx
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shp
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shp.xml
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shx
shape_files/raw_data/ocha_bounding_boxes/country-boundingboxes.csv


In [7]:
# Read the OCHA country bounding boxes CSV straight from its URL into a DataFrame
boxes = pd.read_csv(filepath_or_buffer=box_url)

In [8]:
# Store the OCHA country bounding boxes as a CSV object in s3
# NOTE: `index` is left at its default, so the DataFrame index is written as the first column.
save_path = subfolder + "/raw_data/ocha_bounding_boxes"
boxes_uri_template = "s3://{}/{}/country-boundingboxes.csv"
boxes_uri = boxes_uri_template.format(bucket, save_path)
boxes.to_csv(boxes_uri)

In [9]:
# Show every key currently stored under the shape-file prefix of the bucket
list_s3_files(bucket=bucket, directory_path=subfolder)

shape_files/raw_data/ipums_world_shapes/world_countries_2020.CPG
shape_files/raw_data/ipums_world_shapes/world_countries_2020.dbf
shape_files/raw_data/ipums_world_shapes/world_countries_2020.prj
shape_files/raw_data/ipums_world_shapes/world_countries_2020.sbn
shape_files/raw_data/ipums_world_shapes/world_countries_2020.sbx
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shp
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shp.xml
shape_files/raw_data/ipums_world_shapes/world_countries_2020.shx
shape_files/raw_data/ocha_bounding_boxes/country-boundingboxes.csv
