In [43]:
# TODO:
# put everything into one function that loads raster, resamples, and prints out a csv and df
# visualize in QGIS - are rasters correct?
# clean up comments


In [44]:
import sys
import os
import subprocess
import datetime
import pandas as pd
import numpy as np
import pygeoprocessing
import matplotlib.pyplot as plt
from osgeo import gdal

# Utility functions

In [103]:
def print_start_time(message=""):
    """Prints out start time of a process.
    
    Keyword arguments:
    message -- (optional) custom message indicating what process is running
    """
    print(message)
    print("Start time:", datetime.datetime.now())
    
# TODO: change input to a path 
def get_raster_resolution(raster_path):
    """Prints out dimensions of raster. 
    Returns pixel dimensions in tuple (pixelSizeX, pixelSizeY).
    
    Keyword arguments:
    raster -- GDAL-loaded raster to measure
    """
    # NOTE: GetGeoTransform fetches the coefficients for transforming between 
    # pixel/line (P,L) raster space, and projection coordinates (Xp,Yp) space.
    # Xp = padfTransform[0] + P*padfTransform[1] + L*padfTransform[2];
    # Yp = padfTransform[3] + P*padfTransform[4] + L*padfTransform[5];
    
    raster = gdal.Open(raster_path)
    print(raster_path)
    
    gt = raster.GetGeoTransform()
    
    pixelSizeX = gt[1]
    pixelSizeY =-gt[5]
    pixelDims = (pixelSizeX, pixelSizeY)
    
    print("Where (pixelSizeX, pixelSizeY) is", pixelDims)
    
    return pixelDims

def resample_raster_to_resolution(raster_src_path, xres, yres, resampled_dest_path=None):
    """Uses gdalwarp to resample raster via averaging to the given resolution. 
    
    Keyword arguments:
    raster_src_path -- (dtype: string) file path to raster to resample
    xres -- (dtype: int) desired x-resolution
    yres -- (dtype: int) desired y-resolution
    """
    if not resampled_dest_path:
        # Get base filename without extension
        filename = os.path.basename(raster_src_path)[:-4]

        # Denote resolution of 5 arcmin to file name; alternatively, call it _resampled    
        resampled_dest_path = os.path.join(OUTPUT_DIR, filename + "_5m.tif")

    # Can specify different resampling method (-r) depending on purpose
    args = ['gdalwarp', '-tr', str(xres), str(yres), '-tap', '-r', 'average', raster_src_path, resampled_dest_path]
    
    # TODO: check projection by loading in QGIS; look at resample_raster in nutrition code; mask NoData values
    
    print("Resampling raster file...start time:", datetime.datetime.now())
    
    try:
        # Call system command to run GDAL resampling
        process = subprocess.check_output(args)
    except:
        print("The file name: [", resampled_dest_path, "] already exists.")
    
    return resampled_dest_path

def resample_folder(dir_path, desired_resolution, res_tag):
    """Resamples all rasters in a given directory to the desired resolution. 
    
    Keyword arguments:
    dir_path -- (dtype: string) file path to raster to resample
    desired_resolution -- (dtype: int) tuple of resolution to resample to 
                            in the form (x_resolution, y_resolution)
    res_tag -- (dtype: string) tag to append to new file name to denote new resolution 
    """
    raster_files = os.listdir(dir_path)

    print_start_time("Resampling carbon rasters...")
    for filename in abg_biomass_files:
        raster_name = filename[:-4]
        extension = filename[-4:]
        if extension == '.tif':
        # TODO: add check that resampled raster doesn't already exist
            src_path = os.path.join(dir_path, filename)
            dest_path = os.path.join(OUTPUT_DIR, 
                                     CARBON_BIOMASS_FOLDER+res_tag, 
                                     raster_name+res_tag+extension)
            try:
                resample_raster_to_resolution(src_path, 
                                          desired_resolution[0], 
                                          desired_resolution[1], 
                                          resampled_dest_path=dest_path)
                print("Successfully resampled raster:", raster_name)
                get_raster_resolution(dest_path)
            except:
                print("Resampled raster:", raster_name+resolution_tag, "already exists. Trying next one...")
    
def convert_raster_to_xyz(raster_src_path):
    """Converts given raster to .xyz format using gdal_translate.
    
    Keyword arguments:
    raster_src_path -- (dtype: string) file path to .tif file to convert
    """
    
    # Remove 3-letter tif extension, replace with XYZ extension
    filename = os.path.basename(raster_src_path)[:-4]
    xyz_filename = ('{}.xyz').format(filename)
    
    xyz_dest_path = os.path.join(OUTPUT_DIR, xyz_filename)
    
    # Arguments for GDAL translation
    args = ["gdal_translate", raster_src_path, xyz_dest_path, "-co", "ADD_HEADER_LINE=YES", "-co", "COMPRESS=LZW"]
    
    print("Now converting raster into XYZ format...start time:", datetime.datetime.now())
    
    # Call system command to run GDAL translation
    process = subprocess.check_output(args)
    
    print("Saved new XYZ file to", xyz_dest_path)
    return xyz_dest_path

def convert_xyz_to_df(xyz_src_path, value_col_name):
    """Reads XYZ file into a dataframe with columns {"index", "pixel_id", "lat", "long", "value"},
    where "pixel_id" is a string concatenation of the coordinates.
    Returns this dataframe. 
    
    Keyword arguments:
    xyz_src_path -- (dtype: string) file path to .xyz file to read in
    """
    
    print("Now reading file into dataframe...start time:", datetime.datetime.now())
    df = pd.read_csv(xyz_src_path, delimiter=' ')
    df.index.name = 'index'
    df.rename(columns={'X': 'lat', 'Y': 'long', 'Z': value_col_name}, inplace=True)
    
    df['pixel_id'] = ("" + 
                    df['lat'].round(4).astype(str) + 
                    "," + 
                    df['long'].round(4).astype(str))

    return df

def convert_raster_to_df(tif_src_path, csv_save_path, value_col_name="value"):
    """Reads a raster file into a dataframe with columns {"index", "pixel_id", "lat", "long", "value"}.
    Saves this dataframe in csv format. 
    Returns this dataframe. 
    
    Keyword arguments:
    tif_src_path -- (dtype: string) file path to raster file to read in
    """
    xyz_dest_path = convert_raster_to_xyz(tif_src_path)
    new_df = convert_xyz_to_df(xyz_dest_path, value_col_name)
    
    print_start_time("Finally, saving dataframe to csv...")
    new_df.to_csv(csv_save_path)
    
    # TODO: delete intermediate XYZ file after use
    
    return new_df

## TODO: write wrapper function
## def resample_and_convert_to_csv():
    """Wrapper function to take in a raster file, resample it to the desired resolution, 
    read it into a dataframe, and save it to a csv file.
    """
    
# TODO: This still doesn't really work without pulling up the terminal. Why?? 
def merge_rasters(dir_of_rasters):
    """Merges list of rasters into one raster.
    
    Keyword arguments:
    dir_of_rasters -- (dtype: string) file path to directory containing all rasters to be merged
    """
    
    # gdalbuildvrt mosaic.vrt c:\data\....\*.tif
    # gdal_translate -of GTiff -co "COMPRESS=JPEG" -co "PHOTOMETRIC=YCBCR" -co "TILED=YES" mosaic.vrt mosaic.tif

    raster_list = os.listdir(dir_of_rasters)
    num_files = len(raster_list)
    
    vrt_name = os.path.basename(dir_of_rasters)+'.vrt'
    file_space = os.path.join(dir_of_rasters, '*.tif')
    builtvrt_args = ['gdalbuildvrt', vrt_name, file_space, '-overwrite']
    print(builtvrt_args)
    print("Merging "+str(num_files)+" files into virtual raster...", datetime.datetime.now())
    
    try:
        buildvrt_process = subprocess.check_output(builtvrt_args)
        print("Built", vrt_name, "successfully!")
    except subprocess.CalledProcessError as e:
        print(e.output)
        print("Building virtual raster failed.")
    
    tif_name =  dir_of_rasters+'.tif'
    translate_args = ['gdal_translate', '-of', 'GTiff', vrt_name, tif_name]
    print(translate_args)
    print("Merging virtual raster into true raster...", datetime.datetime.now())

    try:
        translate_process = subprocess.check_output(translate_args)
        print("Merged rasters successfully!")
    except subprocess.CalledProcessError as e:
        print(e.output)
        print("Merging rasters failed.")


In [46]:
 """DATASET CLASS
    Keeps track of the various locations and format of the data for scalable access and reuse. 
    """
class Dataset:
    def __init__(self, metric, basefile, path_to_raster):
        self.metric = metric
        self.basefile = basefile
        self.path_to_raster = path_to_raster
        self.resolution = get_raster_resolution(path_to_raster)
        
        self.path_to_csv = os.path.join(OUTPUT_DIR, basefile+".csv") 
        if os.path.exists(self.path_to_csv):
            print_start_time("Loading dataframe from" + str(self.path_to_csv))
            self.dataframe = pd.read_csv(self.path_to_csv)
        else:
            self.dataframe = convert_raster_to_df(path_to_raster, self.path_to_csv, metric)
        
    def get_resolution(self):
        print(self.resolution)
        return self.resolution
        

# Main processes

## Preparing the datasets for analysis - loading, resampling, and merging

In [17]:
# Define working directories
NAT_CAP_DIR = '/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project'
CARBON_DATA_DIR = os.path.join(NAT_CAP_DIR, 'Carbon/Data')
OUTPUT_DIR = os.path.join(NAT_CAP_DIR, 'Outputs')
PEOPLE_DIR = os.path.join(NAT_CAP_DIR, 'People')

# To access 'Carbon' data
CARBON_BIOMASS_FOLDER = 'GFW_ALWBD_2000'
BIOMASS_DIR = os.path.join(CARBON_DATA_DIR, CARBON_BIOMASS_FOLDER)

In [19]:
# Find desired resolution for 5 arcmins
raster_example_5m = os.path.join(PEOPLE_DIR, 'Distance_to_market/minutes_to_market_5m.tif')
(Xres, Yres) = get_raster_resolution(raster_example_5m)

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/People/Distance_to_market/minutes_to_market_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)


### Resampling the data to 5 arcmins

#### CARBON DATA
** NOTE: Resampled / merged carbon raster created in Terminal to fix bugs with individual raster components.

In [45]:
# List of carbon files
abg_biomass_files = os.listdir(BIOMASS_DIR)
desired_resolution = (Xres, Yres)
resolution_tag = "_5m"

# Resample all the carbon files to 5 arcmins
resample_folder(BIOMASS_DIR, desired_resolution, resolution_tag)

In [58]:
# Merge resampled carbon files into one raster
carbon_raster_dir = os.path.join(OUTPUT_DIR, "GFW_ALWBD_2000_5m")
merge_rasters(carbon_raster_dir)

['gdalbuildvrt', 'GFW_ALWBD_2000_5m.vrt', '/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Carbon/Outputs/GFW_ALWBD_2000_5m/*.tif', '-overwrite']
Merging 281 files into virtual raster... 2019-05-23 02:22:01.357704
b'0...10...20...30...40...50...60...70...80...90...100 - done.\n'
Building virtual raster failed.
['gdal_translate', '-of', 'GTiff', 'GFW_ALWBD_2000_5m.vrt', '/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Carbon/Outputs/GFW_ALWBD_2000_5m.tif']
Merging virtual raster into true raster... 2019-05-23 02:22:01.409168
b''
Merging rasters failed.


In [24]:
# Check that merged and resampled carbon raster has correct resolution
carbon_raster_path = os.path.join(OUTPUT_DIR, "GFW_ALWBD_2000_5m.tif")
carbon_5m = Dataset(metric="carbon", 
                   basefile="GFW_ALWBD_2000_5m",
                   path_to_raster = carbon_raster_path)

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/GFW_ALWBD_2000_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.08333333333333247, 0.08333333333333247)
Loading dataframe from/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/GFW_ALWBD_2000_5m.csv
Start time: 2019-06-11 17:34:19.560343


In [25]:
carbon_5m.dataframe.head()

Unnamed: 0,index,lat,long,carbon,pixel_id
0,0,-179.958333,79.958333,0,"-179.9583,79.9583"
1,1,-179.875,79.958333,0,"-179.875,79.9583"
2,2,-179.791667,79.958333,0,"-179.7917,79.9583"
3,3,-179.708333,79.958333,0,"-179.7083,79.9583"
4,4,-179.625,79.958333,0,"-179.625,79.9583"


#### NIGHTTIME LIGHTS

In [130]:
# Resample raster of average nighttime lights to 5m
# Original data is in 30 arc sec
# TODO: Would like to one day keep raster in 30s
path_to_nighttime_lights = os.path.join(
                                PEOPLE_DIR, 
                                "nighttime_lights",
                                "avg_lights_x_pct",
                                "F152000.v4b.avg_lights_x_pct.tif")
resample_raster_to_resolution(path_to_nighttime_lights, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'nighttime_lights_5m.tif'))

Resampling raster file...start time: 2019-06-12 07:14:38.094758


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/nighttime_lights_5m.tif'

#### POPULATION DENSITY

In [104]:
# Resample raster of population density to 5m
# Original data is in 30 arc sec
# TODO: Would like to one day keep raster in 30s

path_to_pop_dens = os.path.join(PEOPLE_DIR,
                            'adjusted_pop',
                            'gpw_v4_population_density_adjusted_30_sec.tif')

resample_raster_to_resolution(path_to_pop_dens, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'pop_density_5m.tif'))

Resampling raster file...start time: 2019-06-12 06:39:46.710543


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/pop_density_5m.tif'

#### INFANT MORTALITY

In [107]:
# Resample raster of infant mortality to 5m
# Original data is in 2.5 arc min (?)
# TODO: Would like to one day keep raster in 2.5m

path_to_imr = os.path.join(PEOPLE_DIR,
                            'global_subnational_infant_mortality_rates',
                            '2015_v2',
                            'povmap_global_subnational_infant_mortality_rates_v2.tif')

resample_raster_to_resolution(path_to_imr, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'infant_mortality_5m.tif'))

Resampling raster file...start time: 2019-06-12 06:56:16.195137


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/infant_mortality_5m.tif'

#### SOIL WATER CAPACITY

In [164]:
# Resample raster of soil water capacity to 5m, even though it might already be in 5m

path_to_soil = os.path.join(CARBON_DATA_DIR,
                            'soil_water_capacity',
                            'soil_water_capacity.tif')

resample_raster_to_resolution(path_to_soil, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'soil_water_5m.tif'))

Resampling raster file...start time: 2019-06-13 00:38:46.006767


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/soil_water_5m.tif'

#### HUMAN FOOTPRINT

In [171]:
# Resample raster of pasture land to 5m
                                   
path_to_footprint = os.path.join(PEOPLE_DIR,
                            'human_footprint',
                            'wildareas-v3-2009-human-footprint.tif')
print(path_to_footprint)

resample_raster_to_resolution(path_to_footprint, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'human_footprint_5m.tif'))


/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/People/human_footprint/wildareas-v3-2009-human-footprint.tif
Resampling raster file...start time: 2019-06-13 12:08:21.835673
The file name: [ /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/human_footprint_5m.tif ] already exists.


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/human_footprint_5m.tif'

#### PASTURES

In [155]:
# Resample raster of pasture land to 5m
                                   
path_to_pastures = os.path.join(PEOPLE_DIR,
                            'pastures',
                            'pasture.tif')
print(path_to_pastures)

resample_raster_to_resolution(path_to_pastures, 
                              Xres, Yres, 
                              resampled_dest_path=os.path.join(OUTPUT_DIR, 'pasture_5m.tif'))


/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/People/pastures/pasture.tif
Resampling raster file...start time: 2019-06-12 13:08:18.611687


'/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/pasture_5m.tif'

### Loading in data as Dataset class member

#### MINUTES TO MARKET

In [6]:
# Accessibility (minutes) to market
market_dist_5m = Dataset(metric="mins_to_market",
                          basefile="minutes_to_market_5m",
                          path_to_raster=os.path.join(PEOPLE_DIR, 'Distance_to_market/minutes_to_market_5m.tif'))

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/People/Distance_to_market/minutes_to_market_5m.tif
Geotransform is (-180.0, 0.083333333333333, 0.0, 89.99999999928002, 0.0, -0.083333333333333)
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)


#### MARKET INFLUENCE

In [9]:
# Market influence
market_influence_5m = Dataset(metric="market_influence",
                             basefile="market_influence_5m", 
                             path_to_raster=os.path.join(PEOPLE_DIR, 'market_influence_v2/mkt_infind_5m/hdr.adf'))

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/People/market_influence_v2/mkt_infind_5m/hdr.adf
Geotransform is (-180.0, 0.08333333333333329, 0.0, 89.99999999999989, 0.0, -0.08333333333333329)
Where (pixelSizeX, pixelSizeY) is (0.08333333333333329, 0.08333333333333329)
Loaded dataframe from /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/market_influence_5m.csv


#### POPULATION DENSITY

In [105]:
# Adjusted population density - TODO: redo for 30 arcsecs
population_density_5m = Dataset(metric="population_density",
                                  basefile="population_density_5m",
                                  path_to_raster=os.path.join(
                                      OUTPUT_DIR, 
                                      'pop_density_5m.tif'))

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/pop_density_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)
Loading dataframe from/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/population_density_5m.csv
Start time: 2019-06-12 06:54:55.206021


In [120]:
population_density_5m.dataframe.loc[population_density_5m.dataframe["pixel_id"]=="-179.7917,83.5417"]

Unnamed: 0,index,lat,long,population_density,pixel_id
332642,332642,-179.791667,83.541667,-3.4028230000000003e+38,"-179.7917,83.5417"


#### NIGHTTIME LIGHTS

In [132]:
# Average nighttime lights
nighttime_lights_5m = Dataset(metric="nighttime_lights",
                            basefile="nighttime_lights_5m", 
                            path_to_raster=os.path.join(OUTPUT_DIR, "nighttime_lights_5m.tif"))

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/nighttime_lights_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)
Now converting raster into XYZ format...start time: 2019-06-12 07:16:09.436949
Saved new XYZ file to /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/nighttime_lights_5m.xyz
Now reading file into dataframe...start time: 2019-06-12 07:16:41.749808
Finally, saving dataframe to csv...
Start time: 2019-06-12 07:17:02.405922


In [135]:
nighttime_lights_5m.dataframe.loc[nighttime_lights_5m.dataframe['pixel_id']=="-179.4583,69.9583"]

Unnamed: 0_level_0,lat,long,nighttime_lights,pixel_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
263649,-179.458333,69.958333,0.0,"-179.4583,69.9583"


#### INFANT MORTALITY

In [108]:
# Infant mortality rates
infant_mortality_5m = Dataset(metric='infant_mortality_rate',
                                    basefile='infant_mortality_5m',
                                    path_to_raster=os.path.join(OUTPUT_DIR, "infant_mortality_5m.tif"))

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/infant_mortality_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)
Now converting raster into XYZ format...start time: 2019-06-12 06:57:27.696878
Saved new XYZ file to /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/infant_mortality_5m.xyz
Now reading file into dataframe...start time: 2019-06-12 06:57:43.994182
Finally, saving dataframe to csv...
Start time: 2019-06-12 06:57:59.751699


#### SOIL WATER CAPACITY

In [165]:
# Soil water capacity (validation set)
soil_water_5m = Dataset(metric='soil_water_capacity',
                                    basefile='soil_water_5m',
                                    path_to_raster=os.path.join(OUTPUT_DIR, "soil_water_5m.tif"))               

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/soil_water_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)
Now converting raster into XYZ format...start time: 2019-06-13 00:39:48.308239
Saved new XYZ file to /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/soil_water_5m.xyz
Now reading file into dataframe...start time: 2019-06-13 00:40:05.328705
Finally, saving dataframe to csv...
Start time: 2019-06-13 00:40:37.337436


#### PASTURES

In [159]:
# Pastures
pastures_5m = Dataset(metric='pasture_land', basefile='pastures', 
                 path_to_raster=os.path.join(OUTPUT_DIR, "pasture_5m.tif"))               

/Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/pasture_5m.tif
Where (pixelSizeX, pixelSizeY) is (0.083333333333333, 0.083333333333333)
Now converting raster into XYZ format...start time: 2019-06-12 13:25:44.165256
Saved new XYZ file to /Users/jackieennis/Google Drive/Classes/Active Classes/Impact Lab/Carbon Students Project/Outputs/pasture_5m.xyz
Now reading file into dataframe...start time: 2019-06-12 13:26:09.048785
Finally, saving dataframe to csv...
Start time: 2019-06-12 13:26:29.528527


In [160]:
pastures_5m.dataframe.head()

Unnamed: 0_level_0,lat,long,pasture_land,pixel_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,-180.041667,89.958333,-3.4028230000000003e+38,"-180.0417,89.9583"
1,-179.958333,89.958333,-3.4028230000000003e+38,"-179.9583,89.9583"
2,-179.875,89.958333,-3.4028230000000003e+38,"-179.875,89.9583"
3,-179.791667,89.958333,-3.4028230000000003e+38,"-179.7917,89.9583"
4,-179.708333,89.958333,-3.4028230000000003e+38,"-179.7083,89.9583"


### Merging dataframes by resolution

In [77]:
# Merge each 5m dataset back to carbon, starting with mins_to_market
print_start_time("Merging carbon with mins_to_market...")
carbon_5m_merged = carbon_5m.dataframe.merge(market_dist_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', 'mins_to_market'))


Merging carbon with mins_to_market...
Start time: 2019-06-12 01:02:10.237296


In [78]:
# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", "long_carbon", "carbon", "pixel_id", "mins_to_market"]]
print(carbon_5m_merged.columns)

   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0             0 -179.958333    79.958333       0  -179.9583,79.9583   
1             1 -179.875000    79.958333       0   -179.875,79.9583   
2             2 -179.791667    79.958333       0  -179.7917,79.9583   
3             3 -179.708333    79.958333       0  -179.7083,79.9583   
4             4 -179.625000    79.958333       0   -179.625,79.9583   

   indexmins_to_market  latmins_to_market  longmins_to_market  mins_to_market  
0               518400        -179.958333           79.958333     -2147483647  
1               518401        -179.875000           79.958333     -2147483647  
2               518402        -179.791667           79.958333     -2147483647  
3               518403        -179.708333           79.958333     -2147483647  
4               518404        -179.625000           79.958333     -2147483647  
Index(['index_carbon', 'lat_carbon', 'long_carbon', 'carbon', 'pixel_id',
       'mins_to_mar

In [79]:
# Now add market_influence to merge
print_start_time("Merging carbon with market_influence...")
carbon_5m_merged = carbon_5m_merged.merge(market_influence_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_market_influence'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence"]]
print(carbon_5m_merged.columns)


Merging carbon with market_influence...
Start time: 2019-06-12 01:02:32.293998
   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0             0 -179.958333    79.958333       0  -179.9583,79.9583   
1             1 -179.875000    79.958333       0   -179.875,79.9583   
2             2 -179.791667    79.958333       0  -179.7917,79.9583   
3             3 -179.708333    79.958333       0  -179.7083,79.9583   
4             4 -179.625000    79.958333       0   -179.625,79.9583   

   mins_to_market   index         lat       long  market_influence  
0     -2147483647  518400 -179.958333  79.958333     -3.402823e+38  
1     -2147483647  518401 -179.875000  79.958333     -3.402823e+38  
2     -2147483647  518402 -179.791667  79.958333     -3.402823e+38  
3     -2147483647  518403 -179.708333  79.958333     -3.402823e+38  
4     -2147483647  518404 -179.625000  79.958333     -3.402823e+38  
Index(['index_carbon', 'lat_carbon', 'long_carbon', 'carbon', 'pixel_id',
      

In [80]:
# Now add population_density to merge
print_start_time("Merging carbon with population_density...")
carbon_5m_merged = carbon_5m_merged.merge(population_density_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_pop_density'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence", "population_density"]]
print(carbon_5m_merged.columns)

Merging carbon with population_density...
Start time: 2019-06-12 01:02:51.346472
   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0             0 -179.958333    79.958333       0  -179.9583,79.9583   
1             1 -179.875000    79.958333       0   -179.875,79.9583   
2             2 -179.791667    79.958333       0  -179.7917,79.9583   
3             3 -179.708333    79.958333       0  -179.7083,79.9583   
4             4 -179.625000    79.958333       0   -179.625,79.9583   

   mins_to_market  market_influence   index         lat       long  \
0     -2147483647     -3.402823e+38  518400 -179.958333  79.958333   
1     -2147483647     -3.402823e+38  518401 -179.875000  79.958333   
2     -2147483647     -3.402823e+38  518402 -179.791667  79.958333   
3     -2147483647     -3.402823e+38  518403 -179.708333  79.958333   
4     -2147483647     -3.402823e+38  518404 -179.625000  79.958333   

   population_density  
0       -3.402823e+38  
1       -3.402823e+38  

In [128]:
# Now add infant_mortality to merge
print_start_time("Merging carbon with infant_mortality...")
carbon_5m_merged = carbon_5m_merged.merge(infant_mortality_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_infant_mort'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence", "population_density",
                                    "infant_mortality_rate"]]
print(carbon_5m_merged.columns)

   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0             0 -179.958333    79.958333       0  -179.9583,79.9583   
1             1 -179.875000    79.958333       0   -179.875,79.9583   
2             2 -179.791667    79.958333       0  -179.7917,79.9583   
3             3 -179.708333    79.958333       0  -179.7083,79.9583   
4             4 -179.625000    79.958333       0   -179.625,79.9583   

   mins_to_market  market_influence  population_density         lat  \
0     -2147483647     -3.402823e+38       -3.402823e+38 -179.958333   
1     -2147483647     -3.402823e+38       -3.402823e+38 -179.875000   
2     -2147483647     -3.402823e+38       -3.402823e+38 -179.791667   
3     -2147483647     -3.402823e+38       -3.402823e+38 -179.708333   
4     -2147483647     -3.402823e+38       -3.402823e+38 -179.625000   

        long  infant_mortality_rate  
0  79.958333                   -inf  
1  79.958333                   -inf  
2  79.958333                   -

In [137]:
# Now add nighttime_lights to merge
print_start_time("Merging carbon with nighttime_lights...")
carbon_5m_merged = carbon_5m_merged.merge(nighttime_lights_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_nighttime_lights'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence", "population_density",
                                    "nighttime_lights", "infant_mortality_rate"]]
print(carbon_5m_merged.columns)

Merging carbon with nighttime_lights...
Start time: 2019-06-12 07:22:49.555733
   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0        254880 -179.958333    75.041667       0  -179.9583,75.0417   
1        254881 -179.875000    75.041667       0   -179.875,75.0417   
2        254882 -179.791667    75.041667       0  -179.7917,75.0417   
3        254883 -179.708333    75.041667       0  -179.7083,75.0417   
4        254884 -179.625000    75.041667       0   -179.625,75.0417   

   mins_to_market  market_influence  population_density  \
0     -2147483647     -3.402823e+38       -3.402823e+38   
1     -2147483647     -3.402823e+38       -3.402823e+38   
2     -2147483647     -3.402823e+38       -3.402823e+38   
3     -2147483647     -3.402823e+38       -3.402823e+38   
4     -2147483647     -3.402823e+38       -3.402823e+38   

   infant_mortality_rate         lat       long  nighttime_lights  
0                   -inf -179.958333  75.041667               0.0  
1  

In [161]:
# Now add pastures to merge
print_start_time("Merging carbon with pastures...")
carbon_5m_merged = carbon_5m_merged.merge(pastures_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_pasture'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence", "population_density",
                                    "nighttime_lights", "infant_mortality_rate",
                                    "pasture_land"]]
print(carbon_5m_merged.columns)

Merging carbon with pastures...
Start time: 2019-06-12 13:35:53.736538
   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0        254880 -179.958333    75.041667       0  -179.9583,75.0417   
1        254881 -179.875000    75.041667       0   -179.875,75.0417   
2        254882 -179.791667    75.041667       0  -179.7917,75.0417   
3        254883 -179.708333    75.041667       0  -179.7083,75.0417   
4        254884 -179.625000    75.041667       0   -179.625,75.0417   

   mins_to_market  market_influence  population_density  nighttime_lights  \
0     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
1     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
2     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
3     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
4     -2147483647     -3.402823e+38       -3.402823e+38               0.0   

   infant_mortality_rate  tree_density 

In [169]:
# Now add soil_water_capacity to merge
print_start_time("Merging carbon with soil_water_capacity...")
carbon_5m_merged = carbon_5m_merged.merge(soil_water_5m.dataframe, on=['pixel_id'], suffixes=('_carbon', '_soil'))

# Sanity check, then only keep necessary columns 
print(carbon_5m_merged.head())
carbon_5m_merged = carbon_5m_merged[["index_carbon", "lat_carbon", 
                                     "long_carbon", "carbon", 
                                     "pixel_id", "mins_to_market",
                                    "market_influence", "population_density",
                                    "nighttime_lights", "infant_mortality_rate",
                                     "pasture_land", "soil_water_capacity"]]
print(carbon_5m_merged.columns)

Merging carbon with soil_water_capacity...
Start time: 2019-06-13 00:44:13.540333
   index_carbon  lat_carbon  long_carbon  carbon           pixel_id  \
0        254880 -179.958333    75.041667       0  -179.9583,75.0417   
1        254881 -179.875000    75.041667       0   -179.875,75.0417   
2        254882 -179.791667    75.041667       0  -179.7917,75.0417   
3        254883 -179.708333    75.041667       0  -179.7083,75.0417   
4        254884 -179.625000    75.041667       0   -179.625,75.0417   

   mins_to_market  market_influence  population_density  nighttime_lights  \
0     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
1     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
2     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
3     -2147483647     -3.402823e+38       -3.402823e+38               0.0   
4     -2147483647     -3.402823e+38       -3.402823e+38               0.0   

   infant_mortality_rate  tr

In [170]:
# Save newly merged dataframes to csv
print_start_time("Saving to CSV...")

carbon_5m_merged.to_csv(os.path.join(OUTPUT_DIR, "carbon_5m_merged.csv"))
carbon_5m_merged.head()

Saving to CSV...
Start time: 2019-06-13 00:45:34.568641


Unnamed: 0,index_carbon,lat_carbon,long_carbon,carbon,pixel_id,mins_to_market,market_influence,population_density,nighttime_lights,infant_mortality_rate,pasture_land,soil_water_capacity
0,254880,-179.958333,75.041667,0,"-179.9583,75.0417",-2147483647,-3.4028230000000003e+38,-3.4028230000000003e+38,0.0,-inf,-3.4028230000000003e+38,0.0
1,254881,-179.875,75.041667,0,"-179.875,75.0417",-2147483647,-3.4028230000000003e+38,-3.4028230000000003e+38,0.0,-inf,-3.4028230000000003e+38,0.0
2,254882,-179.791667,75.041667,0,"-179.7917,75.0417",-2147483647,-3.4028230000000003e+38,-3.4028230000000003e+38,0.0,-inf,-3.4028230000000003e+38,0.0
3,254883,-179.708333,75.041667,0,"-179.7083,75.0417",-2147483647,-3.4028230000000003e+38,-3.4028230000000003e+38,0.0,-inf,-3.4028230000000003e+38,0.0
4,254884,-179.625,75.041667,0,"-179.625,75.0417",-2147483647,-3.4028230000000003e+38,-3.4028230000000003e+38,0.0,-inf,-3.4028230000000003e+38,0.0
