In [53]:
# This notebook assumes parse_crop_mask.ipynb has already been run.
# It parses and normalizes the LPJmL pft_harvest.pft.bin data and writes the result out as a TSV file.

In [54]:
import numpy as np
import pickle

In [55]:
# Open the grid file in binary mode. It holds the cell coordinates as short
# integers (lon,lat pairs); the handle `f` is consumed by the following cell.
f = open("data/grid.bin", "rb")

In [56]:
# Read every short integer in the grid file into one flat array, then release
# the file handle so it does not stay open for the lifetime of the kernel.
shorts = np.fromfile(f, dtype=np.short)
f.close()

In [57]:
# Total number of values read from the grid file (flat, one-dimensional array).
shorts.size

134840

In [58]:
# Divide by 100 to get the actual coordinate values.
# The division turns the integer array into a float array, so the dtype check
# makes this cell safe to re-run: a second execution will not divide again.
if np.issubdtype(shorts.dtype, np.integer):
    shorts = shorts / 100.0

In [59]:
# Still a flat array at this point; pairing into coordinates happens next.
np.shape(shorts)

(134840,)

In [60]:
# Pair up consecutive values: one row of two coordinates per grid cell.
num_cells = len(shorts) // 2
ll = np.reshape(shorts, (num_cells, 2))

In [61]:
# (number of cells, 2)
np.shape(ll)

(67420, 2)

In [62]:
# This file isn't in the repo - move it to the data directory for this to work.
# Opened in binary mode; the handle `ff` is consumed by the following cell.
ff = open("data/pft_harvest.pft.bin", "rb")

In [63]:
# Read the whole harvest file as 32-bit floats into one flat array, then
# release the file handle so it does not stay open for the lifetime of the kernel.
datas = np.fromfile(ff, dtype=np.float32)
ff.close()

In [64]:
# Total number of float values read from the harvest file (flat array).
datas.size

23731840

In [65]:
# Dimensions of the harvest data: number of years and bands per year.
NUM_YEARS = 11
NUM_BANDS = 32
# Expected value count: one value per (year, band, cell). Compare with the
# number of floats read from the harvest file.
len(ll) * NUM_YEARS * NUM_BANDS

23731840

In [66]:
# Fold the flat value array into a (year, band, cell) cube.
cube_shape = (NUM_YEARS, NUM_BANDS, len(ll))
cell_vals = np.reshape(datas, cube_shape)

In [67]:
# Expect (years, bands, cells): 11 years, 32 bands per year, 67,420 cells per band.
np.shape(cell_vals)

(11, 32, 67420)

In [68]:
# One (lon, lat) row for each of the 67,420 cells.
np.shape(ll)

(67420, 2)

In [69]:
# Create our lookup tables - see https://github.com/PIK-LPJmL/LPJmL/wiki for more info on where this came from

# Water-management variants; every crop type appears once per variant.
mgmt_types = ['rainfed','irrigated']

# Crop labels, in band order within one management type.
# These strings are written verbatim to the output file, so they are kept unchanged.
crop_types = [
    'Temperate cereals (wheat, rye, barley; wheat)',
    'Rice (paddy rice; rice)',
    'Maize (maize for food; maize)',
    'Tropical cereals (millet, sorghum; millet)',
    'Pulses (pulses; field peas)',
    'Temperate roots (sugar beet; sugar beet)',
    'Tropical roots (cassava; cassava)',
    'Sunflower (sunflower; sunflower)',
    'Soybean (soybean; soybean)',
    'Groundnuts (groundnuts; groundnuts)',
    'Rapeseed (rapeseed; rapeseed)',
    'Sugarcane (sugarcane: sugarcane)',
    'others (potatoes, oil palm, citrus, date palm, grapes/vine, cotton, cocoa, coffee, other perennial crops, other annual crops; managed grassland)',
    'managed grasslands (pastures; managed grasslands)',
    'bio-energy grass',
    'bio-energy tree',
]

# One label per year slice. Years live on axis 0 of cell_vals (axis 2 is the
# cell dimension), so the range must be sized from shape[0]; sizing it from
# shape[2] would generate one "year" per grid cell.
FIRST_YEAR = 2007
years = np.arange(FIRST_YEAR, FIRST_YEAR + cell_vals.shape[0])

# Band labels of the form "<crop> -- <mgmt>": all crops for the first
# management type, followed by all crops for the second.
options = [c + " -- " + m for m in mgmt_types for c in crop_types]

In [70]:
# This is the meat of the processing: flatten the (year, band, cell) cube into
# one tuple per value, joined with the cell's coordinates, crop mask and area.
mydata = []

# Read the mask data (67420, 32) -> 67,420 cells with 32 columns (one for each crop-hydro combo).
# This is produced from the parse_crop_mask.ipynb script so make sure to run first!
# NOTE(review): pickle.load can execute arbitrary code - only load files you generated yourself.
with open("data/landuse_patterns/crop_mask.npy", "rb") as mask_file:
    mask_data = pickle.load(mask_file)

# Read the hectares float array of len 67420: hectares per cell.
# This is produced from the parse_crop_mask.ipynb script so make sure to run first!
with open("data/landuse_patterns/hectares.npy", "rb") as hectares_file:
    hectares = pickle.load(hectares_file)

# For each year, grab the year index and the data for the year (x)
for year_idx, x in enumerate(cell_vals):
    # Grab the actual year name using the lookup table from above.
    year = years[year_idx]

    # For each band, grab the band (crop -- hydro string pair) index and the data for the band (y)
    for band_idx, y in enumerate(x):
        # Split the band label into crop and hydro once per band; it does not
        # depend on the cell, so there is no need to redo it in the inner loop.
        crop, hydro = (part.strip() for part in options[band_idx].split("--"))

        # For each cell, grab the cell index and the data value for the cell (z)
        for cell_idx, z in enumerate(y):
            # Get the lon, lat from the ll index from above.
            lon, lat = ll[cell_idx]
            # Mask value for this cell and this band (crop -- hydro pair)
            mask_val = mask_data[cell_idx][band_idx]
            # Hectare value for this cell
            hectare = hectares[cell_idx]
            # Masked area for this cell/band; reused for the last column.
            haih = mask_val * hectare

            # Add tuple to mydata list
            # longitude\tlatitude\tcrop\thydro\tyear\tyield\tmask\thectare\thaih\tproduction
            mydata.append((lon, lat, crop, hydro, year, z, mask_val, hectare, haih, haih * z))

In [71]:
# Peek at the first record to sanity-check the tuple layout:
# (lon, lat, crop, hydro, year, yield, mask, hectare, mask*hectare, mask*hectare*yield)
mydata[0]

(-179.75,
 -16.25,
 'Temperate cereals (wheat, rye, barley; wheat)',
 'rainfed',
 2007,
 0.0,
 0.0,
 296758.96875,
 0.0,
 0.0)

In [None]:
# Dump every record to a tab-separated text file, header row first.
with open('data/lpjml_out.txt',"w") as out:
    out.write("longitude\tlatitude\tcrop\thydro\tyear\tyield\tmask\thectare\thaih\tproduction\n")
    for record in mydata:
        # Stringify each field, then emit one tab-joined line per record.
        fields = [str(value) for value in record]
        out.write("\t".join(fields) + "\n")