In [None]:
import os
import time
import numpy as np
import pandas as pd
import xarray as xr

In [None]:
def loadDatasets(yield_filePath, plant_filePath, maty_filePath, crop_name):
    """Load the yield, planting-day and maturity-day variables of one crop into a single DataFrame.

    Parameters
    ----------
    yield_filePath : str
        File that contains the variable ``yield_<crop_name>``.
    plant_filePath : str
        File that contains the variable ``plant-day_<crop_name>``.
    maty_filePath : str
        File that contains the variable ``maty-day_<crop_name>``.
    crop_name : str
        Suffix used to build the three variable names.

    Returns
    -------
    pandas.DataFrame
        The ``yield_<crop_name>`` frame with two extra columns, ``plant-day``
        and ``maturity-day``, aligned on the frame's index.
    """
    # Files are read in the same order as before (plant, maturity, yield) so a
    # missing or corrupt input fails on the same file it always did.
    plantDF = _loadVariableFrame(plant_filePath, 'plant-day_{}'.format(crop_name))
    matyDF = _loadVariableFrame(maty_filePath, 'maty-day_{}'.format(crop_name))
    yieldDF = _loadVariableFrame(yield_filePath, 'yield_{}'.format(crop_name))

    # Column assignment aligns on the index, so the three frames are matched
    # row by row through their shared coordinates.
    yieldDF['plant-day'] = plantDF['plant-day_{}'.format(crop_name)]
    yieldDF['maturity-day'] = matyDF['maty-day_{}'.format(crop_name)]

    return yieldDF


def _loadVariableFrame(filePath, variableName):
    """Open one dataset, rebuild its time axis and return ``variableName`` as a DataFrame."""
    # The context manager releases the file handle as soon as the values have
    # been copied into the DataFrame; this function runs once per input file.
    with xr.open_dataset(filePath, decode_times=False) as dataset:
        # The units attribute is expected to read "<unit> since <reference date>";
        # only the reference date is used.
        _, referenceDate = dataset.time.attrs['units'].split('since', 1)

        # NOTE(review): the axis is rebuilt with month-start steps regardless of
        # the unit named in the attribute - confirm this matches the data.
        dataset['time'] = pd.date_range(start=referenceDate.strip(), periods=dataset.sizes['time'], freq='MS')

        return dataset[variableName].to_dataframe()

In [None]:
def surfaceFeatureExtractor(yieldDF, yield_filePath):
    """Add the scenario levels encoded in the yield file name as constant columns.

    The file name is split on ``_``; tokens 9 to 13 are read as the CO2, T, W,
    N and A tokens, each made of a one-letter prefix followed by its level
    (for example ``C360``, ``T-1``, ``Winf``, ``N10``, ``A0``).

    Parameters
    ----------
    yieldDF : pandas.DataFrame
        Frame to annotate. It is modified in place and also returned.
    yield_filePath : str
        Path of the yield file whose name carries the scenario tokens.

    Returns
    -------
    pandas.DataFrame
        ``yieldDF`` with the extra columns ``CO2``, ``W``, ``T``, ``N`` and ``A``.

    Raises
    ------
    IndexError
        If the file name has fewer than 14 ``_``-separated tokens.
    ValueError
        If a level is not an integer (``inf`` is accepted for W only).
    """
    nameTokens = os.path.basename(yield_filePath).split('_')
    raw_CO2, raw_T, raw_W, raw_N, raw_A = nameTokens[9:14]

    CO2 = int(raw_CO2[1:])
    N = int(raw_N[1:])
    # Only the single character after the prefix is read, which keeps anything
    # that follows it (such as a file extension) out of the conversion.
    A = int(raw_A[1])

    # Parse the level as a signed integer. Splitting on '-' and keeping the
    # last piece dropped the sign, so "T-1" and "T1" both became 1.
    T = int(raw_T[1:])

    # W is either a signed integer or the literal "inf". Matching the literal
    # avoids treating every long token (e.g. "W100") as infinite.
    waterLevel = raw_W.strip()[1:]
    if waterLevel.lower() == 'inf':
        W = np.inf
    else:
        W = int(waterLevel)

    print("\n[INFO]. CO2 : ", CO2)
    print("[INFO].  W  : ", W)
    print("[INFO].  T : ", T)
    print("[INFO].  N : ", N)
    print("[INFO].  A : ", A, '\n')

    # Scalars are broadcast to every row of the frame.
    yieldDF['CO2'] = CO2
    yieldDF['W'] = W
    yieldDF['T'] = T
    yieldDF['N'] = N
    yieldDF['A'] = A

    return yieldDF

In [None]:
def soilFeatureCombine(yieldDF, soilFile):
    """Attach soil properties to every yield row that shares its grid cell.

    Parameters
    ----------
    yieldDF : pandas.DataFrame
        Yield table; must expose ``lat`` and ``lon`` as columns.
    soilFile : str
        Path of the soil dataset.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``yieldDF`` with the soil table on ``lat`` and ``lon``;
        yield rows without a complete soil record are dropped.
    """
    # Soil variables that are not carried into the combined table.
    unusedSoilColumns = ['mu_global', 'bulk_density', 'root_obstacles', 'impermeable_layer', 'ece', 'bs_soil', 'issoil']

    # Close the file as soon as its content is in memory; this function is
    # called once per yield file.
    with xr.open_dataset(soilFile) as soilData:
        soilDF = soilData.to_dataframe().reset_index()

    # Keep only grid cells for which every remaining soil variable is present.
    soilDF = soilDF.drop(columns=unusedSoilColumns).dropna(how='any')

    return pd.merge(yieldDF, soilDF, on=['lat', 'lon'], how='inner')

In [None]:
if __name__ == "__main__":
    # Build one combined table from every yield file that has matching
    # plant-day and maturity-day files, and write it to a CSV checkpoint.
    plantDir, matyDir, yieldDir = "./ggcmi/phase2_outputs/dataset/plant-day/", "./ggcmi/phase2_outputs/dataset/maty-day/", "./ggcmi/phase2_outputs/dataset/yield/"
    soilFile = "./ggcmi/HWSD/HWSD_soil_data_on_cropland_v2.2.nc"
    outputFile = "./staticInputs.csv"

    # List the directory once. Sorting makes the processing order (and so the
    # row order of the CSV) reproducible across runs and file systems.
    yieldFiles = sorted(os.listdir(yieldDir))
    # Must be a number: it is compared with `count` and shown in the progress line.
    total_files = len(yieldFiles)

    # Per-file frames are collected and concatenated only when a checkpoint is
    # written, instead of re-copying the growing table on every iteration.
    processedFrames = []
    hasUnsavedFrames = False

    for count, filename in enumerate(yieldFiles, start=1):
        yield_filePath = yieldDir + filename
        filenameList = filename.split('_')

        crop_name = filenameList[4]

        # The companion files share the yield file's name except for token 3,
        # which names the variable.
        filenameList[3] = 'plant-day'
        plant_filePath = plantDir + '_'.join(filenameList)

        filenameList[3] = 'maty-day'
        maty_filePath = matyDir + '_'.join(filenameList)

        print("[INFO]. Plant Day File    : ", plant_filePath)
        print("[INFO]. Maturity Day File : ", maty_filePath)
        print("[INFO]. Yield Day File    : ", yield_filePath)

        if not os.path.exists(plant_filePath) or not os.path.exists(maty_filePath):
            print("[ERROR]. File does not exists.")
            print("[ERROR]. Passing through this iteration without change.")
            time.sleep(3)
            # Really skip the file; `pass` fell through and tried to open it.
            continue

        print("\n\n\n\n[INFO]. Starting Loading Datasets.")
        yieldDF = loadDatasets(yield_filePath, plant_filePath, maty_filePath, crop_name)
        print("[INFO]. Datasets Loaded Successfully.")

        print("[INFO]. Starting Surface Feature Extraction.")
        yieldDF = surfaceFeatureExtractor(yieldDF, yield_filePath)
        print("[INFO]. Surface Feature Extracted Successfully.")

        yieldDF = yieldDF.reset_index()
        yieldDF = yieldDF.dropna(how='any')

        print("[INFO]. Starting Soil Feature Extraction.")
        yieldDF = soilFeatureCombine(yieldDF, soilFile)
        print("[INFO]. Soil Feature Extracted Successfully.")

        yieldColumn = "yield_{}".format(crop_name)
        yieldDF[yieldColumn] = np.round(yieldDF[yieldColumn])

        processedFrames.append(yieldDF)
        hasUnsavedFrames = True

        # Checkpoint every tenth file and on the last one. to_csv overwrites
        # the previous checkpoint, so no shell `rm` is needed.
        if count % 10 == 0 or count == total_files:
            pd.concat(processedFrames, ignore_index=True).to_csv(outputFile, index=False)
            hasUnsavedFrames = False
            print("[INFO]. CSV File saved successfully.")

        print("[INFO]. {}/{} File Completed.".format(count, total_files))

    if processedFrames:
        # Keep the combined table bound to yieldDF/prevFile, as the loop
        # previously left it, in case later cells rely on those names.
        yieldDF = prevFile = pd.concat(processedFrames, ignore_index=True)

        # Covers the case where the trailing file(s) were skipped and the
        # in-loop checkpoint was therefore never reached.
        if hasUnsavedFrames:
            yieldDF.to_csv(outputFile, index=False)
            print("[INFO]. CSV File saved successfully.")