In [14]:
%matplotlib inline

In [15]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Convert parallel LandscapeDNDC Output to netCDF

Motebook converts a series of MPI parallel LandscapeDNDC Output Files to one netCDF:
E.g. 
rg_arable_zero_0_report-harvest.txt
rg_arable_zero_1_report-harvest.txt
...
rg_arable_zero_48_report-harvest.txt

to rg_arable_zero_report-harvest.nc. 

In [22]:
def csvToNetCDF(df, maskFileName, outputFileName):
    """Write a per-cell table to a netCDF file gridded by (year, lat, lon).

    Every row of ``df`` is placed on the grid by looking up its ``id`` in the
    ``ID`` variable of the mask file, which yields the latitude and longitude
    of the row.  The table is then indexed by ``('year', 'lat', 'lon')``,
    converted to an ``xarray.Dataset``, printed and saved.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns ``id`` and ``year``.  All columns
        other than ``year`` (including ``id``) end up as data variables.
        The frame passed in is left unmodified.
    maskFileName : str
        netCDF file providing the coordinates ``lat`` and ``lon`` and a
        two-dimensional variable ``ID`` indexed as ``[lat, lon]``; NaN marks
        grid cells that carry no id.
    outputFileName : str
        Path of the netCDF file to write.

    Notes
    -----
    Rows whose ``id`` does not occur in the mask cannot be placed on the
    grid.  They are dropped and a warning naming the affected ids is issued,
    so that no NaN coordinates end up in the dataset index.
    """
    # Pull everything needed out of the mask while the file is open; the
    # context manager releases the file handle afterwards.
    with xr.open_dataset(maskFileName) as mask:
        lats = mask.lat.values
        lons = mask.lon.values
        ids = mask.ID.values

    # Find all cells carrying an id in one vectorised pass.  np.nonzero and
    # boolean indexing both walk the array in row-major order, so the i-th id
    # belongs to the i-th (lat index, lon index) pair.
    hasId = ~np.isnan(ids)
    latIdx, lonIdx = np.nonzero(hasId)
    cellIds = ids[hasId].astype(np.int64).tolist()
    latById = dict(zip(cellIds, lats[latIdx]))
    lonById = dict(zip(cellIds, lons[lonIdx]))

    # Ids unknown to the mask would map to NaN coordinates; filter them out
    # explicitly and tell the user instead of passing NaN on to the index.
    known = df["id"].isin(cellIds)
    if not known.all():
        import warnings
        unknownIds = df.loc[~known, "id"].unique().tolist()
        warnings.warn(
            "csvToNetCDF: dropping %d row(s) whose id is not in %s (ids: %s)"
            % (int((~known).sum()), maskFileName, unknownIds[:10])
        )

    # Work on a copy so the caller's frame keeps its original columns.
    table = df.loc[known].copy()
    table["lat"] = table["id"].map(latById)
    table["lon"] = table["id"].map(lonById)

    # The index levels become the dimensions/coordinates of the dataset.
    table = table.set_index(['year', 'lat', 'lon'])
    dsout = xr.Dataset.from_dataframe(table)
    print(dsout)

    # Save the new Dataset to a netCDF file.
    dsout.to_netcdf(outputFileName)
    print("Done.")
#

In [23]:
# CSV tables to convert; one netCDF file is produced per entry.
fileList = [
    "rg_arable_metrx_two__Harvest.csv",
    "rg_arable_metrx_two__Fertilize.csv",
    "rg_arable_metrx_two__Manure.csv",
    "rg_arable_metrx_two__PH-yearly.csv",
    "rg_arable_metrx_two__SC-yearly.csv",
    "rg_arable_metrx_zero__Fertilize.csv",
    "rg_arable_metrx_zero__Harvest.csv",
    "rg_arable_metrx_zero__Manure.csv",
    "rg_arable_metrx_zero__PH-yearly.csv",
    "rg_arable_metrx_zero__SC-yearly.csv",
]

# Output name = input name with its last three characters ("csv") replaced by "nc".
outputFileNameDict = {csvName: csvName[:-3] + "nc" for csvName in fileList}
print(outputFileNameDict)

{'rg_arable_metrx_two__Harvest.csv': 'rg_arable_metrx_two__Harvest.nc', 'rg_arable_metrx_two__Fertilize.csv': 'rg_arable_metrx_two__Fertilize.nc', 'rg_arable_metrx_two__Manure.csv': 'rg_arable_metrx_two__Manure.nc', 'rg_arable_metrx_two__PH-yearly.csv': 'rg_arable_metrx_two__PH-yearly.nc', 'rg_arable_metrx_two__SC-yearly.csv': 'rg_arable_metrx_two__SC-yearly.nc', 'rg_arable_metrx_zero__Fertilize.csv': 'rg_arable_metrx_zero__Fertilize.nc', 'rg_arable_metrx_zero__Harvest.csv': 'rg_arable_metrx_zero__Harvest.nc', 'rg_arable_metrx_zero__Manure.csv': 'rg_arable_metrx_zero__Manure.nc', 'rg_arable_metrx_zero__PH-yearly.csv': 'rg_arable_metrx_zero__PH-yearly.nc', 'rg_arable_metrx_zero__SC-yearly.csv': 'rg_arable_metrx_zero__SC-yearly.nc'}


In [24]:
# Convert every listed CSV table to netCDF, using rg_mask.nc to place ids on the grid.
for csvName in fileList:
    print(csvName)
    ncName = outputFileNameDict[csvName]
    csvToNetCDF(pd.read_csv(csvName, sep=','), "rg_mask.nc", ncName)
    

rg_arable_metrx_two__Harvest.csv
<xarray.Dataset>
Dimensions:                   (lat: 128, lon: 168, year: 6)
Coordinates:
  * year                      (year) int64 1990 1991 1992 1993 1994 1995
  * lat                       (lat) float64 36.12 36.38 36.62 ... 67.62 68.38
  * lon                       (lon) float64 -10.38 -10.12 -9.875 ... 31.12 31.38
Data variables:
    source                    (year, lat, lon) object nan nan nan ... nan nan
    id                        (year, lat, lon) float64 nan nan nan ... nan nan
    dC_bud_export[kgCha-1]    (year, lat, lon) float64 nan nan nan ... nan nan
    dC_straw_export[kgCha-1]  (year, lat, lon) float64 nan nan nan ... nan nan
    dN_bud_export[kgNha-1]    (year, lat, lon) float64 nan nan nan ... nan nan
    dN_straw_export[kgNha-1]  (year, lat, lon) float64 nan nan nan ... nan nan
Done.
rg_arable_metrx_two__Fertilize.csv
<xarray.Dataset>
Dimensions:                 (lat: 128, lon: 168, year: 6)
Coordinates:
  * year                   

In [27]:
# Long listing of the files matching rg*metrx*nc, oldest modification time first (-t reversed by -r).
!ls -lrt rg*metrx*nc  

-rw-r--r--  1 haas  901   9704514 30 Jun 19:51 rg_arable_metrx_two__Harvest.nc
-rw-r--r--  1 haas  901   6581950 30 Jun 19:51 rg_arable_metrx_two__Fertilize.nc
-rw-r--r--  1 haas  901   5516990 30 Jun 19:52 rg_arable_metrx_two__Manure.nc
-rw-r--r--  1 haas  901   6631102 30 Jun 19:52 rg_arable_metrx_two__PH-yearly.nc
-rw-r--r--  1 haas  901  14263640 30 Jun 19:52 rg_arable_metrx_two__SC-yearly.nc
-rw-r--r--  1 haas  901   6581950 30 Jun 19:52 rg_arable_metrx_zero__Fertilize.nc
-rw-r--r--  1 haas  901  14255100 30 Jun 19:53 rg_arable_metrx_zero__Harvest.nc
-rw-r--r--  1 haas  901   5516990 30 Jun 19:53 rg_arable_metrx_zero__Manure.nc
-rw-r--r--  1 haas  901   6631102 30 Jun 19:53 rg_arable_metrx_zero__PH-yearly.nc
-rw-r--r--  1 haas  901  14263640 30 Jun 19:54 rg_arable_metrx_zero__SC-yearly.nc
