Summarize model "bias" for all scenarios
===

Import custom modules for working with model and AWS data
----

In [1]:
from netCDF4 import Dataset
import netCDF4
import pandas as pd
import numpy as np
import os
# import seaborn as sns
# import matplotlib.pyplot as plt
# import matplotlib.ticker as ticker
# from matplotlib.lines import Line2D
# from matplotlib.text import Text
from datetime import datetime

%load_ext autoreload
%autoreload 2
from model import Model
from GCNet import GCNet
# from plotUtils import PlotUtils

pd.options.mode.chained_assignment = None

Global vars
---

In [2]:
# QC cutoff (a percent; passed to subsetData as qcLevel)
qcCutoff = 50

# analysis window: first and last year, shared by model and AWS subsets
Syr, Eyr = 1996, 2005
yrRange = (Syr, Eyr)
yrRangeAWS = (Syr, Eyr)

# month selection: label and explicit month numbers
subStr = "Jul"
monSub = [7]

# column labels of the results table, in output order
biasColumns = [
    'ERAI', 'WRF_ERAI',
    'LE_Hist', 'WRF_LE_Hist',
    'LE_RCP85', 'WRF_LE_RCP85',
    'LW', 'WRF_LW',
    'Future RCP85', 'Future LW',
    'Future WRF RCP85', 'Future WRF LW',
]

Function: subset a variable by years, months, QC
---

In [3]:
def subsetData( X, yrRange = None, monRange = None, qcFunc = None, qcLevel = None, qcIX = None):
    """
    Subset a DataFrame by years, months, and (optionally) data quality.

    X must contain 'year' and 'month' columns. For the QC step they must be
    the LAST two columns of X: everything before them is treated as data.

        yrRange = (year1, year2), i.e., a range of years (both ends included)
        monRange = (m1, m2, m3...), i.e., a list of explicit month numbers
        qcFunc = function handle for qcStats; called with the data columns
                 only, its result is compared against qcLevel
        qcLevel = x, i.e., a percent cutoff; columns with qcFunc(...) > x are kept.
                  When None, no QC slicing is done and 'year'/'month' stay in the result.
        qcIX = existing index for QC slicing (e.g., the one returned by an
               earlier call); when given, qcFunc is not called

    Returns a tuple (Xvalid, qcIX): the subset with all-NaN columns removed,
    and the QC index that was used (None if no QC was requested). After QC
    slicing the result holds the selected data columns plus 'month'.

    Raises TypeError if qcLevel is given but neither qcIX nor a callable
    qcFunc is available to build the column selection.
    """

    # first, subset by year range (inclusive on both ends)
    if yrRange is not None:
        yrStart, yrEnd = yrRange[0], yrRange[1]
        X = X.loc[ X['year'].isin( range( yrStart, yrEnd+1 ) ) ]

    # second, subset by month range
    if monRange is not None:
        X = X.loc[ X['month'].isin(monRange) ]

    # third, subset by data quality
    if qcLevel is not None:
        Xdata = X.iloc[:,:-2]  # data columns only: drop the trailing year and month columns

        if qcIX is None:  # create a new QC index
            if not callable(qcFunc):
                raise TypeError("qcFunc must be callable when qcLevel is given and qcIX is not")
            qcIX = qcFunc( Xdata ) > qcLevel

        # .iloc rejects a boolean Series as a mask, so index with a plain array;
        # the caller still gets qcIX back unchanged
        mask = np.asarray( qcIX )

        # copy so that adding the month column never writes into a view of X
        Xbest = Xdata.iloc[:,mask].copy()
        Xbest['month'] = X['month']  # restore month column
        X = Xbest

    # drop sites with no valid data
    Xvalid = X.dropna(axis='columns',how='all')

    # return data subset and the QC index used
    return ( Xvalid, qcIX )

Load AWS metadata
----

In [4]:
# file locations: site metadata is relative to the notebook,
# station data lives under the user's home directory
home = os.path.expanduser("~")
metaFN = "erai/site_info.nc"
gcnetDir = "{}/Documents/GCNet/current".format(home)
dataFN = "{}/gcnet.allStationData_d_365.nc".format(gcnetDir)

# build the AWS reader and load its metadata
A = GCNet(metaFN, dataFN)
A.loadMeta()

Read AWS time and data, add columns for "year" and "month"
-----

In [5]:
# load time first, then the air-temperature variable
time = A.loadData("time", True)
Taws = A.loadData("AirT1", True)

# column labels as loaded, captured before the helper columns are added;
# the model cells reuse these labels
awsCols = Taws.columns.values

# helper columns used later for subsetting: calendar year and month of each record
Taws['year'] = Taws["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Taws['month'] = Taws["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Taws = Taws.iloc[:, 1:]

hours since 1900-01-01 00:00:00
noleap
RHS: Resetting value 18.121666
LHS: Resetting value 14.764

  Y = np.array( X )
  Xqc = np.where( X > 20., np.nan, X )
  Yqc = np.where( Yqc > 1.e36, np.nan, Yqc )





Prepare ERA Interim
----

In [6]:
suff = "19960101-20171231"

# closest point data
modelFN = "erai/erai_tas_closest_{}.nc".format(suff)
Mcl = Model("erai/erai_geog_sub.nc", modelFN)
Mcl.loadMeta()

# #  interpolated data
# modelFN = "erai/erai_tas_interp_"+suff+".nc"
# Mint = Model("erai/erai_geog_sub.nc", modelFN)
# Mint.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon);
# longitudes are negative values shifted by +360
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# create comparison datasets (dataframes); the QC index built from the AWS
# data is reused for the model so both keep the same columns
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# print() with a single argument behaves the same on Python 2 and Python 3
print(TawsValid.columns)
print(TmodelValid.columns)

# drop the month column, no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

1d dataset
Index([u'Swiss Camp', u'Crawford Point1', u'Humboldt', u'Summit', u'Tunu-N',
       u'DYE-2', u'JAR1', u'Saddle', u'NASA-E', u'NASA-SE', u'JAR2', u'month'],
      dtype='object')
Index([u'Swiss Camp', u'Crawford Point1', u'Humboldt', u'Summit', u'Tunu-N',
       u'DYE-2', u'JAR1', u'Saddle', u'NASA-E', u'NASA-SE', u'JAR2', u'month'],
      dtype='object')


Set up dataframe for results
---

In [7]:
# results table: one row per station column of TawsValid, one column per statistic
awsNames = TawsValid.columns
dfBias = pd.DataFrame(index=awsNames, columns=biasColumns)

Add bias for ERAI
---

In [8]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['ERAI'] = bias

Prepare WRF(ERA Interim)
----

In [9]:
suff = "1986-2015"

# closest point data
modelFN = "gis_erai/wrf_erai_tas_closest_{}.nc".format(suff)
Mcl = Model("gis_erai/wrf_geog.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

2d dataset


Add bias for WRF(ERAI)
---

In [10]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['WRF_ERAI'] = bias

Prepare CESM LE Historical
----

In [11]:
ens = -1  # ensemble average (and because it's the mean, results will be the same for "all" data!)

# file suffix built from the current Syr/Eyr
suff = "{}0101-{}1231".format(Syr, Eyr)

# closest point data
modelFN = "cesmle/cesmle_tas_closest_{}.nc".format(suff)
Mcl = Model("cesmle/cesmle_geog_sub.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

1d dataset


Add bias for CESM LE Historical
---

In [12]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['LE_Hist'] = bias

Prepare WRF(CESM LE Hist)
----

In [13]:
# file suffix built from the current Syr/Eyr
suff = "{}-{}".format(Syr, Eyr)

# closest point data
modelFN = "gis_cesmle/wrf_cesmle_tas_closest_{}.nc".format(suff)
Mcl = Model("gis_cesmle/wrf_geog.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

2d dataset


Add bias for WRF(CESM LE Hist)
---

In [14]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['WRF_LE_Hist'] = bias

Prepare CESM LE RCP85
----

In [15]:
# NOTE: this rebinds the notebook-level Syr, Eyr and yrRange; every cell run
# after this one sees the 2071-2080 window (yrRangeAWS is left untouched)
Syr, Eyr = 2071, 2080
yrRange = (Syr, Eyr)
suff = "{}0101-{}1231".format(Syr, Eyr)

# closest point data
modelFN = "cesmle/cesmle_tas_closest_{}.nc".format(suff)
Mcl = Model("cesmle/cesmle_geog_sub.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

1d dataset


Add bias for CESM LE RCP85
---

In [16]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['LE_RCP85'] = bias

Prepare WRF(CESM LE RCP85)
----

In [17]:
# file suffix built from the current Syr/Eyr
suff = "{}-{}".format(Syr, Eyr)

# closest point data
modelFN = "gis_cesmle/wrf_cesmle_tas_closest_{}.nc".format(suff)
Mcl = Model("gis_cesmle/wrf_geog.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

2d dataset


Add bias for WRF(CESM LE RCP85)
---

In [18]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['WRF_LE_RCP85'] = bias

Prepare CESM LW
----

In [19]:
# file suffix built from the current Syr/Eyr
suff = "{}0101-{}1231".format(Syr, Eyr)

# closest point data
modelFN = "cesmlw/cesmlw_tas_closest_{}.nc".format(suff)
Mcl = Model("cesmlw/cesmlw_geog_sub.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

1d dataset


Add bias for CESM LW
---

In [20]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['LW'] = bias

Prepare WRF(CESM LW)
----

In [21]:
# file suffix built from the current Syr/Eyr
suff = "{}-{}".format(Syr, Eyr)

# closest point data
modelFN = "gis_cesmlw/wrf_cesmlw_tas_closest_{}.nc".format(suff)
Mcl = Model("gis_cesmlw/wrf_geog.nc", modelFN)
Mcl.loadMeta()

# geography: domain handed to setDomain as (minLat, maxLat, minLon, maxLon)
minLat, maxLat = 58, 86.5
minLon, maxLon = -70 + 360, -15 + 360
Mcl.setDomain((minLat, maxLat, minLon, maxLon))

# variable
varName = "tas"

srcFile = "Closest"
timeModel = Mcl.loadData("time", True)   # time is loaded before the variable
Tmodel1 = Mcl.loadData(varName, True)
Tmodel1.columns = awsCols                # relabel model columns with the AWS column labels

# helper columns used by subsetData: calendar year and month of each record
Tmodel1['year'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).year)
Tmodel1['month'] = Tmodel1["Time"].apply(
    lambda t: datetime(year=t.year, month=t.month, day=t.day).month)

# keep everything after the first column (Time)
Tmodel1 = Tmodel1.iloc[:, 1:]

# comparison frames; the AWS-derived QC index is reused for the model
TawsValid, qcIX = subsetData(Taws, yrRangeAWS, monSub, A.qcStats, qcCutoff)
TmodelValid, _ = subsetData(Tmodel1, yrRange, monSub, A.qcStats, qcCutoff, qcIX)

# the trailing month column is no longer needed
TawsValid = TawsValid.iloc[:, :-1]
TmodelValid = TmodelValid.iloc[:, :-1]

2d dataset


Add bias for WRF(CESM LW)
---

In [22]:
# per-station difference of means: model minus AWS
bias = [np.mean(TmodelValid[aws]) - np.mean(TawsValid[aws]) for aws in awsNames]
dfBias['WRF_LW'] = bias

In [30]:
# future change = future-scenario column minus the matching historical column
dfBias['Future RCP85'] = dfBias['LE_RCP85'] - dfBias['LE_Hist']
dfBias['Future LW'] = dfBias['LW'] - dfBias['LE_Hist']
dfBias['Future WRF RCP85'] = dfBias['WRF_LE_RCP85'] - dfBias['WRF_LE_Hist']
dfBias['Future WRF LW'] = dfBias['WRF_LW'] - dfBias['WRF_LE_Hist']

# calculate average across stations for each statistic
dfBiasAvg = dfBias.mean()

# print() with a single argument behaves the same on Python 2 and Python 3
print(dfBiasAvg)
print(len(dfBiasAvg))

ERAI                2.01
WRF_ERAI           -0.57
LE_Hist            -0.47
WRF_LE_Hist        -0.24
LE_RCP85            4.67
WRF_LE_RCP85        3.03
LW                  1.42
WRF_LW              0.38
Future RCP85        5.15
Future LW           1.89
Future WRF RCP85    3.27
Future WRF LW       0.62
dtype: float64
12


In [24]:
# show floats with two decimals and a thousands separator
pd.set_option('display.float_format', '{:,.2f}'.format)

dfBias

Unnamed: 0,ERAI,WRF_ERAI,LE_Hist,WRF_LE_Hist,LE_RCP85,WRF_LE_RCP85,LW,WRF_LW,Future RCP85,Future LW,Future WRF RCP85,Future WRF LW
Swiss Camp,2.24,-1.19,-0.94,-0.5,5.13,1.53,1.07,-0.16,6.07,2.01,2.03,0.34
Crawford Point1,2.39,-0.21,0.78,0.36,4.77,3.45,2.0,0.8,3.99,1.22,3.08,0.44
Humboldt,1.2,0.91,-1.57,1.02,3.49,4.18,0.52,1.53,5.06,2.08,3.16,0.51
Summit,1.42,-1.19,-2.35,-1.12,3.24,4.31,-0.56,-0.22,5.6,1.79,5.44,0.9
Tunu-N,2.41,0.98,-2.89,0.83,2.65,4.21,-0.78,1.72,5.54,2.11,3.38,0.89
DYE-2,0.72,-1.37,-1.91,-1.01,2.84,2.35,-0.41,-0.34,4.75,1.5,3.36,0.67
JAR1,3.55,-0.86,4.2,0.03,9.38,1.76,6.82,0.31,5.19,2.62,1.73,0.28
Saddle,0.93,-1.53,-1.57,-1.38,3.15,2.52,-0.1,-0.69,4.72,1.47,3.89,0.69
NASA-E,2.88,1.06,-0.11,0.78,5.67,5.08,1.72,1.86,5.78,1.84,4.31,1.08
NASA-SE,1.94,-1.3,-1.08,-1.34,3.67,2.79,0.48,-0.62,4.76,1.56,4.13,0.72


Create a netCDF file for the bias data
-----

In [39]:
# length of the longest station name; read the names from dfBias.index directly,
# because stnNames is not assigned until the netCDF-writing cell below
x = max( [ len(d) for d in dfBias.index ] )
x

15

In [50]:
newNCFN = "model_bias_summary.nc"

# If an earlier run of this cell left a handle behind, close it first.
# NameError: the cell has not run yet in this kernel.
# RuntimeError: presumably what netCDF4 raises for an already-closed handle - TODO confirm.
try:
    ncfile.close()
except (NameError, RuntimeError):
    pass


def _spacePadded(labels, width):
    """Return a (len(labels), width) char array; each row is one label, left-justified and space-padded."""
    chars = np.empty( shape=(len(labels), width), dtype='c' )
    chars[:] = ' '
    for row, label in enumerate(labels):
        letters = netCDF4.stringtochar(np.array(label))
        chars[row, :len(letters)] = letters
    return chars


stnNames = dfBias.index
nSites = len(stnNames)
str_len1 = max( [ len(d) for d in stnNames])  # longest string in stnNames

# one description per dfBias column, in column order
varDesc = [ 'ERAI bias', 'WRF ERAI bias', 'CESM LE Hist bias', 'WRF CESM LE Hist bias',
            'CESM LE RCP85 bias', 'WRF CESM LE RCP85 bias', 'CESM LW 1pt5degC bias', 'WRF CESM LW 1pt5degC bias',
            'CESM LE RCP85 future change', 'CESM LW 1pt5degC future change',
            'WRF CESM LE RCP85 future change', 'WRF CESM LW 1pt5degC future change' ]
str_len2 = max( [ len(d) for d in varDesc])  # longest string in varDesc

varNames = dfBias.columns

# zip() below would silently truncate on a mismatch, so check up front
if not ( len(varDesc) == len(varNames) == len(dfBiasAvg) ):
    raise ValueError("varDesc, dfBias columns and dfBiasAvg must have the same length")

# the context manager closes the file even if one of the writes fails
with Dataset(newNCFN, mode='w', format='NETCDF4') as ncfile:
    ncfile.title = "Summary of bias by model"

    stn_dim = ncfile.createDimension('station', nSites)
    str_dim1 = ncfile.createDimension('str_dim1', str_len1)
    stats_dim = ncfile.createDimension('stats_dim', len(dfBiasAvg))
    str_dim2 = ncfile.createDimension('str_dim2', str_len2)

    # station names
    ncVar = ncfile.createVariable('stationName', 'c', ('station','str_dim1'))
    stns = _spacePadded( stnNames, str_len1 )
    ncVar[:,:] = stns

    # each column of the dataframe
    for v, vDesc in zip( varNames, varDesc ):
        v2 = v.replace(' ','_')  # variable name is the column label with spaces replaced
        ncVar = ncfile.createVariable(v2, np.float32, ('station',))
        ncVar.units = "deg C"
        ncVar.description = vDesc
        ncVar.long_name = vDesc
        ncVar[:] = list( dfBias[ v ].astype("float") )

    # names of the dataframe columns
    ncVar = ncfile.createVariable('statsNames', 'c', ('stats_dim','str_dim2'))
    statNames = _spacePadded( varDesc, str_len2 )
    ncVar[:,:] = statNames

    # averages of statistics
    ncVar = ncfile.createVariable('statsAvg', np.float32, ('stats_dim',))
    ncVar.units = "deg C"
    ncVar.description = "Average across stations of statistical values"
    ncVar.long_name = "Average across stations of statistical values"
    ncVar[:] = list(dfBiasAvg)