# Read time series of wind speed from observations and the WRF model 


*   Read data from DTU Wind Energy site Høvsøre on the Danish west coast 
*   The data is in GitHub: [Data for SDC summer school](https://github.com/ahahmann/SDC-summer-school)

## Setup: Import libraries and define helper functions

In [None]:
# Special code for Jupyter Notebook
%matplotlib inline

# import other 3rd party libraries
from scipy.stats import linregress
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys,importlib
import xarray as xr

In [None]:
# Define some useful functions
def corr(X,Y):
    """
    Pearson correlation coefficient of X and Y, ignoring NaN pairs

    A sample contributes only when neither X nor Y is NaN at that position.

    :param array X: First series of values
    :param array Y: Second series of values, same length as X
    :returns: Correlation coefficient of the pairs that are not NaN
    """
    # Blank out each series wherever the other one is missing
    y_valid = np.where(np.isnan(X), np.nan, Y)
    x_valid = np.where(np.isnan(y_valid), np.nan, X)
    count = np.count_nonzero(~np.isnan(x_valid))

    # n * covariance: sum(x*y) - n * mean(x) * mean(y)
    numerator = np.nansum(x_valid*y_valid) - count*np.nanmean(x_valid)*np.nanmean(y_valid)
    # n * std(x) * std(y), using NumPy's default (population) variance
    denominator = count*np.sqrt(np.nanvar(x_valid)*np.nanvar(y_valid))
    return numerator/denominator

def rmse(predictions, targets):
    """
    Root-mean-square error between predictions and targets

    :param array predictions: Predicted values
    :param array targets: Values the predictions are compared against
    :returns: Square root of the mean squared difference
    """
    residual = predictions - targets
    mean_square = (residual ** 2).mean()
    return np.sqrt(mean_square)

def read_hovsore(year=2000):
    """
    Read the Høvsøre mast observations from the course repository

    :param int year: Year to return (2005 to 2017). The default value
                     2000 is a sentinel meaning "return all years".
    :returns: Dataframe indexed by time with columns U80 and D100, or
              None (after printing a message) if the year is not covered
    """
    all_years = (year == 2000)
    if not all_years and (year < 2005 or year > 2017) :
        print("No data for ",year)
        return

    url = "https://raw.githubusercontent.com/ahahmann/SDC-summer-school/master/"
    filename = "hovsore80m_2005_2017_clean.csv"
    headerNames = ['time', 'U80', 'D100']
    # The first row of the file is skipped and replaced by headerNames
    data = pd.read_csv(url+filename, skiprows=1, names=headerNames)

    # Shift the time stamps back by one hour (the file's original
    # comment describes this as the conversion to UTC) and index by them
    data.index = pd.to_datetime(data.time) - np.timedelta64(1,'h')
    data = data.drop(columns=['time'])

    if all_years:
        return data
    # Partial-string indexing keeps only the rows of the requested year
    return data.loc[str(year)]

def MCP_Var(mast, ref, mast_U, ref_U):
    """
    Perform MCP on mast using reference, via variance-ratio method

    This will perform MCP on the mast and reference dataframes to
    create a long-term corrected dataset at the mast location. The
    concurrent period is found by joining the two dataframes on their
    index, so both must be indexed by time.

    The corrected wind speed is

        U_lt = mean(mast) + (std(mast) / std(ref)) * (U_ref - mean(ref))

    where the means and standard deviations are taken over the
    concurrent period only, and U_ref is the full reference series.

    .. note:: The mast and reference wind speeds should be at the
              same height.

    :param dataframe mast: Dataframe with mast data
    :param dataframe ref: Dataframe with reference site data
    :param string mast_U: Name of the wind speed column of mast
    :param string ref_U: Name of the wind speed column of ref
    :returns: Dataframe with one column "U80" holding the long-term
              corrected wind speed, indexed like ref
    """
    # Adjust wind speed names if identical (the join adds suffixes)
    if mast_U == ref_U:
        mrg_mast = mast_U + "_x"
        mrg_ref = ref_U + "_y"
    else:
        mrg_mast = mast_U
        mrg_ref = ref_U

    # Create a merged dataset, only times in both df's
    mrg = mast.join(ref, lsuffix='_x', rsuffix='_y').dropna()

    # Use variance-ratio method to fit wind speeds.
    # The scale factor must be a ratio of standard deviations (spread
    # about the mean), not of root-mean-square values, which also
    # contain the mean. Only the ratio is used, so the ddof choice
    # cancels out.
    mnref = mrg[mrg_ref].mean()
    sdref = mrg[mrg_ref].std()
    mnmast = mrg[mrg_mast].mean()
    sdmast = mrg[mrg_mast].std()
    adj_U = mnmast + (sdmast/sdref)*(ref[ref_U]-mnref)

    return pd.DataFrame({"U80": adj_U})

def MCP_LinReg(mast, ref, mast_U, ref_U):
    """
    Perform MCP on mast using reference, via linear-regression

    The mast wind speed is regressed on the reference wind speed over
    the period where both dataframes have data (they are joined on
    their index). The fitted line is then applied to the complete
    reference series to give a long-term corrected dataset at the
    mast location.

    .. note:: The mast and reference wind speeds need to be at the
              same height.

    :param dataframe mast: Dataframe with mast data
    :param dataframe ref: Dataframe with reference site data
    :param string mast_U: Name of the wind speed column of mast
    :param string ref_U: Name of the wind speed column of ref
    :returns: Dataframe with one column "U80" holding the long-term
              corrected wind speed, indexed like ref
    """
    # Identically named columns receive the join suffixes
    same_name = (mast_U == ref_U)
    mast_col = mast_U + "_x" if same_name else mast_U
    ref_col = ref_U + "_y" if same_name else ref_U

    # Keep only the time stamps present, and complete, in both df's
    concurrent = mast.join(ref, lsuffix='_x', rsuffix='_y').dropna()

    # Fit mast = slope * ref + intercept over the concurrent period
    fit = linregress(concurrent[ref_col], concurrent[mast_col])

    # Apply the fit to the whole reference record
    lt_U = ref[ref_U]*fit.slope + fit.intercept
    return pd.DataFrame({"U80": lt_U})


## Section 1: Read the datasets

In [None]:
# Read the winds simulated by WRF and the observed winds, then plot both.
# Base URL of the GitHub repository that hosts the data files.
url = "https://raw.githubusercontent.com/ahahmann/SDC-summer-school/master/"

# This is the simulated data, WRF model, all years.
# The "Time" column becomes the index and is parsed as dates.
TSfile = url+"WRF_Hovsore_2005-2017.csv"
ref = pd.read_csv(TSfile,parse_dates=True,index_col="Time")
# NOTE(review): this is not the last expression of the cell, so a notebook
# will presumably not display this preview; wrap it in print() to see it.
ref.head()

# Read the observed winds for one year (choose the year you want to examine).
# XXXX is a placeholder: replace it with a year that read_hovsore accepts
# (2005 to 2017) before running this cell.
year = XXXX
hovsore = read_hovsore(year)

# Plot the two time series (80 m wind speed, simulated and observed)
ref['U80'].plot(figsize=(10,3),label="WRF")
hovsore['U80'].plot(figsize=(10,3),label="OBS")
plt.legend()
plt.show()


## Section 2: Visually examine the data

In [None]:
# Plot a short time period, maybe a month, then a day.
# Selecting with the string "<year>-03" keeps only the samples from March
# of the chosen year; change the string to look at another period.
fig,ax = plt.subplots(figsize=(10,3))
ax.plot(ref.U80.loc[str(year)+"-03"],label='Simulated')
ax.plot(hovsore.U80.loc[str(year)+"-03"],label="Observed")
ax.set_ylabel('wind speed [m/s]')
ax.set_title('Wind speed, March '+str(year))
plt.legend()
plt.show()

What do you see when comparing the observed and modeled data?

* What is correctly simulated?
* What is not correctly simulated?
* What type of errors are most common?

## Section 3: Compute some statistics


In [None]:
# Merge the WRF and OBS datasets on their index and drop every row that
# has a missing value. Columns present in both dataframes get the suffix
# "_x" for the observations (hovsore) and "_y" for the WRF data (ref).
mrg = hovsore.join(ref, lsuffix='_x', rsuffix='_y').dropna()
# print(mrg)
# Scatter plot of simulated against observed 80 m wind speed
plt.scatter(mrg.U80_x, mrg.U80_y, s=2)
plt.title('OBS versus REF wind speed, h=80m')
plt.xlabel('OBS wind')
plt.ylabel('REF wind')

# Summary statistics over the merged sample
print('U obs mean: {:4.2f} m/s'.format(mrg.U80_x.mean()))
print('U sim mean: {:4.2f} m/s'.format(mrg.U80_y.mean()))
print('Correlation: {:4.2f}'.format(corr(mrg.U80_x, mrg.U80_y)))
# Bias here is mean(obs - sim) relative to mean(obs), printed as a percentage
print('Bias: {:4.2%}'.format((mrg.U80_x - mrg.U80_y).mean()/mrg.U80_x.mean()))

## Section 4: Perform MCP
The functions MCP_LinReg and MCP_Var will be used to create long-term corrected datasets. The MCP functions take two dataframes as input, and need to be provided with the names of the wind speed (U) columns to be used for the analysis. The two dataframes are matched on their time index, so no separate date column name is passed.

In [None]:
# Build the long-term corrected series with both MCP methods, using the
# observations currently held in hovsore as the mast data and the WRF
# series as the reference. Both dataframes use the column name "U80".
lt_var = MCP_Var(hovsore, ref, "U80", "U80")
lt_lin = MCP_LinReg(hovsore, ref, "U80", "U80")
print(lt_var)

In [None]:
# Compare the long-term corrected series against the observations for
# **all years** to evaluate the LTC methods

hovsore = read_hovsore()  # default argument: read all years (replaces the single-year data)

# Join the dataframes to make sure same sample is used.
# Columns present in both dataframes get the suffix "_o" for the
# observations and "_LT" for the series they are compared with; rows with
# any missing value are dropped.

eval_ref = hovsore.join(ref, lsuffix='_o', rsuffix='_LT').dropna()
eval_lt_var = hovsore.join(lt_var, lsuffix='_o', rsuffix='_LT').dropna()
eval_lt_lin = hovsore.join(lt_lin, lsuffix='_o', rsuffix='_LT').dropna()

In [None]:
# Compute the RMSE and bias of each of the two LTC methods against the observations
# Compare the bias obtained from a single year of data with the bias of the two LTC methods