# Process gefs_all

A script that takes gefs_all.csv from the IRLSetup github page and turns it into the long form for easy and quick analysis.

In [76]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def get_wspd_wdir(u, v):
    
    # return NAs if any are such
    if np.isnan([u, v]).any():
        return [np.nan, np.nan]
    
    # make sure u and v are floats
    u = float(u)
    v = float(v)
    
    wspd = math.sqrt(math.pow(u, 2) + math.pow(v, 2))
    wdir = math.atan2(-1 * u, -1 * v) * 57.2957795131 # 57.2957795131 is 180 / pi
    if wdir < 0:    # atan2 goes from -pi to pi, so you need to
        wdir += 360 # add 360 in case wdir is negative
    return [wspd, int(round(wdir))]

In [None]:
data_url = 'https://raw.githubusercontent.com/fit-winds/IRLSetup/master/data/gefs_all.csv'
gefs_all = pd.read_csv(data_url)

In [None]:
gefs_all['runtime'] = pd.to_datetime(gefs_all['runtime'])
gefs_all['validtime'] = pd.to_datetime(gefs_all['validtime'])

In [None]:
cols_u = [col for col in gefs_all.columns if '.u' in col]
cols_v = [col for col in gefs_all.columns if '.v' in col]
gefs_u_melt = pd.melt(gefs_all, id_vars=['runtime', 'validtime', 'fcsthour'], 
                      value_vars=cols_u, var_name='member', value_name='u')
gefs_v_melt = pd.melt(gefs_all, id_vars=['runtime', 'validtime', 'fcsthour'], 
                      value_vars=cols_v, var_name='member', value_name='v')
gefs_u_melt['member'] = gefs_u_melt['member'].str.replace('.u', '')
gefs_v_melt['member'] = gefs_v_melt['member'].str.replace('.v', '')
gefs_melt = gefs_u_melt.copy()
gefs_melt['v'] = gefs_v_melt['v']
del(data_url, gefs_all, cols_u, cols_v, gefs_u_melt, gefs_v_melt)

In [None]:
# create arrays to store wind run and setup info
wndrun_u = np.zeros(len(gefs_melt['runtime']))
wndrun_u[:] = np.nan
wndrun_v = wndrun_u.copy()
setup = wndrun_u.copy()

In [None]:
# now iterate over gefs_melt and calculate wndrun_u, wndrun_v, and setup.
# note that this only calculates these values for the ensemble members and
# it takes a LONG time to run!
i = 0
for row in gefs_melt.itertuples():
    # only perform calculation if we have enough data to calculate a wind run
    if (row[4] != 'kmlb') & (row[3] >= 9):
        wndrun = gefs_melt[(gefs_melt['runtime'] == row[1]) & (gefs_melt['member'] == row[4]) 
                           & (gefs_melt['fcsthour'] <= row[3]) & (gefs_melt['fcsthour'] > row[3] - 12)].mean()

        # update arrays with calculated wind runs
        wndrun_u[i], wndrun_v[i] = wndrun[1:3]

        # now on to calculating setup
        wndrun_wspd_wdir = get_wspd_wdir(wndrun[1], wndrun[2])

        u1 = wndrun_wspd_wdir[0] * math.cos(math.radians(wndrun_wspd_wdir[1] + 10))
        u2 = wndrun_wspd_wdir[0] * math.cos(math.radians(wndrun_wspd_wdir[1] + 26))
        u_r = -1 * (u1 * 28 + u2 * 70) / 98
        setup[i] = 1.637 * math.copysign(1, u_r) * math.pow(abs(u_r), 1.5)
            
    i += 1
    if i % 1050 == 0:
        print(str(i/1050) + '% complete.')

In [None]:
gefs_melt['wndrun_u'] = wndrun_u
gefs_melt['wndrun_v'] = wndrun_v
gefs_melt['setup'] = setup

In [None]:
gefs_melt.to_csv('member_setup_done.csv', index=False)

In [78]:
gefs_melt = pd.read_csv('member_setup_done.csv')
gefs_melt['runtime'] = pd.to_datetime(gefs_melt['runtime'])
gefs_melt['validtime'] = pd.to_datetime(gefs_melt['validtime'])
kmlb_asos = pd.read_csv('kmlb_setup.csv')
kmlb_asos['validtime'] = pd.to_datetime(kmlb_asos['validtime'])

In [79]:
# fill in kmlb wind run and setup values
i = 0
for row in gefs_melt.itertuples():
    if row[4] == 'kmlb':
        df = kmlb_asos[kmlb_asos['validtime'] == row[2]]
        # only update info if we have kmlb data for this valid time
        if len(df) > 0:
            gefs_melt.at[i, 'u'] = df['u']
            gefs_melt.at[i, 'v'] = df['v']
            gefs_melt.at[i, 'wndrun_u'] = df['wndrun_u']
            gefs_melt.at[i, 'wndrun_v'] = df['wndrun_v']
            gefs_melt.at[i, 'setup'] = df['setup']
    i += 1

In [80]:
gefs_melt.to_csv('gefs_all.csv', index=False)

In [84]:
# now calculate the setup error
gefs_melt['setup_error'] = np.nan
for i in range(0, len(gefs_melt)):
# for i in range(0, 1050):
    if gefs_melt.loc[i, 'member'] != 'kmlb':
        fcst_setup = gefs_melt.loc[i, 'setup']
        try:
            obs_setup = kmlb_asos[kmlb_asos['validtime'] == gefs_melt.loc[i, 'validtime']]['setup'].values[0]
        except IndexError:
            obs_setup = np.nan
        gefs_melt.loc[i, 'setup_error'] = fcst_setup - obs_setup
#         print(fcst_setup, obs_setup)
    if i % 1050 == 0:
        print(str(i/1050) + '% complete.')

0.0% complete.
1.0% complete.
2.0% complete.
3.0% complete.
4.0% complete.
5.0% complete.
6.0% complete.
7.0% complete.
8.0% complete.
9.0% complete.
10.0% complete.
11.0% complete.
12.0% complete.
13.0% complete.
14.0% complete.
15.0% complete.
16.0% complete.
17.0% complete.
18.0% complete.
19.0% complete.
20.0% complete.
21.0% complete.
22.0% complete.
23.0% complete.
24.0% complete.
25.0% complete.
26.0% complete.
27.0% complete.
28.0% complete.
29.0% complete.
30.0% complete.
31.0% complete.
32.0% complete.
33.0% complete.
34.0% complete.
35.0% complete.
36.0% complete.
37.0% complete.
38.0% complete.
39.0% complete.
40.0% complete.
41.0% complete.
42.0% complete.
43.0% complete.
44.0% complete.
45.0% complete.
46.0% complete.
47.0% complete.
48.0% complete.
49.0% complete.
50.0% complete.
51.0% complete.
52.0% complete.
53.0% complete.
54.0% complete.
55.0% complete.
56.0% complete.
57.0% complete.
58.0% complete.
59.0% complete.
60.0% complete.
61.0% complete.
62.0% complete.
63

In [85]:
gefs_melt

Unnamed: 0,runtime,validtime,fcsthour,member,u,v,wndrun_u,wndrun_v,setup,setup_error
0,2017-04-25 18:00:00,2017-04-25 18:00:00,0,gec00,1.965971e+00,0.971252,,,,
1,2017-04-25 18:00:00,2017-04-25 21:00:00,3,gec00,1.666644e+00,1.220502,,,,
2,2017-04-25 18:00:00,2017-04-26 00:00:00,6,gec00,1.363354e+00,1.795483,,,,
3,2017-04-25 18:00:00,2017-04-26 03:00:00,9,gec00,4.287284e+00,0.430937,2.320813,1.104544,0.102834,-2.294588
4,2017-04-25 18:00:00,2017-04-26 06:00:00,12,gec00,4.134421e+00,-1.345870,2.862926,0.525263,-0.710170,-4.314317
5,2017-04-25 18:00:00,2017-04-26 09:00:00,15,gec00,2.778217e+00,-1.080207,3.140819,-0.049914,-2.124726,-3.947886
6,2017-04-25 18:00:00,2017-04-26 12:00:00,18,gec00,1.841113e+00,0.298115,3.260259,-0.424256,-3.170190,-3.066276
7,2017-04-25 18:00:00,2017-04-26 15:00:00,21,gec00,1.346368e+00,2.165606,2.525030,0.009411,-1.434957,-1.451514
8,2017-04-25 18:00:00,2017-04-26 18:00:00,24,gec00,1.522402e-01,2.972480,1.529485,1.088998,0.472105,0.205243
9,2017-04-25 18:00:00,2017-04-26 21:00:00,27,gec00,-1.327186e+00,3.861133,0.503134,2.324334,4.521472,1.872159


In [86]:
gefs_melt.to_csv('gefs_all_w_error.csv', index=False)

In [87]:
gefs_melt

Unnamed: 0,runtime,validtime,fcsthour,member,u,v,wndrun_u,wndrun_v,setup,setup_error
0,2017-04-25 18:00:00,2017-04-25 18:00:00,0,gec00,1.965971e+00,0.971252,,,,
1,2017-04-25 18:00:00,2017-04-25 21:00:00,3,gec00,1.666644e+00,1.220502,,,,
2,2017-04-25 18:00:00,2017-04-26 00:00:00,6,gec00,1.363354e+00,1.795483,,,,
3,2017-04-25 18:00:00,2017-04-26 03:00:00,9,gec00,4.287284e+00,0.430937,2.320813,1.104544,0.102834,-2.294588
4,2017-04-25 18:00:00,2017-04-26 06:00:00,12,gec00,4.134421e+00,-1.345870,2.862926,0.525263,-0.710170,-4.314317
5,2017-04-25 18:00:00,2017-04-26 09:00:00,15,gec00,2.778217e+00,-1.080207,3.140819,-0.049914,-2.124726,-3.947886
6,2017-04-25 18:00:00,2017-04-26 12:00:00,18,gec00,1.841113e+00,0.298115,3.260259,-0.424256,-3.170190,-3.066276
7,2017-04-25 18:00:00,2017-04-26 15:00:00,21,gec00,1.346368e+00,2.165606,2.525030,0.009411,-1.434957,-1.451514
8,2017-04-25 18:00:00,2017-04-26 18:00:00,24,gec00,1.522402e-01,2.972480,1.529485,1.088998,0.472105,0.205243
9,2017-04-25 18:00:00,2017-04-26 21:00:00,27,gec00,-1.327186e+00,3.861133,0.503134,2.324334,4.521472,1.872159
