# ERA5 Resolution Improvement
---
This notebook is concerned with the improvement of the grid resolution of the time series forecast results. Each one of the 3,195 grids must be split into 10,000 (100 x 100) which corresponds to the resolution of the GWA map.

This notebook requires the following:
* the **resulting pickle file from NB # 1**, which could not be made available through Github due to filesize
* the **Global Wind Atlas map** used as input in NB # 1, available in the Github repository
* the **resulting pickle files from NB # 6**, available in the Github repository

In [1]:
import numpy as np
import pandas as pd
import rasterio as rio
import math
from datetime import datetime

In [2]:
gwa_df = pd.read_pickle("results/gwa0817_100m.pkl")
gwa_df

Unnamed: 0,x,y,windvelo
0,0,5801,5.230973
1,0,5802,5.231110
2,0,5803,5.231216
3,0,5804,5.231292
4,0,5805,5.232414
...,...,...,...
22423447,4950,5957,5.187756
22423448,4950,5958,5.187970
22423449,4950,5959,5.188166
22423450,4950,5960,5.188385


In [3]:
tif_file_name = "data/PHL_wind-speed_100m.tif"
with rio.open(tif_file_name) as img:
    affine_matrix = img.transform
    inverse_affine = ~affine_matrix
img.close()

In [4]:
print(affine_matrix) # longitude, latitude = affine_matrix * (x, y)
print(inverse_affine) # x, y = inverse_affine * (longitude, latitude)

| 0.00, 0.00, 116.04|
| 0.00,-0.00, 22.18|
| 0.00, 0.00, 1.00|
| 400.00, 0.00,-46416.63|
| 0.00,-400.00, 8871.31|
| 0.00, 0.00, 1.00|


In [11]:
x_list = gwa_df.x.unique()
len(x_list)

4951

In [12]:
y_list = gwa_df.y.unique()
len(y_list)

7662

In [17]:
gwa_df["lat"] = pd.Series(dtype="float32")
gwa_df["lon"] = pd.Series(dtype="float32")
gwa_df

Unnamed: 0,x,y,windvelo,lat,lon
0,0,5801,5.230973,,
1,0,5802,5.231110,,
2,0,5803,5.231216,,
3,0,5804,5.231292,,
4,0,5805,5.232414,,
...,...,...,...,...,...
22423447,4950,5957,5.187756,,
22423448,4950,5958,5.187970,,
22423449,4950,5959,5.188166,,
22423450,4950,5960,5.188385,,


## ! ! ! Warning ! ! !
The following 2 cells may run for at least 45 minutes each.

In [29]:
for i in range(len(x_list)):
    x = x_list[i]
    row_index = gwa_df.x == x
    gwa_df.loc[row_index, "lon"] = gwa_df[row_index].apply(lambda g: (affine_matrix * (g.x, 0))[0], axis=1)

In [30]:
for i in range(len(y_list)):
    y = y_list[i]
    row_index = gwa_df.y == y
    gwa_df.loc[row_index, "lat"] = gwa_df[row_index].apply(lambda g: (affine_matrix * (0, g.y))[1], axis=1)

In [36]:
gwa_df = gwa_df.astype({"lon": "float32"})

In [40]:
gwa_df.dtypes

x             int32
y             int32
windvelo    float32
lat         float32
lon         float32
dtype: object

In [39]:
gwa_df.describe()

Unnamed: 0,x,y,windvelo,lat,lon
count,22423450.0,22423450.0,22423450.0,22423450.0,22423450.0
mean,2461.363,4170.244,5.985596,11.97119,97.37663
std,1074.798,1803.497,1.743064,4.41081,24.93506
min,0.0,9.0,0.536395,3.003269,116.0416
25%,1615.0,2810.0,4.338831,8.115768,120.0791
50%,2406.0,4384.0,5.492495,11.21827,122.0566
75%,3221.0,5625.0,6.777289,15.15327,124.0941
max,4950.0,7670.0,17.63615,22.15577,128.4166


In [41]:
gwa_df.to_pickle("results/gwa_grids.pkl")

In [42]:
def get_square_bounds_ne(ctr_lat, ctr_lon, side_len=0.25):
    adjust = side_len / 2
    top_left = [ ctr_lat + adjust, ctr_lon - adjust ]
    top_right = [ ctr_lat + adjust, ctr_lon + adjust ]
    bottom_left = [ ctr_lat - adjust, ctr_lon - adjust ]
    bottom_right = [ ctr_lat - adjust, ctr_lon + adjust ]
    return [ top_left, bottom_right ]

In [43]:
mean2018_df = pd.read_pickle("results/mean2018.pkl")
mean2019_df = pd.read_pickle("results/mean2019.pkl")
mean2020_df = pd.read_pickle("results/mean2020.pkl")
mean2018_df.shape, mean2019_df.shape, mean2020_df.shape

((3195, 17), (3195, 17), (3195, 17))

In [44]:
mean2018_df["bounds"] = mean2018_df.apply(lambda x : get_square_bounds_ne(x["lat"], x["lon"]), axis=1)
mean2019_df["bounds"] = mean2019_df.apply(lambda x : get_square_bounds_ne(x["lat"], x["lon"]), axis=1)
mean2020_df["bounds"] = mean2020_df.apply(lambda x : get_square_bounds_ne(x["lat"], x["lon"]), axis=1)
mean2018_df.shape, mean2019_df.shape, mean2020_df.shape

((3195, 18), (3195, 18), (3195, 18))

In [45]:
mean2018_df.to_pickle("results/mean2018.pkl")
mean2019_df.to_pickle("results/mean2019.pkl")
mean2020_df.to_pickle("results/mean2020.pkl")