In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import time as time
import pyarrow as pa


In [2]:
import types
def imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            yield val.__name__
list(imports())

['builtins',
 'builtins',
 'matplotlib.pyplot',
 'numpy',
 'pandas',
 'xarray',
 'time',
 'pyarrow',
 'types']

## Defining Functions

### Creates functions for: data retrieval, extrapolating speed for height, wind power output

In [1]:
def extract_grib(file_list, u_elev_str, v_elev_str, save_as_str):
    """Retrieve dataset from grib file over total time period, takes in list of multiple file names and 
    concatenates them into one pandas array. Then saves all in a feather file"""
    
    #creating initial dataframe
    start = time.time()
    print('starting and outputting steps for first dataset')
    ds = xr.open_dataset(file_list[0], engine="cfgrib")        #opening the dataset
    
    end = time.time()
    print('opening:', end-start)
    
    ds = ds.get([u_elev_str, v_elev_str])         #selecting useful keys
    
    start = time.time()
    print('selecting keys:', start-end)
    
    df = ds.to_dataframe()                        #placing into pandas
    
    end = time.time()
    print('into pandas:', end-start)
    
    for i in range((len(file_list)-1)):
        print(i)
        ds = xr.open_dataset(file_list[i+1], engine="cfgrib")  #opening the dataset
        ds = ds.get([u_elev_str, v_elev_str])     #selecting useful keys
        df_add = ds.to_dataframe()                #placing into pandas
        
        df = pd.concat([df, df_add])
    
    df.sort_values(by='time', inplace=True)       #sorting by date  
    df = df.drop(['number','step','surface','valid_time'],axis=1)           #dropping useless columns
    df = df.reset_index(level=['latitude', 'longitude'])                    #set long and lat from index -> column
    df = df.reset_index(['time'])
    df['wind_mag'] = np.sqrt(df[u_elev_str]**2 + df[v_elev_str]**2)         #creating wind mag column
    #display(df)

    return df.to_feather(save_as_str)




def v_wind_eval(orig_height, target_height, wind_speeds, surface_roughness):
    """Evaluates the wind speed at a certain height using formula, outputs array"""
    
    alpha = 0.5*(surface_roughness/10)**0.2
    speeds = wind_speeds*(target_height/orig_height)**alpha
    
    return speed_arr




def wind_power_output(pwr_crv_str, speed_arr):
    """Uses power curve data from NRES and array of speeds to create correlated array of power outputs in MW of 
    that wind turbine in relation to each of those speeds. Power curve array has 3 columns: speed, power output, 
    capacity factor"""

    power_curve = np.loadtxt(pwr_crv_str,delimiter=",", dtype=str)
    power_curve = power_curve[1:,:].astype('float')
    speed_limited = np.where((speed_arr>25) | (speed_arr<0),0,speed_arr) #curtails power output above max
    power_outputs = np.interp(speed_limited,power_curve[:,0],power_curve[:,1])
    
    return power_outputs

## Extracting Data

In [5]:
str_list_100m = ['data/100m/1998-03','data/100m/2004-09',
                    'data/100m/2010-15','data/100m/2016-21']
str_list_10m = ['data/10m/1998-03','data/10m/2004-09',
                    'data/10m/2010-15','data/10m/2016-21']




filename_100m1998_03 = 'data/100m/100m_dataset.feather'
filename_10m = 'data/10m/10m_dataset.feather'


filename_10m1998_03 = 'data/10m/10m_dataset1998_03.feather'
str_list_10m1998_03 = ['data/10m/1998-03']
feather_dataset_10m1998_03 = extract_grib(str_list_10m1998_03,"u10","v10",filename_10m1998_03)


#speed_store_10m = retrieve_time_mag_list(latitude, longitude, str_list_10m,"u10","v10")

starting and outputting steps for first dataset


KeyboardInterrupt: 

In [None]:
filename_10m2004_09 = 'data/10m/10m_dataset2004-09.feather'
str_list_10m2004_09 = ['data/10m/2004-09']
feather_dataset_10m2004_09 = extract_grib(str_list_10m2004_09,"u10","v10",filename_10m2004_09)

In [None]:
filename_10m2010_15 = 'data/10m/10m_dataset2010-15.feather'
str_list_10m2010_15 = ['data/10m/2010-15']
feather_dataset_10m2010_15 = extract_grib(str_list_10m2010_15,"u10","v10",filename_10m2010_15)

In [None]:
filename_10m2016_21 = 'data/10m/10m_dataset2016-21.feather'
str_list_10m2016_21 = ['data/10m/2016-21']
feather_dataset_10m2016_21 = extract_grib(str_list_10m2016_21,"u10","v10",filename_10m2016_21)

In [None]:
feather_dataset_10m2010_15 = 0
feather_dataset_10m2016_21 = 0
feather_dataset_10m2004_09 = 0
feather_dataset_10m1998_03 = 0 

### Concatenating the data into one array

In [8]:
df1 = pd.read_feather('data/100m/100m_dataset1998_03.feather')
df2 = pd.read_feather('data/100m/100m_dataset2004-09.feather')
df3 = pd.read_feather('data/100m/100m_dataset2010-15.feather')
df4 = pd.read_feather('data/100m/100m_dataset2016-21.feather')

#just read in specific columns rather than all of them

In [7]:
display(df1)

Unnamed: 0,time,latitude,longitude,u100,v100,wind_mag
0,1998-01-01 00:00:00,62.5,-12.50,4.861343,18.728149,19.348804
1,1998-01-01 00:00:00,53.5,-1.50,4.376968,7.376587,8.577405
2,1998-01-01 00:00:00,53.5,-1.25,4.030289,7.614868,8.615651
3,1998-01-01 00:00:00,53.5,-1.00,3.419937,8.036743,8.734140
4,1998-01-01 00:00:00,53.5,-0.75,3.256851,8.141235,8.768512
...,...,...,...,...,...,...
228477475,2003-12-31 23:00:00,58.0,-4.50,-2.683487,7.584915,8.045623
228477476,2003-12-31 23:00:00,58.0,-4.25,-2.452042,8.388626,8.739655
228477477,2003-12-31 23:00:00,58.0,-4.00,-2.297745,9.318314,9.597427
228477478,2003-12-31 23:00:00,58.0,-7.50,3.442490,8.748978,9.401879


In [9]:
df1.drop(columns = ["wind_mag"], inplace = True)
df2.drop(columns = ["wind_mag"], inplace = True)
df3.drop(columns = ["wind_mag"], inplace = True)
df4.drop(columns = ["wind_mag"], inplace = True)

#could avoid via read in, or use different delete term

In [10]:
df = pd.concat([df1, df2, df3, df4])

#look into how pandas works fundementally, specifically, concat and drop

In [4]:
del df1
del df2
del df3
del df4



#use del to delete these variables

In [5]:
df["longitude"] = df["longitude"].astype('float16')
df["latitude"] = df["latitude"].astype('float16')

display(df)
types = df.dtypes
print(types)
df.memory_usage(deep=True)

Unnamed: 0,time,latitude,longitude,u100,v100,wind_mag
0,1998-01-01 00:00:00,62.5,-12.50,4.861343,18.728149,19.348804
1,1998-01-01 00:00:00,53.5,-1.50,4.376968,7.376587,8.577405
2,1998-01-01 00:00:00,53.5,-1.25,4.030289,7.614868,8.615651
3,1998-01-01 00:00:00,53.5,-1.00,3.419937,8.036743,8.734140
4,1998-01-01 00:00:00,53.5,-0.75,3.256851,8.141235,8.768512
...,...,...,...,...,...,...
228581755,2021-12-31 23:00:00,58.0,-4.50,-2.644821,3.248337,4.188887
228581756,2021-12-31 23:00:00,58.0,-4.25,-2.678024,2.962204,3.993303
228581757,2021-12-31 23:00:00,58.0,-4.00,-2.712204,2.777634,3.882177
228581758,2021-12-31 23:00:00,58.0,-7.50,-3.250290,13.588181,13.971508


time         datetime64[ns]
latitude            float16
longitude           float16
u100                float32
v100                float32
wind_mag            float32
dtype: object


Index        7312947840
time         7312947840
latitude     1828236960
longitude    1828236960
u100         3656473920
v100         3656473920
wind_mag     3656473920
dtype: int64

In [6]:
df.reset_index()

Unnamed: 0,index,time,latitude,longitude,u100,v100,wind_mag
0,0,1998-01-01 00:00:00,62.5,-12.50,4.861343,18.728149,19.348804
1,1,1998-01-01 00:00:00,53.5,-1.50,4.376968,7.376587,8.577405
2,2,1998-01-01 00:00:00,53.5,-1.25,4.030289,7.614868,8.615651
3,3,1998-01-01 00:00:00,53.5,-1.00,3.419937,8.036743,8.734140
4,4,1998-01-01 00:00:00,53.5,-0.75,3.256851,8.141235,8.768512
...,...,...,...,...,...,...,...
914118475,228581755,2021-12-31 23:00:00,58.0,-4.50,-2.644821,3.248337,4.188887
914118476,228581756,2021-12-31 23:00:00,58.0,-4.25,-2.678024,2.962204,3.993303
914118477,228581757,2021-12-31 23:00:00,58.0,-4.00,-2.712204,2.777634,3.882177
914118478,228581758,2021-12-31 23:00:00,58.0,-7.50,-3.250290,13.588181,13.971508


In [None]:
df = df.drop(['index'])

In [48]:
df.to_feather('data/100m/100m_dataset.feather')

ValueError: feather does not support serializing a non-default index for the index; you can .reset_index() to make the index into column(s)

In [12]:
start = time.time()
df1 = pd.read_feather('data/100m/100m_dataset2004-09.feather')
end = time.time()
print(end-start)

4.754119396209717


In [6]:


speed_dist = np.linspace(0,25,num=1000)
pwr_crv_str = "data/2016CACost_NREL_Reference_8MW_180.csv"

power_outputs = wind_power_output(pwr_crv_str,speed_dist)
print(power_outputs)

[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.000000