In [1]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from numpy import pi,exp

In [9]:
def load_clean_dataframe(path : str):
    
    dataframe = pd.read_excel(io=path,names=['date_time','wind_speed','gust_speed','wind_direction'])
    
    dataframe =dataframe.dropna().reset_index(drop=True)
    dataframe['date_time'] = pd.to_datetime(dataframe['date_time'])
    
    dataframe['year'] = dataframe['date_time'].dt.year
    dataframe['month'] = dataframe['date_time'].dt.month
    dataframe['day'] = dataframe['date_time'].dt.day
    dataframe['hour'] = dataframe['date_time'].dt.hour + 1
    
    return dataframe

In [10]:
wind_data = load_clean_dataframe("data/original_data/wind_energy.xlsx")

In [11]:
month_data = wind_data[wind_data['month'] == 9]

In [12]:
def get_diurnal_variation(df : pd.DataFrame):
    
    # Create a copy of the passed DataFrame to avoid modifying the original
    dataframe = df.copy()

    # Group by day, hour and calculate average wind speed
    result = dataframe.groupby(['day', 'hour']).agg(avg_wind_speed=('wind_speed', 'mean')).reset_index()

    # Round the average wind speed to 3 decimal places
    result['avg_wind_speed'] = result['avg_wind_speed'].round(3)

    return result


In [14]:
diurnal_var = get_diurnal_variation(month_data)
diurnal_var

Unnamed: 0,day,hour,avg_wind_speed
0,1,1,0.000
1,1,2,0.000
2,1,3,0.000
3,1,4,0.000
4,1,5,0.000
...,...,...,...
715,30,20,0.542
716,30,21,1.578
717,30,22,0.358
718,30,23,0.512


In [16]:
def get_wind_rose(df : pd.DataFrame): 

    dataframe = df.copy()

    # Calculate AvgHourlyWindSpeed
    avg_hourly = dataframe.groupby(['day', 'hour']).agg(
        avg_wind_speed=('wind_speed', 'mean'),
        avg_wind_direction=('wind_direction', 'mean')
    ).reset_index()
    avg_hourly['avg_wind_speed'] = avg_hourly['avg_wind_speed'].round(3)
    avg_hourly['avg_wind_direction'] = avg_hourly['avg_wind_direction'].round(3)

    # Calculate MaxWindSpeed
    max_speed = np.ceil(avg_hourly['avg_wind_speed'].max())

    # Calculate BinnedSpeed
    avg_hourly['speed_bin'] = pd.cut(
        avg_hourly['avg_wind_speed'], 
        bins=np.linspace(0, max_speed, int(max_speed) + 1), 
        labels=False, 
        include_lowest=True
    )

    # Calculate CardinalDirections
    cardinal_directions = {
        (0, 22.5): 'N', (22.5, 67.5): 'NE', (67.5, 112.5): 'E', 
        (112.5, 157.5): 'SE', (157.5, 202.5): 'S', (202.5, 247.5): 'SW',
        (247.5, 292.5): 'W', (292.5, 337.5): 'NW', (337.5, 360): 'N'
    }
    avg_hourly['cardinal_direction'] = avg_hourly['avg_wind_direction'].apply(
        lambda x: next(direction for (min_dir, max_dir), direction in cardinal_directions.items() if min_dir <= x <= max_dir)
    )

    # Filter out rows with avg_wind_speed == 0
    avg_hourly = avg_hourly[avg_hourly['avg_wind_speed'] > 0]

    # Calculate Frequency
    frequency = avg_hourly.groupby(['day', 'cardinal_direction', 'speed_bin']).size().reset_index(name='count_speed_bin')

    # Calculate TotalCounts
    total_counts = avg_hourly.groupby(['day']).size().reset_index(name='count_total')

    # Calculate PercentFrequency
    percent_frequency = pd.merge(frequency, total_counts, on='day')
    percent_frequency['percent_frequency'] = percent_frequency.apply(
        lambda row: round((row['count_speed_bin'] * 100.0) / row['count_total'], 3) if row['count_total'] > 0 else 0,
        axis=1
    )

    # Calculate cumulative_percent_frequency
    percent_frequency['cumulative_percent_frequency'] = percent_frequency.groupby(['day', 'cardinal_direction'])['percent_frequency'].cumsum()

    return percent_frequency.sort_values(by=['day', 'cardinal_direction', 'speed_bin'])
    

In [17]:
wind_rose = get_wind_rose(month_data)
wind_rose

Unnamed: 0,day,cardinal_direction,speed_bin,count_speed_bin,count_total,percent_frequency,cumulative_percent_frequency
0,1,E,0,1,17,5.882,5.882
1,1,E,1,2,17,11.765,17.647
2,1,NE,1,1,17,5.882,5.882
3,1,NW,3,1,17,5.882,5.882
4,1,SE,1,1,17,5.882,5.882
...,...,...,...,...,...,...,...
322,30,SE,0,2,24,8.333,8.333
323,30,SE,1,2,24,8.333,16.666
324,30,SW,0,2,24,8.333,8.333
325,30,SW,1,1,24,4.167,12.500


In [19]:
def get_frequency_distribution(df : pd.DataFrame):
    
    dataframe = df.copy()
    
    # Calculate MaxWindSpeed
    max_speed = np.ceil(dataframe['wind_speed'].max())

    # Bin the wind speeds in BinnedSpeed
    bins = np.linspace(0,max_speed,int(max_speed) + 1)
    dataframe['speed_bin'] = pd.cut(dataframe['wind_speed'],bins=bins,labels=range(int(max_speed)))

    frequency_distirbution = dataframe['speed_bin'].value_counts().sort_index().reset_index()
    frequency_distirbution.columns = ['speed_bin','frequency']
    
    total_instances = frequency_distirbution['frequency'].sum()
    frequency_distirbution['percent_frequency'] = (frequency_distirbution['frequency'] / total_instances * 100).round(3)
   
    return frequency_distirbution

In [20]:
frequency_distribution = get_frequency_distribution(month_data)
frequency_distribution

Unnamed: 0,speed_bin,frequency,percent_frequency
0,0,2857,39.121
1,1,2874,39.354
2,2,1214,16.623
3,3,280,3.834
4,4,55,0.753
5,5,17,0.233
6,6,6,0.082


In [30]:
def get_wind_stats(df : pd.DataFrame):
    
    dataframe = df.copy()
    
    max_speed = dataframe['wind_speed'].max()
    min_speed = dataframe['wind_speed'].min()
    avg_speed = round(dataframe['wind_speed'].mean(),3)
    
    summary_df = pd.DataFrame({
        'Statistic': ['Max Monthly Speed', 'Min Monthly Speed', 'Average Monthly Wind Speed'],
        'Value': [max_speed, min_speed, avg_speed]
    })
    
    max_days_hours = dataframe[dataframe['wind_speed'] == max_speed][['wind_speed', 'day', 'hour']]
    min_days_hours = dataframe[dataframe['wind_speed'] == min_speed][['wind_speed', 'day', 'hour']]
    instances_df = pd.concat([max_days_hours, min_days_hours], ignore_index=True)
    
    return summary_df, instances_df

In [33]:
wind_stats = get_wind_stats(month_data)
wind_stats[0]
wind_stats[1]

Unnamed: 0,wind_speed,day,hour
0,6.68,29,15
1,0.00,1,1
2,0.00,1,1
3,0.00,1,1
4,0.00,1,1
...,...,...,...
1333,0.00,30,22
1334,0.00,30,22
1335,0.00,30,24
1336,0.00,30,24


In [52]:
def get_yey(df : pd.DataFrame):
    dataframe = df.copy()
    
    anemometer_height = 86  
    turbine_height = 109    
    shear_exponent = 0.34
    hours_in_day = 24
    hours_in_year = 8760
    
    def weibull_function(v,wind_shear):
        return (pi * v) / (2 * wind_shear ** 2) * exp((-pi / 4) * (v / wind_shear) ** 2)
    
    yey_df = pd.DataFrame()
    
    yey_df['wind_speeds'] = np.arange(0,25.5,0.5)
    yey_df['p(v)'] = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 13, 19, 26, 32, 39, 46, 53, 59, 65, 71, 76, 80, 84, 88, 92, 95, 97, 100, 102, 104, 105, 107, 108, 109, 109, 109, 109, 109, 109, 109, 108, 108, 107, 106, 105, 104, 103, 102, 102])
    
    yey_df['avg_wind_speed'] = dataframe['wind_speed'].mean()
    
    yey_df['wind_speed_at_turbine'] = yey_df['avg_wind_speed'] * (turbine_height/anemometer_height) ** shear_exponent
    
    yey_df['f(v)'] = weibull_function(yey_df['wind_speeds'],yey_df['wind_speed_at_turbine'])
    
    yey_df['f(v)p(v)*24'] = yey_df['f(v)'] * yey_df['p(v)'] * 24
    yey_df['f(v)p(v)*8760'] = yey_df['f(v)'] * yey_df['p(v)'] * 8760
    
    summary_df = pd.DataFrame({
        'YEY Value': ['Daily YEY', 'Yearly YEY'],
        'Value': [yey_df['f(v)p(v)*24'].sum(),yey_df['f(v)p(v)*8760'].sum()]
    })
    
    return yey_df,summary_df

    

In [54]:
hotdog = get_yey(month_data)
hotdog[0]
# hotdog[1]

Unnamed: 0,wind_speeds,p(v),avg_wind_speed,wind_speed_at_turbine,f(v),f(v)p(v)*24,f(v)p(v)*8760
0,0.0,0,1.175172,1.273788,0.0,0.0,0.0
1,0.5,0,1.175172,1.273788,0.428884,0.0,0.0
2,1.0,0,1.175172,1.273788,0.5966268,0.0,0.0
3,1.5,0,1.175172,1.273788,0.4886697,0.0,0.0
4,2.0,0,1.175172,1.273788,0.2792957,0.0,0.0
5,2.5,0,1.175172,1.273788,0.1174824,0.0,0.0
6,3.0,0,1.175172,1.273788,0.03724271,0.0,0.0
7,3.5,0,1.175172,1.273788,0.009010808,0.0,0.0
8,4.0,0,1.175172,1.273788,0.001676564,0.0,0.0
9,4.5,0,1.175172,1.273788,0.0002410602,0.0,0.0
