In [21]:
import pandas as pd
from datetime import datetime
import numpy as np

from sklearn.model_selection import train_test_split
import xgboost as xgb

train_data = pd.read_csv('TrainData_A.csv')

In [22]:
def readTimeStamp(df):
    df['hour'] = df['TIMESTAMP'].apply(lambda x: datetime.strptime(x, '%Y%m%d %H:%M').hour)
    df['month'] = df['TIMESTAMP'].apply(lambda x: datetime.strptime(x, '%Y%m%d %H:%M').month)
    return df

In [23]:
def getAbsWindSpeed(u, v):
    abs_speed = np.sqrt(u**2+v**2)    
    return abs_speed
def getWindAngle(u, v):
    abs_speed = getAbsWindSpeed(u, v)
    angle = np.arcsin(v/abs_speed)
    return angle
def powerWindLaw(v10,h):
    a=0.11
    v = v10*(h/10)**a
    return v
def calculateAverages(u, v, abs_wind_speed, angle, height):
    elements = [u, v, abs_wind_speed, angle]
    element_names = ['u','v','abs_wind_speed','angle']
    colnames = ['avg_6_past','avg_24_past','avg_30_days_past','avg_5_current']
    df = pd.DataFrame()
    i = 0
    for element in elements:
        
        avg_6_past = element.rolling(window = 6, min_periods = 1).mean()
        avg_24_past = element.rolling(window = 24, min_periods = 1).mean()
        avg_30_days_past = element.rolling(window = 24*30, min_periods = 1).mean()
        avg_5_current = element.rolling(window = 5, min_periods = 1, center = True).mean()
        new_df = pd.concat([avg_6_past,avg_24_past,avg_30_days_past,avg_5_current], axis = 1)
        new_colnames = [name + '_' + element_names[i] + '_' + str(height) for name in colnames ]
        new_df.columns = new_colnames
        df = pd.concat([df, new_df], axis = 1 )
        i = i +1
    return df
        
def transform_df(df):
    df = df.dropna()
    readTimeStamp(df)
    df['abs_speed_10'] = getAbsWindSpeed(df['U10'],df['V10'])
    df['angle_10'] = getWindAngle(df['U10'], df['V10'])
    df_10 = calculateAverages(df['U10'], df['V10'], df['abs_speed_10'], df['angle_10'], 10)
    df['abs_speed_100'] = getAbsWindSpeed(df['U100'],df['V100'])
    df['angle_100'] = getWindAngle(df['U10'],df['V10'])
    df_100 = calculateAverages(df['U100'], df['V100'], df['abs_speed_100'], df['angle_100'], 100)
    df['U50'] = powerWindLaw(df['U10'],50)
    df['V50'] = powerWindLaw(df['V10'],50)
    df['abs_speed_50'] = getAbsWindSpeed(df['U50'],df['V50'])
    df['angle_50'] = getWindAngle(df['U50'],df['V50'])
    df_50 = calculateAverages(df['U50'], df['V50'], df['abs_speed_50'], df['angle_50'], 50)
    return pd.concat([df, df_10, df_100, df_50], axis = 1)
    
train_data_prepared = transform_df(train_data)
power = train_data_prepared['POWER']
train_data_prepared = train_data_prepared.drop(['TIMESTAMP'], axis = 1)
train_data_prepared.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['hour'] = df['TIMESTAMP'].apply(lambda x: datetime.strptime(x, '%Y%m%d %H:%M').hour)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['month'] = df['TIMESTAMP'].apply(lambda x: datetime.strptime(x, '%Y%m%d %H:%M').month)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['abs_speed_10'] = getAbs

Unnamed: 0,POWER,U10,V10,U100,V100,hour,month,abs_speed_10,angle_10,abs_speed_100,...,avg_30_days_past_v_50,avg_5_current_v_50,avg_6_past_abs_wind_speed_50,avg_24_past_abs_wind_speed_50,avg_30_days_past_abs_wind_speed_50,avg_5_current_abs_wind_speed_50,avg_6_past_angle_50,avg_24_past_angle_50,avg_30_days_past_angle_50,avg_5_current_angle_50
3,0.666693,5.99531,4.89334,9.52143,6.99094,3,1,7.738767,0.684538,11.812319,...,5.841066,6.468219,9.237586,9.237586,9.237586,10.129169,0.684538,0.684538,0.684538,0.694461
6,0.937501,6.17865,5.70277,9.73491,8.09731,6,1,8.408169,0.745367,12.662342,...,6.324165,6.497477,9.63711,9.63711,9.63711,10.457222,0.714953,0.714953,0.714953,0.674169
9,0.935002,7.39201,5.6601,11.2158,8.10729,9,1,9.310131,0.653477,13.839159,...,6.468219,6.964308,10.129169,10.129169,10.129169,10.859942,0.694461,0.694461,0.694461,0.69672
12,0.91294,7.83819,5.51678,12.5675,8.25853,12,1,9.584993,0.613294,15.038131,...,6.497477,7.816885,10.457222,10.457222,10.457222,11.778867,0.674169,0.674169,0.674169,0.723613
15,0.97108,7.37616,7.39868,11.3248,10.6524,15,1,10.447402,0.786922,15.547499,...,6.964308,8.58344,10.859942,10.859942,10.859942,12.760641,0.69672,0.69672,0.69672,0.732986


In [24]:
target = train_data_prepared['POWER'] 
params = train_data_prepared.drop('POWER', axis = 1)

In [25]:
params.columns.unique

<bound method Index.unique of Index(['U10', 'V10', 'U100', 'V100', 'hour', 'month', 'abs_speed_10',
       'angle_10', 'abs_speed_100', 'angle_100', 'U50', 'V50', 'abs_speed_50',
       'angle_50', 'avg_6_past_u_10', 'avg_24_past_u_10',
       'avg_30_days_past_u_10', 'avg_5_current_u_10', 'avg_6_past_v_10',
       'avg_24_past_v_10', 'avg_30_days_past_v_10', 'avg_5_current_v_10',
       'avg_6_past_abs_wind_speed_10', 'avg_24_past_abs_wind_speed_10',
       'avg_30_days_past_abs_wind_speed_10', 'avg_5_current_abs_wind_speed_10',
       'avg_6_past_angle_10', 'avg_24_past_angle_10',
       'avg_30_days_past_angle_10', 'avg_5_current_angle_10',
       'avg_6_past_u_100', 'avg_24_past_u_100', 'avg_30_days_past_u_100',
       'avg_5_current_u_100', 'avg_6_past_v_100', 'avg_24_past_v_100',
       'avg_30_days_past_v_100', 'avg_5_current_v_100',
       'avg_6_past_abs_wind_speed_100', 'avg_24_past_abs_wind_speed_100',
       'avg_30_days_past_abs_wind_speed_100',
       'avg_5_current_abs_w

In [26]:
params.columns


Index(['U10', 'V10', 'U100', 'V100', 'hour', 'month', 'abs_speed_10',
       'angle_10', 'abs_speed_100', 'angle_100', 'U50', 'V50', 'abs_speed_50',
       'angle_50', 'avg_6_past_u_10', 'avg_24_past_u_10',
       'avg_30_days_past_u_10', 'avg_5_current_u_10', 'avg_6_past_v_10',
       'avg_24_past_v_10', 'avg_30_days_past_v_10', 'avg_5_current_v_10',
       'avg_6_past_abs_wind_speed_10', 'avg_24_past_abs_wind_speed_10',
       'avg_30_days_past_abs_wind_speed_10', 'avg_5_current_abs_wind_speed_10',
       'avg_6_past_angle_10', 'avg_24_past_angle_10',
       'avg_30_days_past_angle_10', 'avg_5_current_angle_10',
       'avg_6_past_u_100', 'avg_24_past_u_100', 'avg_30_days_past_u_100',
       'avg_5_current_u_100', 'avg_6_past_v_100', 'avg_24_past_v_100',
       'avg_30_days_past_v_100', 'avg_5_current_v_100',
       'avg_6_past_abs_wind_speed_100', 'avg_24_past_abs_wind_speed_100',
       'avg_30_days_past_abs_wind_speed_100',
       'avg_5_current_abs_wind_speed_100', 'avg_6_past_an

In [27]:
   # Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(params,target, test_size=0.1, random_state=42)
xgb_model = xgb.XGBRegressor(base_score=0.5,booster='gbtree',max_depth=4,reg_lambda=15,reg_alpha=0.001,n_estimators=1000, early_stopping_rounds=50, learning_rate=0.01)
    # Initialize an XGBoost model with some hyperparametersxgb_model = xgb.XGBRegressor(n_estimators=1000, early_stopping_rounds=50, learning_rate=0.01)
xgb_model.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_test, Y_test)], verbose=100)


[0]	validation_0-rmse:0.31094	validation_1-rmse:0.31347
[100]	validation_0-rmse:0.16642	validation_1-rmse:0.16740
[200]	validation_0-rmse:0.12951	validation_1-rmse:0.12950
[300]	validation_0-rmse:0.11972	validation_1-rmse:0.11962
[400]	validation_0-rmse:0.11565	validation_1-rmse:0.11570
[500]	validation_0-rmse:0.11298	validation_1-rmse:0.11331
[600]	validation_0-rmse:0.11078	validation_1-rmse:0.11147
[700]	validation_0-rmse:0.10921	validation_1-rmse:0.11009
[800]	validation_0-rmse:0.10797	validation_1-rmse:0.10913
[900]	validation_0-rmse:0.10689	validation_1-rmse:0.10832
[999]	validation_0-rmse:0.10579	validation_1-rmse:0.10759


In [28]:
test_data = pd.read_csv('WeatherForecastInput_A.csv')
test_data_prepared = transform_df(test_data)

In [29]:
test_data_prepared.head(5)

Unnamed: 0,TIMESTAMP,U10,V10,U100,V100,hour,month,abs_speed_10,angle_10,abs_speed_100,...,avg_30_days_past_v_50,avg_5_current_v_50,avg_6_past_abs_wind_speed_50,avg_24_past_abs_wind_speed_50,avg_30_days_past_abs_wind_speed_50,avg_5_current_abs_wind_speed_50,avg_6_past_angle_50,avg_24_past_angle_50,avg_30_days_past_angle_50,avg_5_current_angle_50
0,20180201 01:00,9.60919,4.04354,12.6693,5.03529,1,2,10.425294,0.398307,13.633243,...,4.826679,4.620583,12.44443,12.44443,12.44443,12.155111,0.398307,0.398307,0.398307,0.390019
1,20180201 02:00,9.57399,3.77546,12.8291,4.84658,2,2,10.29152,0.375622,13.714049,...,4.666679,4.902218,12.364588,12.364588,12.364588,11.912319,0.386965,0.386965,0.386965,0.427438
2,20180201 03:00,9.07057,3.79365,12.092,4.75025,3,2,9.831939,0.396129,12.991587,...,4.620583,5.147189,12.155111,12.155111,12.155111,11.824989,0.390019,0.390019,0.390019,0.454596
3,20180201 04:00,8.03763,4.81464,11.0656,6.14977,4,2,9.369325,0.539693,12.659667,...,4.902218,5.474982,11.912319,11.912319,11.912319,11.623677,0.427438,0.427438,0.427438,0.49509
4,20180201 05:00,8.12874,5.13294,11.1379,6.57495,5,2,9.613713,0.563228,12.933785,...,5.147189,5.915019,11.824989,11.824989,11.824989,11.516012,0.454596,0.454596,0.454596,0.541503


In [30]:
test_data_prepared.columns

Index(['TIMESTAMP', 'U10', 'V10', 'U100', 'V100', 'hour', 'month',
       'abs_speed_10', 'angle_10', 'abs_speed_100', 'angle_100', 'U50', 'V50',
       'abs_speed_50', 'angle_50', 'avg_6_past_u_10', 'avg_24_past_u_10',
       'avg_30_days_past_u_10', 'avg_5_current_u_10', 'avg_6_past_v_10',
       'avg_24_past_v_10', 'avg_30_days_past_v_10', 'avg_5_current_v_10',
       'avg_6_past_abs_wind_speed_10', 'avg_24_past_abs_wind_speed_10',
       'avg_30_days_past_abs_wind_speed_10', 'avg_5_current_abs_wind_speed_10',
       'avg_6_past_angle_10', 'avg_24_past_angle_10',
       'avg_30_days_past_angle_10', 'avg_5_current_angle_10',
       'avg_6_past_u_100', 'avg_24_past_u_100', 'avg_30_days_past_u_100',
       'avg_5_current_u_100', 'avg_6_past_v_100', 'avg_24_past_v_100',
       'avg_30_days_past_v_100', 'avg_5_current_v_100',
       'avg_6_past_abs_wind_speed_100', 'avg_24_past_abs_wind_speed_100',
       'avg_30_days_past_abs_wind_speed_100',
       'avg_5_current_abs_wind_speed_100', '

In [31]:
test_data_prepared = test_data_prepared.drop(['TIMESTAMP'], axis = 1)
prediction= pd.DataFrame(xgb_model.predict(test_data_prepared), columns = ['FORECAST'])
result = pd.concat([test_data['TIMESTAMP'],prediction], axis = 1)
result.to_csv('result.csv')

In [32]:
result


Unnamed: 0,TIMESTAMP,FORECAST
0,20180201 01:00,0.916935
1,20180201 02:00,0.888870
2,20180201 03:00,0.866106
3,20180201 04:00,0.860340
4,20180201 05:00,0.856565
...,...,...
667,20180228 20:00,0.740165
668,20180228 21:00,0.734581
669,20180228 22:00,0.725845
670,20180228 23:00,0.717028


In [33]:
result.head()

Unnamed: 0,TIMESTAMP,FORECAST
0,20180201 01:00,0.916935
1,20180201 02:00,0.88887
2,20180201 03:00,0.866106
3,20180201 04:00,0.86034
4,20180201 05:00,0.856565
