In [1]:
import re
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from scipy import stats
from collections import Counter
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.models import Sequential
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score

In [2]:
# Load the source workbook into the raw frame used by the rest of the notebook.
data = pd.read_excel(io="/Users/nithya/Downloads/J comp/data/combined (1) (1).xlsx")

In [3]:
# Work on an independent copy so `data` keeps the frame exactly as loaded.
df = data.copy(deep=True)

In [4]:
# Split the compact 'time(UTC)' stamp into calendar parts by fixed character
# position: [0:4] year, [4:6] month, [6:8] day, [9:11] hour (character 8 is skipped).
# Vectorised slicing replaces the row-by-row loop, whose df['time(UTC)'][y] lookup
# was label-based and therefore only correct with a default 0..n-1 index.
timestamp = df['time(UTC)'].astype(str)
# Cast each part to int so the new columns are numeric rather than object-dtype strings.
df['Year'] = timestamp.str.slice(0, 4).astype(int)
df['Month'] = timestamp.str.slice(4, 6).astype(int)
df['Day'] = timestamp.str.slice(6, 8).astype(int)
df['Hour'] = timestamp.str.slice(9, 11).astype(int)

In [5]:
# Remove the raw timestamp and the Gb(n), Gd(h), IR(h) columns from the working frame.
df = df.drop(columns=['time(UTC)', 'Gb(n)', 'Gd(h)', 'IR(h)'])

In [6]:
# Preview the first five rows after the column changes.
df.head(n=5)

Unnamed: 0,Temp,Humidity,Radiation,WindSpeed,WindDirection,AirPressure,State,Year,Month,Day,Hour
0,19.78,47.65,693.0,1.17,158,90845,arunachal pradesh,2015,1,21,7
1,18.66,43.25,716.0,1.1,6,90712,arunachal pradesh,2015,1,16,6
2,18.0,60.4,89.0,1.66,96,98809,punjab,2015,2,1,12
3,27.58,88.55,606.0,0.21,149,99530,tripura,2015,10,7,5
4,33.38,37.55,247.0,1.59,334,99158,uttar pradesh,2015,10,7,11


In [7]:
# Separate the regression target from the predictor columns.
target = df['Radiation']
input_features = df.drop(columns=['Radiation'])

In [8]:
# One LabelEncoder instance is reused for two different fits in this cell.
label_encoder = LabelEncoder()
# First fit: encodes the Radiation target values as integer class indices.
# NOTE(review): train_Y is not referenced anywhere else in the visible notebook — confirm it is still needed.
train_Y = label_encoder.fit_transform(target)
# Second fit replaces the first, so from here on label_encoder maps State names <-> integer codes.
# The later inverse_transform on the sample 'State' column relies on this being the most recent fit.
input_features['State'] = label_encoder.fit_transform(input_features['State'])

In [9]:
# Radiation scaled by 100 and truncated to an integer, element by element.
target_cont = df['Radiation'].map(lambda radiation: int(radiation * 100))

In [10]:
# Min-max scale every predictor column, fitting and transforming on the full feature frame.
scaled_input_features = MinMaxScaler().fit(input_features).transform(input_features)

In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
xtrain, xtesta, ytrain, ytest = train_test_split(
    input_features,
    target,
    random_state=1,
    test_size=0.2,
)

In [12]:
# Learn the standardisation statistics from the training rows only,
# then apply the same transform to the training and held-out features.
scaler = StandardScaler().fit(xtrain)
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtesta)
(xtrain.shape, xtest.shape)

((4388, 10), (1098, 10))

In [34]:
# Artificial Neural Network: fully connected regressor ending in one linear output unit.
model = Sequential()
# Input width is taken from the training matrix instead of a hard-coded 10.
model.add(Dense(128, activation='relu', input_dim=xtrain.shape[1]))
# Dropout is applied only after the first hidden layer.
model.add(Dropout(0.33))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
# Optimise mean absolute error and additionally track mean squared error.
# `metrics` is passed as a list, the form Keras documents; a bare string is not accepted by every Keras version.
model.compile(loss='mae', metrics=['mse'], optimizer=Adam(learning_rate=0.001))
# summary() prints the table itself and returns None, so wrapping it in print() emitted a stray "None".
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 128)               1408      
                                                                 
 dropout_5 (Dropout)         (None, 128)               0         
                                                                 
 dense_19 (Dense)            (None, 64)                8256      
                                                                 
 dense_20 (Dense)            (None, 32)                2080      
                                                                 
 dense_21 (Dense)            (None, 32)                1056      
                                                                 
 dense_22 (Dense)            (None, 32)                1056      
                                                                 
 dense_23 (Dense)            (None, 1)                

In [41]:
# Train the network for 30 epochs, holding back 10% of the training rows for validation.
history = model.fit(
    x=xtrain,
    y=ytrain,
    batch_size=32,
    epochs=30,
    validation_split=0.1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [42]:
# Network predictions for the standardised held-out features.
y_pred = model.predict(x=xtest)



In [43]:
# Frame built from the unscaled held-out features.
xy = pd.DataFrame(data=xtesta)

In [44]:
# evaluate() returns the loss first (MAE here) followed by the compiled metric (MSE).
scores = model.evaluate(x=xtest, y=ytest)
mae, mse = scores[0], scores[1]



In [45]:
# Report both held-out error figures, one per line.
for caption, metric_value in (('Mean absolute error: ', mae), ('Mean squared error: ', mse)):
    print(caption, metric_value)

Mean absolute error:  95.19105529785156
Mean squared error:  14136.7490234375


In [46]:
# R2 score for the ANN on the held-out split.
r2_score(y_true=ytest, y_pred=y_pred)

0.801418205985877

In [13]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 1000 trees with a fixed seed, trained on the standardised features.
rf = RandomForestRegressor(random_state=42, n_estimators=1000)
rf.fit(X=xtrain, y=ytrain)

In [48]:
# Random forest predictions for the held-out features.
ypredrf = rf.predict(X=xtest)

In [49]:
# R2 score for the random forest on the held-out split.
r2_score(y_true=ytest, y_pred=ypredrf)

0.8797315772730355

In [50]:
def pred(val, estimators=None):
    """Average the predictions of several fitted regressors (ANN + random forest by default).

    Parameters
    ----------
    val : array-like
        Feature matrix handed unchanged to each estimator's ``predict``.
        Callers in this notebook pass StandardScaler-transformed features.
    estimators : sequence, optional
        Fitted objects exposing ``predict(val)``. When omitted, the notebook
        globals ``model`` (the Keras network) and ``rf`` (the random forest)
        are used, which matches the original behaviour.

    Returns
    -------
    list of float
        One averaged prediction per row of ``val``.

    Raises
    ------
    ValueError
        If ``estimators`` is given but empty.
    """
    if estimators is None:
        estimators = (model, rf)
    if len(estimators) == 0:
        raise ValueError("pred() needs at least one estimator")
    # The network returns an (n, 1) column while the forest returns (n,).
    # Flattening first makes every averaged value a plain scalar instead of
    # a one-element array (which is how they showed up in the results table).
    # Assumes each estimator emits exactly one value per row.
    flattened = [np.ravel(estimator.predict(val)) for estimator in estimators]
    return np.mean(flattened, axis=0).tolist()

In [51]:
# Averaged ensemble predictions for the held-out features.
y_predarf = pred(val=xtest)



In [52]:
# R2 score of the averaged ensemble on the held-out split.
r2_score(y_true=ytest, y_pred=y_predarf)

0.8622534109517492

In [14]:
# Gradient-boosted regressor hyper-parameters, kept in a dict so they can be reused.
params = dict(learning_rate=0.1, max_depth=6)
xgbmodel = XGBRegressor(**params)
xgbmodel.fit(X=xtrain, y=ytrain)

In [16]:
# XGBoost predictions for the held-out features.
y_predxgb = xgbmodel.predict(X=xtest)

In [17]:
# Hybrid XGB, RF
def pred(val, estimators=None):
    """Average the predictions of several fitted regressors (XGBoost + random forest by default).

    NOTE: this shares the name ``pred`` with the ANN + RF helper defined
    earlier in the notebook; whichever definition ran most recently is the
    one later cells call.

    Parameters
    ----------
    val : array-like
        Feature matrix handed unchanged to each estimator's ``predict``.
        Callers in this notebook pass StandardScaler-transformed features.
    estimators : sequence, optional
        Fitted objects exposing ``predict(val)``. When omitted, the notebook
        globals ``xgbmodel`` and ``rf`` are used, which matches the original
        behaviour.

    Returns
    -------
    list of float
        One averaged prediction per row of ``val``.

    Raises
    ------
    ValueError
        If ``estimators`` is given but empty.
    """
    if estimators is None:
        estimators = (xgbmodel, rf)
    if len(estimators) == 0:
        raise ValueError("pred() needs at least one estimator")
    # Flatten each output so (n, 1) and (n,) shaped predictions combine into
    # plain scalars; assumes each estimator emits exactly one value per row.
    flattened = [np.ravel(estimator.predict(val)) for estimator in estimators]
    return np.mean(flattened, axis=0).tolist()

In [18]:
# Averaged ensemble predictions for the held-out features.
y_predxgbrf = pred(val=xtest)

In [20]:
# R2 score of the averaged ensemble on the held-out split.
r2_score(y_true=ytest, y_pred=y_predxgbrf)

0.8633238742592064


In [53]:
# Sample predictions for ANN, RF: load the separate per-state test features.
df2 = pd.read_excel(io="/Users/nithya/Downloads/J comp/test data/state_test.xlsx")

In [54]:
# Keep an untouched copy of the sample features for the results table;
# this must happen before df2 is rebound on the next line.
df3 = df2.copy()
# Apply the scaler fitted on the training split. df2 is rebound from the loaded
# frame to the scaler's output, so re-running this cell alone starts from the
# already-scaled values rather than the loaded sheet.
df2 = scaler.transform(df2)
# NOTE(review): `pred` is defined twice in this notebook (ANN + RF and XGB + RF);
# which ensemble runs here depends on which definition was executed last.
y_predsample = pred(df2)



In [55]:
# Turn the integer state codes back into state names for display.
df3['State'] = label_encoder.inverse_transform(df3['State'].to_numpy())

In [56]:
# Attach the ensemble predictions as a new column of the results table.
df3 = df3.assign(Predicted=y_predsample)

In [57]:
# Load the observed values that go with the sample features.
ytestsample = pd.read_excel(io="/Users/nithya/Downloads/J comp/test data/ytesta.xlsx")

In [59]:
# Shape of the observed values once converted to a NumPy array.
np.shape(ytestsample.to_numpy())

(28, 1)

In [60]:
# Attach the observed values next to the predictions in the results table.
df3 = df3.assign(Actual=ytestsample)

In [61]:
# Display the sample rows together with their 'Predicted' and 'Actual' columns.
df3

Unnamed: 0,Temp,Humidity,WindSpeed,WindDirection,AirPressure,State,Year,Month,Day,Hour,Predicted,Actual
0,30.71,36.15,2.83,55,95423,andhra pradesh,2015,11,12,9,[577.562],659.0
1,19.78,47.65,1.17,158,90845,arunachal pradesh,2015,1,21,7,[645.1732],515.0
2,27.78,75.6,0.55,280,91791,assam,2015,5,11,6,[720.396],395.0
3,22.59,51.2,0.48,260,100999,bihar,2015,1,6,11,[109.82243],102.0
4,28.21,48.6,2.62,59,98252,chhattisgarh,2015,11,10,5,[650.75574],843.0
5,10.79,92.5,1.59,305,98969,delhi,2015,3,12,2,[64.41555],333.0
6,28.29,45.2,0.76,343,100251,goa,2015,1,8,5,[634.0586],620.0
7,29.47,22.95,2.9,60,100685,gujarat,2015,2,4,9,[689.92346],242.0
8,19.54,87.6,1.52,113,98861,haryana,2015,4,13,1,[26.866844],16.0
9,16.18,43.0,1.1,252,78671,himachal pradesh,2015,5,10,9,[841.9659],1081.0


In [62]:
# R2 score of the ensemble on the separate sample set.
r2_score(y_true=ytestsample, y_pred=y_predsample)

0.37906089769906426