In [None]:
# Standard imports
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import feature_column
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout

## Steps we will follow in this notebook

* Download the data
* Become one with the data
* Preprocess the data
* Create a model
* Compile the model
* Fit the model 
* Evaluate the model

In [None]:
# Import the train data.
# The GitHub access token is read from the GITHUB_RAW_TOKEN environment variable
# instead of being hard-coded in the URL, so no credential is stored in the notebook.
# When the variable is unset the plain URL is requested.
# NOTE(review): the plain URL presumably only works if the repository is public - confirm.
TRAIN_URL = "https://raw.githubusercontent.com/amulyaprasanth/Wipro-sustainability-Machine-Learning-Model/main/train.csv"
train_token = os.environ.get("GITHUB_RAW_TOKEN")
train = pd.read_csv(f"{TRAIN_URL}?token={train_token}" if train_token else TRAIN_URL)
train.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Clearsky DHI,Clearsky DNI,Clearsky GHI,Cloud Type,Dew Point,Temperature,Pressure,Relative Humidity,Solar Zenith Angle,Precipitable Water,Wind Direction,Wind Speed,Fill Flag
0,2009,1,1,0,0,0,0,0,0,0.0,5.0,1010,75.34,106.15,0.499,346.1,3.1,0
1,2009,1,1,0,30,0,0,0,0,1.0,5.0,1010,80.81,112.28,0.49,346.1,3.1,0
2,2009,1,1,1,0,0,0,0,4,0.0,5.0,1010,78.27,118.5,0.482,347.9,3.2,0
3,2009,1,1,1,30,0,0,0,4,0.0,4.0,1010,78.27,124.78,0.478,347.9,3.1,0
4,2009,1,1,2,0,0,0,0,4,0.0,4.0,1010,76.45,131.12,0.475,350.0,3.0,0


In [None]:
# Import the test data.
# The GitHub access token is read from the GITHUB_RAW_TOKEN environment variable
# instead of being hard-coded in the URL, so no credential is stored in the notebook.
# When the variable is unset the plain URL is requested.
# NOTE(review): the plain URL presumably only works if the repository is public - confirm.
TEST_URL = "https://raw.githubusercontent.com/amulyaprasanth/Wipro-sustainability-Machine-Learning-Model/main/test.csv"
test_token = os.environ.get("GITHUB_RAW_TOKEN")
test = pd.read_csv(f"{TEST_URL}?token={test_token}" if test_token else TEST_URL)
test.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Cloud Type,Dew Point,Temperature,Pressure,Relative Humidity,Solar Zenith Angle,Precipitable Water,Wind Direction,Wind Speed,Fill Flag,Clearsky DHI,Clearsky DNI,Clearsky GHI
0,2019,1,1,0,0,7,18.4,18.8,1008,97.7,106.23,3.5,190,2.3,0,,,
1,2019,1,1,0,30,3,18.4,18.6,1008,98.92,112.36,3.5,187,2.5,0,,,
2,2019,1,1,1,0,3,18.2,18.5,1008,98.35,118.58,3.5,184,2.8,0,,,
3,2019,1,1,1,30,3,18.2,18.3,1008,99.58,124.86,3.5,185,3.0,0,,,
4,2019,1,1,2,0,0,18.0,18.0,1008,99.71,131.2,3.6,186,3.1,0,,,


In [None]:
# Names of the three columns the model has to predict
target_labels = [
    "Clearsky DHI",
    "Clearsky DNI",
    "Clearsky GHI",
]

## Become one with the data

In [None]:
# Summarise the training frame: column names, non-null counts and dtypes
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175296 entries, 0 to 175295
Data columns (total 18 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Year                175296 non-null  int64  
 1   Month               175296 non-null  int64  
 2   Day                 175296 non-null  int64  
 3   Hour                175296 non-null  int64  
 4   Minute              175296 non-null  int64  
 5   Clearsky DHI        175296 non-null  int64  
 6   Clearsky DNI        175296 non-null  int64  
 7   Clearsky GHI        175296 non-null  int64  
 8   Cloud Type          175296 non-null  int64  
 9   Dew Point           175296 non-null  float64
 10  Temperature         175296 non-null  float64
 11  Pressure            175296 non-null  int64  
 12  Relative Humidity   175296 non-null  float64
 13  Solar Zenith Angle  175296 non-null  float64
 14  Precipitable Water  175296 non-null  float64
 15  Wind Direction      175296 non-nul

In [None]:
# Summarise the test frame: column names, non-null counts and dtypes
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17520 entries, 0 to 17519
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Year                17520 non-null  int64  
 1   Month               17520 non-null  int64  
 2   Day                 17520 non-null  int64  
 3   Hour                17520 non-null  int64  
 4   Minute              17520 non-null  int64  
 5   Cloud Type          17520 non-null  int64  
 6   Dew Point           17520 non-null  float64
 7   Temperature         17520 non-null  float64
 8   Pressure            17520 non-null  int64  
 9   Relative Humidity   17520 non-null  float64
 10  Solar Zenith Angle  17520 non-null  float64
 11  Precipitable Water  17520 non-null  float64
 12  Wind Direction      17520 non-null  int64  
 13  Wind Speed          17520 non-null  float64
 14  Fill Flag           17520 non-null  int64  
 15  Clearsky DHI        0 non-null      float64
 16  Clea

#### Splitting the training data into train and validation sets


In [None]:
# Positional hold-out split of the training frame: the first `split_at` rows are
# used for training and all remaining rows for validation.
split_at = 156000

# Separate the feature columns from the target columns once, then slice both.
features = train.drop(target_labels, axis=1)
targets = train[target_labels]

X_train, X_valid = features[:split_at], features[split_at:]
y_train, y_valid = targets[:split_at], targets[split_at:]

In [None]:
# Preview the first rows of the training features
X_train.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Cloud Type,Dew Point,Temperature,Pressure,Relative Humidity,Solar Zenith Angle,Precipitable Water,Wind Direction,Wind Speed,Fill Flag
90469,2014,2,28,18,30,0,2.0,13.0,1000,49.5,40.89,0.634,84.6,1.4,0
20090,2010,2,23,13,0,0,7.0,8.0,990,97.39,80.4,1.048,312.5,3.7,0
20170,2010,2,25,5,0,0,0.0,0.0,1000,95.49,154.65,0.429,312.3,3.4,1
126391,2016,3,18,3,30,6,18.0,18.0,1000,100.0,135.79,3.871,230.6,1.7,0
39700,2011,4,8,2,0,0,14.0,16.0,1000,92.57,115.24,2.482,169.5,1.2,0


In [None]:
# Display the training targets (the bare expression is rendered as the cell output)
y_train

Unnamed: 0,Clearsky DHI,Clearsky DNI,Clearsky GHI
90469,103,972,838
20090,57,405,125
20170,0,0,0
126391,0,0,0
39700,0,0,0
...,...,...,...
119879,0,0,0
103694,0,0,0
131932,118,741,598
146867,168,840,989


In [None]:
# Last entry of X_train's shape; the slice keeps it wrapped in a 1-tuple
X_train.shape[-1:]

(15,)

In [None]:
# Preprocess the test data: keep only the feature columns by dropping the target columns
test_data = test.drop(columns=target_labels)

In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# the same fitted scaler is then applied to the training, validation and test features.
# Note: the three names are overwritten with their scaled versions, so this cell
# should only be run once after the split / test-preprocessing cells.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)
test_data = scaler.transform(test_data)

In [None]:
# Size of X_train's last axis as a plain integer
X_train.shape[-1]

15

## This is our baseline model 

The model is stored as `tensorflow_model0` in Google Drive.

In [None]:
# Set random seed
tf.random.set_seed(42)

# Create the model: a stack of fully connected ReLU layers that narrows from 128
# to 8 units, followed by a linear 3-unit output layer (one unit per target column).
# The explicit Input layer fixes the feature width up front, so the model is built
# before training starts.
model = Sequential([
    Input(shape=(X_train.shape[-1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(3),
])

# Compile the model
model.compile(loss="mean_squared_error",
              optimizer='adam',
              metrics=['mean_squared_error'])

# Stop once the monitored validation loss has not improved for 5 epochs and roll
# the model back to the best epoch, so the evaluation and predictions below use
# the best weights rather than the last ones.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5,
                                                  restore_best_weights=True)

# Fit data to model.
# validation_data is passed as an (inputs, targets) tuple, the form documented by Keras.
# The returned History object is kept so the learning curves can be inspected later.
history = model.fit(X_train, y_train,
                    batch_size=32,
                    epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[early_stopping])

# Evaluate on the validation split. Both reported values are mean squared error:
# score[0] is the loss and score[1] is the compiled metric.
score = model.evaluate(X_valid, y_valid, verbose=0)
print(f'Validation loss (MSE): {score[0]} / Validation MSE metric: {score[1]}')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100

KeyboardInterrupt: ignored

## Experimentation

In this section we run a series of experiments to try to beat our baseline.

### Model_1: Let's create a neural network with 1D convolutional (Conv1D) layers

#### Creating a submission file

In [None]:
# Run the model on the scaled test features
preds = model.predict(test_data)

# Pull out the first three prediction columns, one variable per column
pred1, pred2, pred3 = (preds[:, column] for column in range(3))

In [None]:
# Start from an empty frame; its columns are filled in by the following cell
submissions = pd.DataFrame()

In [None]:
# Store each prediction vector under its column name, in the same order as before
for column_name, column_values in zip(
    ('Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI'),
    (pred1, pred2, pred3),
):
    submissions[column_name] = column_values

In [None]:
# Write the predictions to a CSV file in the working directory, without the row index
submissions.to_csv("tensorflow_model_results.csv", index=False)