# Importing Modules

In [73]:
import pandas as pd
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, ReLU
from keras import layers, Sequential
from tensorflow import matmul, Variable
from tensorflow import math
import warnings
warnings.filterwarnings('ignore')

# Importing Data
- Data is of Rainfall in months of June to September of years 1948 - 2020

In [74]:
import statistics

rain = pd.read_csv("./Data/Rainfall.csv")

lpa = statistics.mean(list(rain['Jun-Sep'][12:41]))
rain_fall_data = list(rain['Jun-Sep'])

rain_data = []
for i in range(len(rain_fall_data)):
  temp = (rain_fall_data[i]/lpa)*100
  rain_data.append(temp)

- Importing Sea-Level Pressure(SLP), Sea Surface Temperature(SST), and Zonal Wind(UWND) Data.

In [75]:
import glob

slp_path  = "./Data/Anomaly/slp/*"
sst_path  = "./Data/Anomaly/sst/*"
uwnd_path = "./Data/Anomaly/uwnd/*"
paths = [slp_path, sst_path, uwnd_path]

months = []
for path in paths:
  temp = []
  for i in glob.glob(path):
    temp.append(i)
  months.append(temp)

- Start and End Index for data (i.e 1948's index and 2000's index)

In [76]:
s = 0
e = 52

- Extracting data of all the months and stacking one above other. 
- 324 variables and 52 data points for SLP and UWND.
- 192 variables and 52 data points for SST.

In [77]:
def get_feature(data):
  new_data = []
  for i in range(len(data)):
    d = data.iloc[i]
    new_data.append(list(d))
  return np.array(new_data)

combine = [[] for _ in range(3)]
for i in range(3):
  for month in range(12):
    month_path = months[i][month]
    month_data = pd.read_csv(month_path, header=[0, 1], index_col=0)
    features = get_feature(month_data)[s:e]
    for feature in features:
      combine[i].append(feature)
  combine[i] = np.array(combine[i])


- Concanating the Data as required.

In [78]:
def get_data(List):
    N = len(List)
    if N == 1:
        return combine[List[0]]
    elif N == 2:
        return np.concatenate((combine[List[0]], combine[List[1]]), axis=1)
    else:
        return np.concatenate((combine[List[0]], combine[List[1]], combine[List[2]]), axis=1)


# Models 

## Auto-Encoders (SLP or UWND)

- 324 - 97 - 323

In [79]:
def get_model_324(n_inputs=324):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(97)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 97 - 29 - 97

In [80]:
def get_model_97(n_inputs=97):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(29)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 29 - 9 - 29

In [81]:
def get_model_29(n_inputs=29):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(9)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

#### Final Auto Encoder
- 324 - 97 - 29 - 9 - 29 - 97 - 324

In [82]:
def get_model_final_324(n_inputs=324):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    e = Dense(97)(e)
    e = ReLU()(e)
    e = Dense(29)(e)
    e = ReLU()(e)
    bottleneck = Dense(9)(e)
    e = Dense(29)(bottleneck)
    e = ReLU()(e)
    e = Dense(97)(e)
    e = ReLU()(e)
    output = Dense(n_inputs, activation='linear')(e)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Auto-Encoders (SST)

- 192 - 72 - 192

In [83]:
def get_model_192(n_inputs=192):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(72)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 72 - 21 - 72

In [84]:
def get_model_72(n_inputs=72):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(21)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 21 - 6 - 21

In [85]:
def get_model_21(n_inputs=21):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(6)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

#### Final Auto Encoder
- 192 - 72 - 21 - 6 - 21 - 72 - 192

In [86]:
def get_model_final_192(n_inputs=192):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    e = Dense(72)(e)
    e = ReLU()(e)
    e = Dense(21)(e)
    e = ReLU()(e)
    bottleneck = Dense(6)(e)
    e = Dense(21)(bottleneck)
    e = ReLU()(e)
    e = Dense(72)(e)
    e = ReLU()(e)
    output = Dense(n_inputs, activation='linear')(e)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Auto-Encoders (SST-SLP or SST-UWND)

- 516 - 169 - 516

In [87]:
def get_model_516(n_inputs=516):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(169)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 169 - 50 - 169

In [88]:
def get_model_169(n_inputs=169):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(50)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 50 - 15 - 50

In [89]:
def get_model_50(n_inputs=50):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(15)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

#### Final Auto Encoder
- 516 - 169 - 50 - 15 - 50 - 169 - 516

In [90]:
def get_model_final_516(n_inputs=516):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    e = Dense(169)(e)
    e = ReLU()(e)
    e = Dense(21)(e)
    e = ReLU()(e)
    bottleneck = Dense(6)(e)
    e = Dense(21)(bottleneck)
    e = ReLU()(e)
    e = Dense(169)(e)
    e = ReLU()(e)
    output = Dense(n_inputs, activation='linear')(e)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Auto-Encoders (SLP-UWND)

- 648 - 194 - 648

In [91]:
def get_model_648(n_inputs=648):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(194)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 194 - 58 - 194

In [92]:
def get_model_194(n_inputs=194):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(58)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 58 - 17 - 58

In [93]:
def get_model_58(n_inputs=58):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(17)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

#### Final Auto Encoder
- 648 - 194 - 58 - 17 - 58 - 194 - 648

In [94]:
def get_model_final_648(n_inputs=648):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    e = Dense(194)(e)
    e = ReLU()(e)
    e = Dense(58)(e)
    e = ReLU()(e)
    bottleneck = Dense(17)(e)
    e = Dense(58)(bottleneck)
    e = ReLU()(e)
    e = Dense(194)(e)
    e = ReLU()(e)
    output = Dense(n_inputs, activation='linear')(e)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Auto-Encoders (SLP-SST-UWND)

In [95]:
def get_model_840(n_inputs=840):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(266)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 194 - 58 - 194

In [96]:
def get_model_266(n_inputs=266):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(79)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

- 58 - 17 - 58

In [97]:
def get_model_79(n_inputs=79):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    bottleneck = Dense(24)(e)
    output = Dense(n_inputs, activation='linear')(bottleneck)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

#### Final Auto Encoder
- 840 - 266 - 79 - 24 - 79 - 266 - 840

In [98]:
def get_model_final_840(n_inputs=840):
    visible = Input(shape=(n_inputs,))
    e = Dense(n_inputs)(visible)
    e = ReLU()(e)
    e = Dense(266)(e)
    e = ReLU()(e)
    e = Dense(79)(e)
    e = ReLU()(e)
    bottleneck = Dense(24)(e)
    e = Dense(79)(bottleneck)
    e = ReLU()(e)
    e = Dense(266)(e)
    e = ReLU()(e)
    output = Dense(n_inputs, activation='linear')(e)
    model = Model(inputs=visible, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

## Get all Models in a 2-D List

|       Outer Most Layer           |        Middle Layer             |      Inner Most Layer         |      Full Model                              |
|----------------------------------|---------------------------------|-------------------------------|----------------------------------------------|
| [ Model SLP  - 324-97-324        |  Model SLP  - 97-29-97          |  Model SLP  - 29-9-29         |  Model SLP  - 324-97-29-9-29-97-324  ]       |
| [ Model UWND - 324-97-324        |   Model UWND - 97-29-97         |   Model UWND - 29-9-29        |   Model UWND - 324-97-29-9-29-97-324 ]       |
| [ Model SST  - 192-72-192        |   Model SST  - 72-21-72         |   Model SST  - 21-6-21        |   Model SST  - 192-72-21-6-21-72-192 ]       |
| [ Model SLP-SST  - 516-169-516   |   Model SLP-SST  - 169-50-169   |   Model SLP-SST  - 50-15-50   |   Model SLP-SST  - 516-169-50-15-50-169-516] |
| [ Model UWND-SST - 516-169-516   |   Model UWND-SST - 169-50-169   |   Model UWND-SST - 50-15-50   |   Model UWND-SST - 516-169-50-15-50-169-516] |
| [ Model SLP-UWND - 648-194-648   |   Model SLP-UWND - 194-58-194   |   Model SLP-UWND - 58-17-58   |   Model SLP-UWND - 648-194-58-17-58-194-648] |

In [99]:
def get_all_model():
    models_slp  = [get_model_324(), get_model_97(), get_model_29(), get_model_final_324()]
    models_uwnd = [get_model_324(), get_model_97(), get_model_29(), get_model_final_324()]
    models_sst  = [get_model_192(), get_model_72(), get_model_21(), get_model_final_192()]
    models_slp_sst  = [get_model_516(), get_model_169(), get_model_50(), get_model_final_516()]
    models_uwnd_sst = [get_model_516(), get_model_169(), get_model_50(), get_model_final_516()]
    models_slp_uwnd = [get_model_648(), get_model_194(), get_model_58(), get_model_final_648()]
    models_all =  [get_model_840(), get_model_266(), get_model_79(), get_model_final_840()]

    models = [models_slp, models_uwnd, models_sst, models_slp_sst, models_uwnd_sst, models_slp_uwnd, models_all]
    return models

In [100]:
models = get_all_model()

In [101]:
for i in models[0]:
    i.summary()

Model: "model_108"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_99 (InputLayer)       [(None, 324)]             0         
                                                                 
 dense_384 (Dense)           (None, 324)               105300    
                                                                 
 re_lu_192 (ReLU)            (None, 324)               0         
                                                                 
 dense_385 (Dense)           (None, 97)                31525     
                                                                 
 dense_386 (Dense)           (None, 324)               31752     
                                                                 
Total params: 168,577
Trainable params: 168,577
Non-trainable params: 0
_________________________________________________________________
Model: "model_109"
________________________________

# Training

In [102]:
models = get_all_model()
model_index = 6
data_index = [0, 1, 2]
data = get_data(data_index)
data.shape

(624, 840)

In [103]:
def get_next_inputs(models, index, inputs):
    model_temp = Model(inputs=models[index].input, outputs=models[index].layers[3].output)
    return model_temp(inputs)

def train_all_models(models, inputs):
    x = inputs
    if type(models) == type([]):
        for i, model in enumerate(models[:-1]):
            model.fit(x, x, epochs=800, batch_size=16, verbose=2, validation_data=(x,x))
            x = get_next_inputs(models, i, x)

        models[-1].layers[2] = models[0].layers[3]
        models[-1].layers[11]= models[0].layers[4]
        models[-1].layers[4] = models[1].layers[3]
        models[-1].layers[9] = models[1].layers[4]
        models[-1].layers[6] = models[2].layers[3]
        models[-1].layers[7] = models[2].layers[4]
        models[-1].compile(optimizer='adam', loss='mse')

        models[-1].fit(inputs, inputs, epochs=800, batch_size=16, verbose=2, validation_data=(inputs,inputs))

def train_perticular_model(models, index, inputs):
    if not index == 0 and not index == -1:
        x = get_next_inputs(models, index-1, inputs)
    else:
        x = inputs
    models[index].fit(x, x, epochs=800, batch_size=16, verbose=2, validation_data=(x,x))

In [104]:
train_all_models(models[model_index], data)
# train_perticular_model(models[model_index], -1, data)

Epoch 1/800
39/39 - 0s - loss: 25.7713 - val_loss: 12.6170 - 484ms/epoch - 12ms/step
Epoch 2/800
39/39 - 0s - loss: 11.0353 - val_loss: 9.2986 - 252ms/epoch - 6ms/step
Epoch 3/800
39/39 - 0s - loss: 8.3443 - val_loss: 7.1761 - 244ms/epoch - 6ms/step
Epoch 4/800
39/39 - 0s - loss: 6.5301 - val_loss: 5.7131 - 240ms/epoch - 6ms/step
Epoch 5/800
39/39 - 0s - loss: 5.3897 - val_loss: 4.8023 - 234ms/epoch - 6ms/step
Epoch 6/800
39/39 - 0s - loss: 4.6278 - val_loss: 4.1657 - 237ms/epoch - 6ms/step
Epoch 7/800
39/39 - 0s - loss: 4.1164 - val_loss: 3.7243 - 242ms/epoch - 6ms/step
Epoch 8/800
39/39 - 0s - loss: 3.7350 - val_loss: 3.4656 - 237ms/epoch - 6ms/step
Epoch 9/800
39/39 - 0s - loss: 3.3961 - val_loss: 3.1001 - 238ms/epoch - 6ms/step
Epoch 10/800
39/39 - 0s - loss: 3.1219 - val_loss: 2.8676 - 247ms/epoch - 6ms/step
Epoch 11/800
39/39 - 0s - loss: 2.8907 - val_loss: 2.6619 - 243ms/epoch - 6ms/step
Epoch 12/800
39/39 - 0s - loss: 2.7049 - val_loss: 2.5248 - 241ms/epoch - 6ms/step
Epoch 13/

# Post Training Treatment

In [105]:
class Custom_Layer(layers.Layer):
    def __init__(self, weights, bias=False):
        super(Custom_Layer, self).__init__()
        self.w = weights
        if bias:
            self.b = bias

    def call(self, inputs):
        try: return matmul(inputs, self.w) + self.b
        except: return matmul(inputs, self.w)

In [106]:
print([[] if i in [1, 3, 4] else None for i in range(7)])

[None, [], None, [], [], None, None]


In [107]:
def std_and_discard(indexes):
    final_models = [models[i][-1] if i in indexes else None for i in range(7)]
    std = [[] if i in indexes else None for i in range(7)]
    for i, model in enumerate(final_models):
        if model is not None:
            for layer in model.layers:
                try: std[i].append(np.array(math.reduce_std(layer.weights[0])) * 2)
                except: std[i].append([])

    for index, model in enumerate(final_models):
        if model is not None:
            final_layers = Sequential([Input(shape=(None, data.shape[1]))])
            for i, layer in enumerate(model.layers):
                try:
                    temp_layer = []
                    for j in layer.weights[0]:
                        temp_weights = []
                        for k in j:
                            if k < std[index][i]: temp_weights.append(0)
                            else:          temp_weights.append(1)  
                        temp_layer.append(temp_weights)
                    weights = Variable(temp_layer, dtype='float32')
                    try: final_layers.add(Custom_Layer(weights, bias=layer.weights[1]))
                    except: final_layers.add(Custom_Layer(weights))
                except IndexError: final_layers.add(layer)

            final_layers.compile(optimizer='adam', loss='mse')
            final_layers.build(input_shape=(None, 324))
            # print(final_layers.summary())

            final_models[index] = final_layers
        
    return final_models

In [108]:
indexes = [model_index]
final_models = std_and_discard(indexes)

In [109]:
predictor_models = [[] if i in indexes else None for i in range(7)]
for i in range(7):
    if predictor_models[i] is not None:
        for j in [3, 5, 7]:
            predictor_models[i].append(Model(inputs=final_models[i].input, outputs=final_models[i].layers[j].output))

# Checking Co-relation

In [110]:
def get_feature(data):
  new_data = []
  for i in range(len(data)):
    d = data.iloc[i]
    new_data.append(list(d[1:]))
  return np.array(new_data)

def predictor(month,mod):
  # print(data_index)
  for i in data_index:
    month_path = months[i][month]
    month_data = pd.read_csv(month_path)
    feature = get_feature(month_data)
    try: features = np.concatenate((features, feature), axis=1)
    except: features = feature
  # print(features)
  # features = np.array(features)
  # print(features.shape)
  pred_m = predictor_models_toUse[mod](features)
  pred_f = pred_m.numpy()
  return pred_f

In [111]:
from sklearn.svm import SVR
from scipy.stats import pearsonr

def get_top_pred(top,pred_info,w_start,w_end):
    pred_f = pred_info
    cor_list = []
    for i in range(len(pred_f[0])):
      score = []
      for j in range(w_start,w_end):
        score.append(pred_f[j][i])
      corr, _ = pearsonr(rain_fall_data[w_start:w_end],score)
      cor_list.append(corr)
    list1=list(enumerate(cor_list))
    list2=sorted(list1, key=lambda x: x[1],reverse=True)
    top_feature_index = []

    for i in range(top):
        index = list2[i][0]
        top_feature_index.append(index)

    predictor = []
    for i in range(len(pred_f)):
        temp = []
        for j in top_feature_index:
            feature = pred_f[i][j]
            temp.append(feature)
        predictor.append(temp)
    
    return predictor


def get_predictors(months,top,w_start,w_end):
  for i in months:
    for mod in range(len(predictor_models_toUse)):
      y_pred = predictor(i,mod)
      b = get_top_pred(top, y_pred, w_start,w_end)
      try:    pred = np.concatenate((pred,b), axis=1)
      except: pred = np.array(b)
  return pred
  

def window_solution(months,top):
  reg = SVR(kernel = 'rbf',C=1.0,epsilon=0.45)
  k=0
  cor_all = []
  for window in range(10,53):
    k+=1
    score = []
    pred = get_predictors(months,top,53-window,53)
    for i in range(14):
      reg.fit(pred[53-window:53+i], rain_fall_data[53-window:53+i])
      
      score.append(reg.predict([pred[53+i]])[0])
    corr, _ = pearsonr(rain_fall_data[53:67],score)
    print("Window size = ",window,"   plcc",corr)
    cor_all.append(corr)
  return min(cor_all),max(cor_all)

In [112]:
# from itertools import combinations
# months_comb = [j for i in range(4) for j in combinations(range(5,9), i+1)]
months_comb = [[4]]
top = 9

predictor_models_toUse = predictor_models[model_index]

minimum, maximum = 1, -1
for month in months_comb:
  for i in range(1,top):
    left, right = window_solution(month,i)
    print("\n\nMonth combination = ",month,"    top = ",i,"   min and max",left,"   ",right)
    if minimum > left:  minimum, min_month, min_top = left, month, i
    if maximum < right: maximum, max_month, max_top = right, month, i
    print(f'Minimum: {minimum}, {min_month}, {min_top}    Maximum: {maximum}, {max_month}, {max_top}\n')

Window size =  10    plcc -0.3686250236717534
Window size =  11    plcc -0.32198375754610253
Window size =  12    plcc -0.5295602744904514
Window size =  13    plcc -0.5366771822234663
Window size =  14    plcc -0.410693717633637
Window size =  15    plcc -0.5366771822234663
Window size =  16    plcc -0.5295602744904514
Window size =  17    plcc -0.32198375754610253
Window size =  18    plcc -0.5295602744904514
Window size =  19    plcc -0.5366771822234663
Window size =  20    plcc -0.5295602744904514
Window size =  21    plcc -0.5374837738116592
Window size =  22    plcc -0.31047074671482255
Window size =  23    plcc -0.5374837738116592
Window size =  24    plcc -0.31047074671482255
Window size =  25    plcc -0.4608020716194253
Window size =  26    plcc -0.4704995396639676
Window size =  27    plcc -0.46334777610198036
Window size =  28    plcc -0.4704995396639676
Window size =  29    plcc -0.46334777610198036
Window size =  30    plcc -0.4704995396639676
Window size =  31    plcc -0.