In [1]:
#Import libraries
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
#Download data
data = pd.read_csv("../calls_final.csv")

In [3]:
#Preprocessing
data = data.drop(["Unnamed: 0","Unnamed: 0.1","tmed","tmin","tmax","velmedia","racha"],axis=1)

In [4]:
data["type0"] = data["type0"].astype(np.int32)
data["type1"] = data["type1"].astype(np.int32)
data["type2"] = data["type2"].astype(np.int32)
data["type3"] = data["type3"].astype(np.int32)
data["wd"] = data["wd"].astype("category")
data["prec"] = data["prec"].astype("int").astype("category")

In [5]:
date = data["d"].astype("str")+"-"+data["m"].astype("str")+"-"+(data["y"]+6).astype("str")
date = pd.to_datetime(date, dayfirst=True, format='%d-%m-%Y')
data = data.drop(["d","y",'m'],axis=1)

In [6]:
data = data.drop(["type1","type2","type3"],axis=1)

In [7]:
data = pd.concat([data.drop("wd",axis=1), pd.get_dummies(data['wd'],prefix="wd",drop_first=True)], axis=1)

In [8]:
col = data.columns

n = len(data)
train_0 = data[0:int(n*0.7)]
val_0 = data[int(n*0.7):int(n*0.9)]
test_0 = data[int(n*0.9):]

In [9]:
print(f'Training data: from {min(date[0:int(n*0.7)])} to {max(date[0:int(n*0.7)])}')
print(f'Validation data: from {min(date[int(n*0.7):int(n*0.9)])} to {max(date[int(n*0.7):int(n*0.9)])}')
print(f'Testing data: from {min(date[int(n*0.9):])} to {max(date[int(n*0.9):])}')

Training data: from 2007-09-01 00:00:00 to 2015-06-25 00:00:00
Validation data: from 2015-06-26 00:00:00 to 2017-09-18 00:00:00
Testing data: from 2017-09-19 00:00:00 to 2018-10-31 00:00:00


In [10]:
#Min-max scaler
minx = min(train_0["type0"])
maxx = max(train_0["type0"])

train_0["type0"] = (train_0["type0"] - minx) / (maxx - minx)
val_0["type0"] = (val_0["type0"] - minx) / (maxx - minx)
test_0["type0"] = (test_0["type0"] - minx) / (maxx - minx)
data["type0"] = (data["type0"] - minx) / (maxx - minx)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_0["type0"]=(train_0["type0"]-minx)/(maxx-minx)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_0["type0"]=(val_0["type0"]-minx)/(maxx-minx)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_0["type0"]=(test_0["type0"]-minx)/(maxx-minx)


In [11]:
# Window Generator for everything
class WindowGenerator():
    def __init__(self, input_width, label_width, shift,
               train_df=train_0, val_df=val_0, test_df=test_0,
               full_df=data,label_columns=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.full_df = full_df
        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in
                                        enumerate(label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}
        # Work out the window parameters.
        self.input_width = input_width #Input of each batch
        self.label_width = label_width #Prediction of each batch
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])
def split_window(self, features):
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
        labels = tf.stack(
            [labels[:, :, self.column_indices[name]] for name in self.label_columns],
            axis=-1)
    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
    return inputs, labels

WindowGenerator.split_window = split_window


def make_dataset(self, data):
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.preprocessing.timeseries_dataset_from_array(
      data=data,
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      batch_size=28*2,)
    ds = ds.map(self.split_window)
    return ds

WindowGenerator.make_dataset = make_dataset

@property
def train(self):
    return self.make_dataset(self.train_df)

@property
def val(self):
    return self.make_dataset(self.val_df)

@property
def test(self):
    return self.make_dataset(self.test_df)

@property
def full(self):
    return self.make_dataset(self.full_df)

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.full = full


In [12]:
#Modelling
tf.random.set_seed(613)

MAX_EPOCHS = 80
INPUT_WIDTH = 7*3 # Days of input in each batch
OUT_STEPS = 7
LABEL_WIDTH = 7 # Days of predictions
num_features = 8
multi_window = WindowGenerator(input_width=INPUT_WIDTH,
                               label_width=LABEL_WIDTH, shift=OUT_STEPS, train_df=train_0, 
                               val_df=val_0, test_df=test_0,
                                label_columns=["type0"])

In [13]:
#Define model
model = tf.keras.Sequential()


model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, 
                             return_sequences=False,
                            #recurrent_dropout = 0.1,
                             #dropout = 0.05,
                                activation="relu")))

model.add(tf.keras.layers.Dense(OUT_STEPS*1,
                          kernel_initializer=tf.initializers.zeros))

model.add(tf.keras.layers.Reshape([OUT_STEPS, 1]))

In [14]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=2,
                                                    mode='min')
model.compile(loss=tf.losses.MeanAbsoluteError(),
                optimizer=tf.optimizers.Adam(),
                metrics=[tf.metrics.MeanAbsoluteError()])
#Model training
model.fit(multi_window.train, epochs=MAX_EPOCHS,
                      validation_data=multi_window.val,
         callbacks=[early_stopping])

Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Label indices: [21 22 23 24 25 26 27]
Label column name(s): ['type0']> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Label indices: [21 22 23 24 25 26 27]
Label column name(s): ['type0']> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/

Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80


In [16]:
#Save training predictions
predtrain0 = model.predict(multi_window.train)

for i in range(predtrain0.shape[0]):
    predtrain0[i] = predtrain0[i] * (maxx - minx) + minx
    for j in range(predtrain0.shape[1]):
        predtrain0[i][j][0] = np.round(predtrain0[i][j][0])

for i in range(predtrain0.shape[1]):
    df1 = pd.DataFrame(predtrain0[:,i,:], columns=["type0"])
    df2 = pd.DataFrame(date.iloc[list(range(21, (int(n*0.7) - 6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-train' + str(i + 1) + 't0.csv')

In [17]:
#Save validation predictions
predval0 = model.predict(multi_window.val)

for i in range(predval0.shape[0]):
    predval0[i] = predval0[i] * (maxx - minx) + minx
    for j in range(predval0.shape[1]):
        predval0[i][j][0] = np.round(predval0[i][j][0])

for i in range(predval0.shape[1]):
    df1 = pd.DataFrame(predval0[:,i,:], columns=["type0"])
    df2 = pd.DataFrame(date.iloc[list(range(21, (int(n*0.7) - 6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-val' + str(i + 1) + 't0.csv')

In [18]:
#Save testing predictions
predtest0 = model.predict(multi_window.test)

for i in range(predtest0.shape[0]):
    predtest0[i] = predtest0[i] * (maxx - minx) + minx
    for j in range(predtest0.shape[1]):
        predtest0[i][j][0] = np.round(predtest0[i][j][0])

for i in range(predtest0.shape[1]):
    df1 = pd.DataFrame(predtest0[:,i,:], columns=["type0"])
    df2 = pd.DataFrame(date.iloc[list(range(21 + int(n*0.9), (len(data) - 6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-test' + str(i + 1) + 't0.csv')

In [19]:
#Save predictions of full dataset
predfull0 = model.predict(multi_window.full)

for i in range(predfull0.shape[0]):
    predfull0[i] = predfull0[i] * (maxx - minx) + minx
    for j in range(predfull0.shape[1]):
        predfull0[i][j][0] = np.round(predfull0[i][j][0])

for i in range(predfull0.shape[1]):
    df1 = pd.DataFrame(predfull0[:,i,:], columns=["type0"])
    df2 = pd.DataFrame(date.iloc[list(range(21, (len(data) - 6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-full' + str(i + 1) + 't0.csv')

### New scenarios

In [22]:
date_scenarios = date.iloc[int(n*0.9):]

In [23]:
aux_date = pd.Series([date[len(date)-1] + pd.to_timedelta(i, unit='D') for i in range(1, 8)])
date_scenarios = pd.concat([date_scenarios, aux_date], ignore_index=True)

#### Scenario 1- No rain

In [110]:
test_01 = test_0.copy()

In [111]:
test_01.iloc[test_01.shape[0]-14: test_01.shape[0], 1] = 0

In [112]:
test_01.iloc[test_01.shape[0]-21: test_01.shape[0], 1]

4058    1
4059    1
4060    1
4061    0
4062    1
4063    0
4064    0
4065    0
4066    0
4067    0
4068    0
4069    0
4070    0
4071    0
4072    0
4073    0
4074    0
4075    0
4076    0
4077    0
4078    0
Name: prec, dtype: category
Categories (2, int64): [0, 1]

In [113]:
aux1 = pd.DataFrame({
    'type0': [-9999999, -99999999, -99999999, -99999999, -99999999, -99999999, -99999999],
    'prec': [0, 0, 0, 0, 0, 0, 0],
    'wd_mon': [0, 0, 0, 0, 1, 0, 0],
    'wd_sat': [0, 0, 1, 0, 0, 0, 0],
    'wd_sun': [0, 0, 0, 1, 0, 0, 0],
    'wd_thu': [1, 0, 0, 0, 0, 0, 0],
    'wd_tue': [0, 0, 0, 0, 0, 1, 0],
    'wd_wed': [0, 0, 0, 0, 0, 0, 1]
})

test_01 = pd.concat([test_01, aux1], ignore_index=True)

In [114]:
test_01.iloc[test_01.shape[0]-21: test_01.shape[0], 1]

394    0
395    0
396    0
397    0
398    0
399    0
400    0
401    0
402    0
403    0
404    0
405    0
406    0
407    0
408    0
409    0
410    0
411    0
412    0
413    0
414    0
Name: prec, dtype: int64

In [115]:
impact_scenario_1 = WindowGenerator(input_width=INPUT_WIDTH,
                               label_width=LABEL_WIDTH, shift=OUT_STEPS, test_df=test_01,
                              label_columns=["type0"])

In [116]:
predtest01=model.predict(impact_scenario_1.test)

for i in range(predtest01.shape[0]):
    predtest01[i]=predtest01[i]*(maxx-minx)+minx
    for j in range(predtest01.shape[1]):
        predtest01[i][j][0]=np.round(predtest01[i][j][0])

for i in range(predtest01.shape[1]):
    df1=pd.DataFrame(predtest01[:,i,:],columns=["type0"])
    df2=pd.DataFrame(date_scenarios.iloc[list(range(21,(len(date_scenarios)-6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-test-scenario1'+str(i+1)+'t0.csv')

## Scenario 2- Heavy rain

In [117]:
test_02 = test_0.copy()

In [118]:
test_02.iloc[test_02.shape[0]-21: test_02.shape[0], 1] 

4058    1
4059    1
4060    1
4061    0
4062    1
4063    0
4064    0
4065    1
4066    1
4067    1
4068    1
4069    0
4070    0
4071    0
4072    0
4073    1
4074    1
4075    0
4076    1
4077    1
4078    1
Name: prec, dtype: category
Categories (2, int64): [0, 1]

In [119]:
test_02.iloc[test_02.shape[0]-14: test_02.shape[0], 1] = 1

In [120]:
test_02.iloc[test_02.shape[0]-21: test_02.shape[0], 1]

4058    1
4059    1
4060    1
4061    0
4062    1
4063    0
4064    0
4065    1
4066    1
4067    1
4068    1
4069    1
4070    1
4071    1
4072    1
4073    1
4074    1
4075    1
4076    1
4077    1
4078    1
Name: prec, dtype: category
Categories (2, int64): [0, 1]

In [121]:
aux2 = pd.DataFrame({
    'type0': [0, 0, 0, 0, 0, 0, 0],
    'prec': [1, 1, 1, 1, 1, 1, 1],
    'wd_mon': [0, 0, 0, 0, 1, 0, 0],
    'wd_sat': [0, 0, 1, 0, 0, 0, 0],
    'wd_sun': [0, 0, 0, 1, 0, 0, 0],
    'wd_thu': [1, 0, 0, 0, 0, 0, 0],
    'wd_tue': [0, 0, 0, 0, 0, 1, 0],
    'wd_wed': [0, 0, 0, 0, 0, 0, 1]
})

test_02 = pd.concat([test_02, aux2], ignore_index=True)

In [122]:
impact_scenario_2 = WindowGenerator(input_width=INPUT_WIDTH,
                               label_width=LABEL_WIDTH, shift=OUT_STEPS, test_df=test_02,
                              label_columns=["type0"])

In [123]:
predtest02=model.predict(impact_scenario_2.test)

for i in range(predtest02.shape[0]):
    predtest02[i]=predtest02[i]*(maxx-minx)+minx
    for j in range(predtest02.shape[1]):
        predtest02[i][j][0]=np.round(predtest02[i][j][0])

for i in range(predtest02.shape[1]):
    df1=pd.DataFrame(predtest02[:,i,:],columns=["type0"])
    df2=pd.DataFrame(date_scenarios.iloc[list(range(21,(len(date_scenarios)-6)))][:]).reset_index()
    df1.join(df2).to_csv('.\output\out-test-scenario2'+str(i+1)+'t0.csv')