In [None]:
# Mount Google Drive under /content/drive; the paths used by the following cells
# (fidle package, dataset, run directory) all live below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

import numpy as np
import math, random
import matplotlib.pyplot as plt

import pandas as pd
from pandas.plotting import scatter_matrix
import h5py, json
import os,time,sys


from importlib import reload

# Make the fidle package stored on Drive importable; this has to run before the import below.
sys.path.append('/content/drive/MyDrive/Colab Notebooks/fidle-master')
import fidle.pwk as pwk

# Directory handed to pwk.mkdir() and used as the parent of the saved model files.
run_dir = '/content/drive/MyDrive/Colab Notebooks'


# Allow pandas to display up to 200 rows before truncating its output.
pd.set_option('display.max_rows',200)

In [None]:
 # ---- About dataset (no need to change)
#
dataset_dir      = '/content/drive/MyDrive/Colab Notebooks'                  # Folder (on the mounted Drive) that contains the dataset file
dataset_filename = 'synop-LYSinterpole.csv'
schema_filename  = 'synopinterpole.json'
features         = ['Température','mois_de_l_annee','Humidité']
features_len     = len(features)

# ---- About training (Can be changed !)
#
scale            = 1        # Fraction of the dataset rows to use (1 = all)
train_prop       = .8       # Fraction of the rows used for training (the rest is the test set)
sequence_len     = 32       # Number of consecutive rows in one input window
batch_size       = 64       # Number of windows per batch
epochs           = 10       # Number of training epochs
fit_verbosity    = 1        # 0 = silent, 1 = progress bar, 2 = one line per epoch

In [None]:
 # NOTE(review): presumably lets the listed settings be overridden from outside the notebook — confirm against fidle.pwk.
 pwk.override('scale', 'train_prop', 'sequence_len', 'batch_size', 'epochs', 'fit_verbosity')

In [None]:
# ---- Load the raw CSV (';'-separated, first line is the header)
csv_path = f'{dataset_dir}/{dataset_filename}'
df = pd.read_csv(csv_path, sep=';', header=0)

# ---- Keep only the leading `scale` fraction of the rows
df = df[:int(scale*len(df))]
train_len = int(train_prop*len(df))
print(train_len)

# ---- Train / test split on the selected features, in file order
# (.loc slicing is label-based and inclusive, hence the -1 on the training side)
dataset_train = df.loc[:train_len-1, features]
dataset_test  = df.loc[train_len:, features]
pwk.subtitle('Train dataset example :')
display(dataset_train.head(31))

# ---- Standardise both splits with the statistics of the training split only
mean, std = dataset_train.mean(), dataset_train.std()
dataset_train = dataset_train.sub(mean).div(std)
dataset_test  = dataset_test.sub(mean).div(std)

pwk.subtitle('After normalization :')
display(dataset_train.describe().style.format("{0:.2f}"))

# ---- From here on the splits are plain numpy arrays
dataset_train, dataset_test = dataset_train.to_numpy(), dataset_test.to_numpy()

pwk.subtitle('Shapes :')
for label, shape in (('Dataset       : ', df.shape),
                     ('Train dataset : ', dataset_train.shape),
                     ('Test  dataset : ', dataset_test.shape)):
    print(label, shape)

In [None]:
# ---- Sliding-window generators
# Data and targets are the same array: each sample is a window of `sequence_len`
# consecutive rows and its target is the row that follows the window.
generator_options = dict(length=sequence_len, batch_size=batch_size)
train_generator = TimeseriesGenerator(dataset_train, dataset_train, **generator_options)
test_generator  = TimeseriesGenerator(dataset_test,  dataset_test,  **generator_options)

# ---- About
pwk.subtitle('About the splitting of our dataset :')

# First training batch, used for all the displays below
x, y = train_generator[0]
print('Nombre de train batchs disponibles : ', len(train_generator))
print('batch x shape : ', x.shape)
print('batch y shape : ', y.shape)

pwk.subtitle('What a batch looks like (x[0]) :')
pwk.np_print(x[0])
pwk.subtitle('What a batch looks like (y[0]) :')
pwk.np_print(y[0])

In [None]:
# LSTM regressor: takes a (sequence_len, features_len) window and outputs one value per feature.
# NOTE(review): the original author flagged the LSTM line as suspicious ("this can't be right");
# it requests a ReLU activation explicitly — confirm this is intended.
model = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(sequence_len, features_len)),
    keras.layers.LSTM(100, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(features_len),
])

model.summary()

In [None]:
# Make sure the run directory exists, then checkpoint the model into a single HDF5 file.
# The file name has to match the one reloaded later with load_model() ('best_model.h5');
# the previous '.h10' suffix meant the trained model was never the one loaded back.
# save_best_only=True keeps only the best model seen so far according to the monitored quantity.
pwk.mkdir(run_dir)
save_dir = f'{run_dir}/best_model.h5'
bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)

In [None]:
 # Adam optimiser, mean-squared-error loss; mean absolute error is tracked as an extra metric.
 model.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [None]:
 pwk.chrono_start()

 # Train on the training windows, use the test windows as validation data,
 # and pass the checkpoint callback to the training loop.
 history = model.fit(
     train_generator,
     validation_data=test_generator,
     callbacks=[bestmodel_callback],
     epochs=epochs,
     verbose=fit_verbosity,
 )

 pwk.chrono_show()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Duration :  00:03:08 508ms


In [None]:
 # Training vs validation curves for the loss and for the MAE (save_as='01-history').
 pwk.plot_history(
     history,
     plot={'loss': ['loss', 'val_loss'], 'mae': ['mae', 'val_mae']},
     save_as='01-history',
 )

In [None]:
 # Load the model stored as best_model.h5 under run_dir.
 # NOTE(review): the variable carries a "1" suffix although the file is best_model.h5,
 # not best_model1.h5 (the path given to bestmodel_callback1) — confirm which model is meant.
 best_model_path = f'{run_dir}/best_model.h5'
 loaded_model1 = tf.keras.models.load_model(best_model_path)

À partir d'ici, on duplique tout le code pour, cette fois, entraîner le modèle uniquement sur les données d'une seule ville.


In [None]:
# ---- About dataset: same values as in the first configuration cell
# (dataset_dir is not redefined here; the value set earlier is reused when the file is read).
dataset_filename = 'synop-LYSinterpole.csv'
schema_filename  = 'synopinterpole.json'
features         = ['Température','mois_de_l_annee','Humidité']
features_len     = len(features)

# ---- About training (Can be changed !)
#
scale            = 1        # Fraction of the dataset rows to use (1 = all)
train_prop       = .8       # Fraction of the rows used for training (the rest is the test set)
sequence_len     = 32       # Number of consecutive rows in one input window
batch_size       = 64       # Number of windows per batch
epochs           = 10       # Number of training epochs
fit_verbosity    = 1        # 0 = silent, 1 = progress bar, 2 = one line per epoch

In [None]:
 # NOTE(review): presumably lets the listed settings be overridden from outside the notebook — confirm against fidle.pwk.
 pwk.override('scale', 'train_prop', 'sequence_len', 'batch_size', 'epochs', 'fit_verbosity')

In [None]:
# ---- Read the full dataset again
df = pd.read_csv(f'{dataset_dir}/{dataset_filename}', header=0, sep=';')

# ---- Keep one row out of three (rows 0, 3, 6, ...), renumbered from 0
# NOTE(review): presumably the file interleaves three stations, so this keeps a single one — confirm.
# A strided selection replaces the former column-by-column construction followed by a
# transpose: same rows and same 0..n-1 index, but the column dtypes are preserved and
# no Python-level loop is needed.
df1 = df.iloc[::3].reset_index(drop=True)

# ---- Scaling
# Applied to the rows (samples). It used to be applied before the transpose, where it
# truncated the feature axis instead of the samples whenever scale < 1.
df1 = df1[:int(scale*len(df1))]
train_len1 = int(train_prop*len(df1))


# ---- Train / Test (.loc slicing is label-based and inclusive, hence the -1)
dataset_train1 = df1.loc[ :train_len1-1, features ]
dataset_test1  = df1.loc[train_len1:,    features ]
pwk.subtitle('Train dataset example :')
display(dataset_train1.head(31))

# ---- Normalize with the statistics of the training split, and convert to numpy array

mean1 = dataset_train1.mean()
std1  = dataset_train1.std()
dataset_train1 = (dataset_train1 - mean1) / std1
dataset_test1  = (dataset_test1  - mean1) / std1

pwk.subtitle('After normalization :')
display(dataset_train1.describe().style.format("{0:.2f}"))

dataset_train1 = dataset_train1.to_numpy()
dataset_test1  = dataset_test1.to_numpy()

pwk.subtitle('Shapes :')
print('Dataset       : ',df1.shape)
print('Train dataset : ',dataset_train1.shape)
print('Test  dataset : ',dataset_test1.shape)

In [None]:
# ---- Window and batch sizes for the one-row-in-three dataset
# The window is a third of the original one, and a batch holds twice as many samples as a window has rows.
# (Alternative left by the original author: reuse sequence_len and batch_size unchanged.)
sequence_len1 = sequence_len // 3
batch_size1   = sequence_len1 * 2

# ---- Sliding-window generators (data and targets are the same array)
generator_options1 = dict(length=sequence_len1, batch_size=batch_size1)
train_generator1 = TimeseriesGenerator(dataset_train1, dataset_train1, **generator_options1)
test_generator1  = TimeseriesGenerator(dataset_test1,  dataset_test1,  **generator_options1)

# ---- About
pwk.subtitle('About the splitting of our dataset :')

# First training batch, used for all the displays below
x, y = train_generator1[0]
print('Nombre de train batchs disponibles : ', len(train_generator1))
print('batch x shape : ', x.shape)
print('batch y shape : ', y.shape)

pwk.subtitle('What a batch looks like (x[0]) :')
pwk.np_print(x[0])
pwk.subtitle('What a batch looks like (y[0]) :')
pwk.np_print(y[0])
print(batch_size1)

In [None]:
# Same architecture as the first model, but for (sequence_len1, features_len) windows.
# NOTE(review): the original author flagged the LSTM line as suspicious ("this can't be right");
# it requests a ReLU activation explicitly — confirm this is intended.
model1 = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(sequence_len1, features_len)),
    keras.layers.LSTM(100, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(features_len),
])

model1.summary()

In [None]:
# Make sure the run directory exists, then set up the checkpoint of the second model.
pwk.mkdir(run_dir)
save_dir1 = f'{run_dir}/best_model1.h5'
checkpoint_options1 = dict(filepath=save_dir1, save_best_only=True, verbose=0)
bestmodel_callback1 = tf.keras.callbacks.ModelCheckpoint(**checkpoint_options1)
print(bestmodel_callback1)


In [None]:
 # Adam optimiser, mean-squared-error loss; mean absolute error is tracked as an extra metric.
 model1.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [None]:
 pwk.chrono_start()

 # Train the second model on its own windows, use its test windows as validation data,
 # and pass its checkpoint callback to the training loop.
 history1 = model1.fit(
     train_generator1,
     validation_data=test_generator1,
     callbacks=[bestmodel_callback1],
     epochs=epochs,
     verbose=fit_verbosity,
 )

 pwk.chrono_show()

In [None]:
# ---- Pick a random window start in the test set, aligned on a multiple of 3
# (original note: this way we can decide where the sequence starts, here at Barberey).
# The reference series below reads every third row up to s + truth_span, so the draw is
# bounded to keep that last row inside dataset_test. The former bound allowed a start
# whose last reference row was past the end of the array (random IndexError).
# random.randint raises ValueError if dataset_test is too short for a single window.
truth_span = 3 * ((sequence_len + 2) // 3)   # offset of the last row of range(s, s+sequence_len+3, 3)
s1 = random.randint(0, (len(dataset_test) - 1 - truth_span) // 3)
s = 3 * s1

# Model input: sequence_len consecutive rows starting at s.
sequence2 = dataset_test[s:s + sequence_len]
# Reference series: every third row from s, up to and including row s + truth_span.
# NOTE(review): the model is fed consecutive rows while the reference is strided by 3 —
# confirm that the last reference row is the one the prediction should be compared with.
sequence_true2 = dataset_test[s:s + sequence_len + 3:3]

# The model expects a batch axis in front: (1, sequence_len, number of features).
pred = loaded_model1.predict(sequence2[np.newaxis])

# ---- Show result
pwk.plot_multivariate_serie(sequence_true2, predictions=pred, labels=features, save_as='02-prediction-norm')

In [None]:
def denormalize(mean,std,seq):
    """Undo the (value - mean) / std normalisation, row by row.

    Args:
        mean: per-feature offsets added back to every row.
        std: per-feature scale factors every row is multiplied by.
        seq: sequence of rows to convert; it is copied, not modified.

    Returns:
        A copy of `seq` in which every row has been replaced by row * std + mean.
    """
    restored = seq.copy()
    for row_idx in range(len(restored)):
        # Write the rescaled row back in place of the normalised one.
        restored[row_idx] = restored[row_idx] * std + mean
    return restored


# ---- Get a sequence
# Random window start in the test set, aligned on a multiple of 3
# (original note: this way we can decide where the sequence starts, here at Barberey).
# The reference series reads every third row up to s + truth_span, so the draw is bounded
# to keep that last row inside dataset_test. The former bound allowed a start whose last
# reference row was past the end of the array (random IndexError).
# random.randint raises ValueError if dataset_test is too short for a single window.
truth_span = 3 * ((sequence_len + 2) // 3)   # offset of the last row of range(s, s+sequence_len+3, 3)
s1 = random.randint(0, (len(dataset_test) - 1 - truth_span) // 3)
s = 3 * s1

# Model input: sequence_len consecutive rows starting at s.
sequence2 = dataset_test[s:s + sequence_len]
# Reference series: every third row from s, up to and including row s + truth_span.
# NOTE(review): the model is fed consecutive rows while the reference is strided by 3 —
# confirm that the last reference row is the one the prediction should be compared with.
sequence_true2 = dataset_test[s:s + sequence_len + 3:3]


# ---- Prediction
# The model expects a batch axis in front: (1, sequence_len, number of features).
model_input = sequence2[np.newaxis]
print(model_input.shape)
pred = loaded_model1.predict(model_input)


# ---- De-normalization (back to the original scale of each feature)

sequence_true = denormalize(mean, std, sequence_true2)
pred          = denormalize(mean, std, pred)

# ---- Show it
feat  = 0   # position of 'Température' in `features`
feat2 = 2   # position of 'Humidité' in `features`

# Temperature: plot, then gap between the last reference value and the prediction.
pwk.plot_multivariate_serie(sequence_true, predictions=pred, labels=features, only_features=[feat], width=14, height=8, save_as='03-prediction')

delta_deg = abs(sequence_true[-1][feat] - pred[-1][feat])
print(f'Gap between prediction and reality : {delta_deg:.2f} °C')

# Humidity: saved under its own name so that it no longer overwrites the temperature figure.
pwk.plot_multivariate_serie(sequence_true, predictions=pred, labels=features, only_features=[feat2], width=14, height=8, save_as='03-prediction-humidity')

# Both sides of the difference use the humidity column (the prediction side used to read
# the temperature column), and the value is no longer reported in °C.
delta_hum = abs(sequence_true[-1][feat2] - pred[-1][feat2])
print(f'Gap between prediction and reality ({features[feat2]}) : {delta_hum:.2f}')