### Loading in Data

Uncomment and run the next two cells **only** if you are using Google Colab.

In [None]:
# from google.colab import drive
# drive.mount('/content/gdrive/')

In [None]:
# import sys
# sys.path.append('/content/gdrive/MyDrive/ACSE-9')

In [None]:
# Imports
from numpy import *
from math  import *
import sys, os
import numpy as np
!pip install vtk -q
import vtk
import vtktools
import matplotlib.pyplot as plt
import datetime, time

import pprint as pprint

[K     |████████████████████████████████| 59.5 MB 53 kB/s 
[K     |████████████████████████████████| 3.1 MB 44.6 MB/s 
[K     |████████████████████████████████| 495 kB 76.6 MB/s 
[K     |████████████████████████████████| 3.2 MB 47.0 MB/s 
[K     |████████████████████████████████| 74 kB 4.1 MB/s 
[K     |████████████████████████████████| 251 kB 52.8 MB/s 
[K     |████████████████████████████████| 1.3 MB 42.1 MB/s 
[K     |████████████████████████████████| 142 kB 60.9 MB/s 
[K     |████████████████████████████████| 294 kB 74.7 MB/s 
[?25h

In [None]:
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# import pandas as pd
import joblib

# Load Data 

Function to load the `.vtu` files 

In [None]:
def loadvtufile(path, name_simu, fieldname, vtu_start, vtu_end, vtu_step):
    '''
    Read one field from a numbered sequence of .vtu files.

    The files read are ``<name_simu>_<N>.vtu`` inside ``path`` for every
    ``N`` in ``range(vtu_start, vtu_end, vtu_step)``. Each file name and
    the total elapsed time are printed.

    Parameters
    ----------
    path : str
        folder containing the .vtu files (a trailing separator is optional)
    name_simu : str
        name of the simulation data, used as the file-name prefix
    fieldname : str
        field to read in e.g. CO2_ppm
    vtu_start : int
        first file number to read
    vtu_end : int
        file number to stop before (exclusive, as in ``range``)
    vtu_step : int
        increment between consecutive file numbers

    Returns
    -------
    numpy.ndarray
        The field data of every file read, stacked along a new first axis
        (an empty array when the range selects no files)
    '''
    tic = time.time()

    extension = '.vtu'
    all_data = []

    #---------------------------------------------------------------------
    # EXTRACT DATA
    #---------------------------------------------------------------------
    for vtuID in range(vtu_start, vtu_end, vtu_step):
        # os.path.join inserts the separator only when `path` lacks one, so
        # both 'folder' and 'folder/' resolve to the same file.
        filename = os.path.join(path, name_simu + '_' + str(vtuID) + extension)
        print ('\n  '+str(filename))

        vtu_data = vtktools.vtu(filename)
        all_data.append(vtu_data.GetField(fieldname))

    toc = time.time()
    print ('\n\nTime : ', toc - tic, 'sec')
    return np.array(all_data)

In [None]:
#--------------------------------#
#-- Choose variables           --#
#--------------------------------#
# Naming and location of the .vtu snapshot files.
# Colab alternative for `path`:
#   '/content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/'
name_simu = 'ClarenceCentre'
path      = 'ClarenceCentre/run_Clip_ToSend/'

# File-number range (start, end, step); an alternative end value is 455.
vtu_start, vtu_end, vtu_step = 0, 410, 1

In [None]:
# Read the CO2 field first, then the velocity field, over the same file range.
CO2data, Udata = (
    loadvtufile(path, name_simu, field, vtu_start, vtu_end, vtu_step)
    for field in ('CO2_ppm', 'Velocity')
)


  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_0.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_1.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_2.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_3.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_4.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_5.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_6.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_7.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_8.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_9.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_Clip_ToSend/ClarenceCentre_10.vtu

  /content/gdrive/MyDrive/ACSE-9/ClarenceCentre/run_

In [None]:
# Cache the loaded fields on disk so the .vtu files need not be re-read.
# Make sure the target folder exists first; writing into a missing folder fails.
os.makedirs('output_files', exist_ok=True)
joblib.dump(CO2data, 'output_files/CO2data_410.pkl')
joblib.dump(Udata, 'output_files/Udata_410.pkl')

In [None]:
# Restore the cached fields written by the previous cell.
CO2data, Udata = (
    joblib.load('output_files/CO2data_410.pkl'),
    joblib.load('output_files/Udata_410.pkl'),
)

print("Velocity matrix shape: ", Udata.shape)

Velocity matrix shape:  (410, 148906, 3)


# Standardise the Data 
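Using `MinMaxScaler` from `sklearn.preprocessing`, we rescale each relevant field ($CO_{2}$, $\text{velocity}_x$, $\text{velocity}_y$, $\text{velocity}_z$) separately. <br>
All values are scaled to the range [-1, 1]. Note that this is min-max normalisation rather than standardisation to zero mean and unit variance.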

In [None]:
# Number of snapshots (files) and number of mesh nodes, taken from the CO2 array.
nfiles = CO2data.shape[0]
nNodes = CO2data.shape[1]

# Split the velocity field into its three components (last axis of Udata).
xdata = Udata[:,:,0]
ydata = Udata[:,:,1]
zdata = Udata[:,:,2]

# Flatten CO2 to (snapshots, nodes). Use the measured node count instead of a
# hard-coded 148906 so the cell also works for meshes of a different size.
CO2_data = np.copy(CO2data).reshape((-1, nNodes))
print("Snapshot matrix shape: ", CO2_data.shape)

Snapshot matrix shape:  (410, 148906)


In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# One independent min-max scaler per field, each mapping onto [-1, 1].
# (StandardScaler is imported but not used in this cell.)
scaler_std, scaler_x, scaler_y, scaler_z = (MinMaxScaler((-1, 1)) for _ in range(4))

# Fit every scaler on its own field and rescale that same field with it.
norm_co2 = scaler_std.fit(CO2_data).transform(CO2_data)
norm_x = scaler_x.fit(xdata).transform(xdata)
norm_y = scaler_y.fit(ydata).transform(ydata)
norm_z = scaler_z.fit(zdata).transform(zdata)

# Show each scaled array followed by its maximum value.
print("Standardised CO2 data")
print(norm_co2)
print(norm_co2.max())
print("\nStandardised Velocity data (x, y, z)")
for scaled_component in (norm_x, norm_y, norm_z):
    print(scaled_component)
    print(scaled_component.max())

Standardised CO2 data
[[ 1.          1.          1.         ...  0.99348164  1.
   1.        ]
 [ 0.97360818  0.98210026  0.96064208 ...  0.91376529 -0.99998696
  -0.99222618]
 [ 0.63525847 -0.1372478  -0.38702153 ...  0.8247141  -0.9999803
  -0.99097205]
 ...
 [-0.99976513 -0.99991271 -0.99938171 ...  0.7330104   0.68586275
   0.77050793]
 [-0.99974408 -0.99990316 -0.9993286  ...  0.74076259  0.68249343
   0.7751542 ]
 [-0.99972373 -0.99989336 -0.9992791  ...  0.74722864  0.68295058
   0.77891166]]
1.0000000000004547

Standardised Velocity data (x, y, z)
[[ 1.          1.          1.         ...  1.          1.
   1.        ]
 [-0.86328312 -0.84303418 -0.78699626 ... -0.98939588 -0.76506549
  -0.84928302]
 [-0.83570974 -0.83261123 -0.80444392 ... -0.99247996 -0.80450159
  -0.87499324]
 ...
 [-0.97067307 -0.97098008 -0.96601997 ... -0.99635119 -0.93792194
  -0.95824746]
 [-0.97671718 -0.97713371 -0.9729094  ... -0.997468   -0.94774517
  -0.96531361]
 [-0.97761454 -0.97801413 -0.9740071

### Check Scaling
Checking parameters of the scaled fields

In [None]:
# Spot checks on single columns of the scaled arrays: (label, array, column).
# NOTE(review): the mean and std checks look at different columns for CO2, x
# and z — confirm whether that is intentional.
mean_checks = (
    ("CO2 mean ", norm_co2, 6),
    ("x mean ", norm_x, 0),
    ("y mean ", norm_y, 0),
    ("z mean ", norm_z, 0),
)
std_checks = (
    ("CO2 std ", norm_co2, 1),
    ("x std ", norm_x, 1),
    ("y std ", norm_y, 0),
    ("z std ", norm_z, 5),
)

for label, scaled, col in mean_checks:
    column = scaled[:, col]
    print(label, np.sum(column)/len(column))
print("  ")
for label, scaled, col in std_checks:
    print(label, np.std(scaled[:, col]))

CO2 mean  -0.5994756942017375
x mean  -0.9433596305642563
y mean  0.41751372007208054
z mean  0.472883095945737
  
CO2 std  0.3708711319083335
x std  0.10303102864913727
y std  0.14977976708411547
z std  0.2455419836147052


# Create Snapshot Matrix
The snapshot matrix $\mathbf{\underline{X}}$ has dimensions
$$\dim(\mathbf{\underline{X}}) = N_{\text{snapshots}} \times N_{\text{total}}, \qquad N_{\text{total}} = N_{\text{nodes}} \times N_{\text{fields}}$$ <br>
where the four fields are $(CO_2, U_x, U_y, U_z)$. <br>
For each timestep, we have $CO_2$ data and velocity data. 

In [None]:
# Build the snapshot matrix by appending one field at a time along the columns:
# CO2, then the x-, y- and z-components of velocity.
# NOTE(review): each step allocates a new array; the intermediates are kept
# here in case other cells still refer to them.
alldata1 = np.hstack((norm_co2, norm_x))
alldata2 = np.hstack((alldata1, norm_y))
alldata = np.hstack((alldata2, norm_z))

In [None]:
# The concatenated array is used as the snapshot matrix unchanged.
# (An earlier variant reshaped it instead:
#  np.array(alldata).reshape(4*nfiles, nNodes))
all_values = alldata

# Report the shape before and after the (no-op) hand-over.
print("initial shape", alldata.shape)
print("Snapshot Matrix shape", all_values.shape)

initial shape (410, 595624)
1.0000000000004547
Snapshot Matrix shape (410, 595624)


In [None]:
import joblib

# Make sure the target folder exists first; writing into a missing folder fails.
os.makedirs('output_files', exist_ok=True)

# Persist the snapshot matrix together with the fitted scalers; the scalers
# are needed to map scaled values back to the original ranges.
joblib.dump(all_values, 'output_files/snapshot_matrix_410.pkl')
joblib.dump(scaler_std, 'output_files/scaler_std_410.pkl')
joblib.dump(scaler_x, 'output_files/scaler_x_410.pkl')
joblib.dump(scaler_y, 'output_files/scaler_y_410.pkl')
joblib.dump(scaler_z, 'output_files/scaler_z_410.pkl')

['/content/gdrive/MyDrive/ACSE-9/output_pod_files/scaler_z_410.pkl']