In [None]:
import tensorflow as tf

import os
import tensorflow_datasets as tfds

# Resolve the TPU cluster for this runtime. NOTE(review): an empty `tpu` string
# presumably lets TensorFlow discover the address from the environment - confirm.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
# Connect this Python process to the resolved cluster.
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
# It is the last expression of the cell, so its return value is displayed as the cell output.
tf.tpu.experimental.initialize_tpu_system(resolver)

<tensorflow.python.tpu.topology.Topology at 0x7fbbc53f3820>

In [None]:
import itertools
import math

import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
from keras import layers
from keras import Model
from keras.models import Sequential
from keras.layers import Layer
from keras.layers import concatenate
from keras.layers import ReLU
from keras.layers import Concatenate
from keras.layers import Conv1D
from keras.layers import GlobalAveragePooling1D
from keras.layers import MaxPooling1D
from keras.layers import MultiHeadAttention
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import GRU
from keras.layers import LayerNormalization
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import Flatten
from keras.backend import dot
from keras.backend import sin
from keras.backend import reshape
from keras.backend import flatten
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import minmax_scale


def retrieve_data(filename):
    """Load a CSV file and return its contents as a NumPy array.

    The 'open-time' and 'close-time' columns are dropped when present; a file
    without them is accepted unchanged.

    Args:
        filename: Path (or anything `pandas.read_csv` accepts) of the CSV file.

    Returns:
        A NumPy array holding the remaining columns, in file order.
    """
    # Variables: ['open-price', 'high-price','low-price', 'close-price', 'volume','quote-asset-volume', 'number-of-trades', 'taker-buy-base-asset-volume', 'taker-buy-quote-asset-volume']
    timestamp_columns = ['open-time', 'close-time']
    frame = pd.read_csv(filename)
    return frame.drop(columns=timestamp_columns, errors='ignore').to_numpy()


def scaleAndFilterData(data, testsize=0.1):
    """Split `data` chronologically and min-max scale each part on its own.

    Args:
        data: Sliceable sequence/array of samples, oldest first.
        testsize: Fraction of the samples (taken from the end) used as the
            second partition.

    Returns:
        Tuple `(first_part, second_part, first_part_scaled, second_part_scaled)`.

    NOTE(review): each partition is scaled with its own min/max, so the two
    scaled partitions do not share one scale. The original comment states this
    is intentional.
    """
    cut = math.floor((1-testsize)*len(data))
    head_part, tail_part = data[:cut], data[cut:]

    # Scaling is done per partition, never with statistics of the other one.
    head_scaled = minmax_scale(head_part)
    tail_scaled = minmax_scale(tail_part)

    return head_part, tail_part, head_scaled, tail_scaled


def convertDataFormat(data, timeframe:int=24, target_col:int=3):
    """Turn a 2-D series into sliding windows and next-step targets.

    Every sample consists of `timeframe - 1` consecutive rows as input and the
    value of column `target_col` in the row directly after them as label.
    E.g. `timeframe=24` trains on 23 hours and predicts the 24th hour.

    Args:
        data: 2-D array of shape (rows, features).
        timeframe: Total window length (inputs plus the predicted row).
        target_col: Column used as label. Defaults to 3, the 'close-price'
            column of the dataset used in this notebook.

    Returns:
        `(x_data, y_data)` as float64 arrays with shapes
        `(samples, timeframe - 1, features)` and `(samples,)`, where
        `samples = rows - timeframe + 1`. When `data` has fewer rows than
        `timeframe`, both arrays are empty (zero samples).

    Raises:
        ValueError: If `timeframe` is smaller than 1.
    """
    if timeframe < 1:
        raise ValueError(f'timeframe must be >= 1, got {timeframe}')

    data = np.asarray(data)
    window = timeframe - 1
    # Clamp to zero so that too-short input yields zero samples instead of a
    # "negative dimensions" error.
    iterations = max(data.shape[0] - timeframe + 1, 0)

    # Row indices of every window: window i covers rows i .. i + window - 1.
    window_rows = np.arange(iterations)[:, None] + np.arange(window)[None, :]
    x_data = data[window_rows].astype(np.float64)
    # The label of window i is the row right after it.
    y_data = data[window:window + iterations, target_col].astype(np.float64)
    return x_data, y_data


class multi_Head_Attention_encoder(Layer): #this only builds an encoder
    """Encoder block: self-attention followed by a three-layer LSTM stack.

    The attention output is added to the block input and layer-normalised.
    The result goes through LSTM(output_dim) -> dropout -> LSTM(4 * output_dim)
    -> dropout -> LSTM(output_dim), is added to the normalised attention result
    and layer-normalised again.

    Because of the two residual additions, `output_dim` must equal the size of
    the last axis of the input.

    Args:
        dropout: Dropout rate used inside the attention layer and between the LSTMs.
        amount_of_heads: Number of attention heads.
        size_of_head: Key and value dimension of every head.
        output_dim: Units of the first and last LSTM.
    """

    def __init__(self, dropout, amount_of_heads, size_of_head,output_dim, **kwargs):
        super().__init__(**kwargs)
        self.dropout = dropout
        self.amount_of_heads = amount_of_heads
        self.size_of_head = size_of_head
        self.output_dim = output_dim

    def build(self, input_shape):
        """Create the sub-layers (attention, norms, LSTM stack, dropout)."""
        head_size = self.size_of_head
        self.multi_Attention = MultiHeadAttention(
            num_heads=self.amount_of_heads,
            key_dim=head_size,
            value_dim=head_size,
            dropout=self.dropout,
            attention_axes=(1, 2),
        )
        self.norm_att = LayerNormalization()  # epsilon left at the Keras default

        # All three LSTMs return full sequences so the block keeps the time axis.
        recurrent_options = dict(activation='tanh', use_bias=True, return_sequences=True)
        self.feed_forward = LSTM(self.output_dim, **recurrent_options)      # width must equal the feature count
        self.feed_forward_2 = LSTM(self.output_dim*4, **recurrent_options)  # widened middle layer
        self.feed_forward_3 = LSTM(self.output_dim, **recurrent_options)    # back to the feature count
        self.dropout_ff = Dropout(self.dropout)
        self.norm_ff = LayerNormalization()  # epsilon left at the Keras default
        super().build(input_shape)

    def call(self, inputs, training = None):
        """Run the block on `inputs` and return a tensor of the same last-axis size."""
        attended = self.multi_Attention(inputs, inputs, training=training)
        residual = self.norm_att(inputs+attended)

        hidden = residual
        # The first two LSTMs are each followed by the (shared) dropout layer.
        for recurrent in (self.feed_forward, self.feed_forward_2):
            hidden = self.dropout_ff(recurrent(hidden), training=training)
        hidden = self.feed_forward_3(hidden)

        return self.norm_ff(hidden+residual)
    
class multi_head_attention_decoder(Layer):
    """Decoder block: target self-attention, attention against the encoder
    output, then a three-layer LSTM stack.

    `call` expects `inputs` to be a pair `(encoder_input, target)`.

    Fix compared to the previous version: the first (self-attention) stage now
    uses its own layers `multi_Attention_1` / `norm_att_1`. They were already
    created in `build` but never called, so both stages silently shared the
    weights of `multi_Attention` / `norm_att`.

    Args:
        dropout: Dropout rate used inside the attention layers and between the LSTMs.
        amount_of_heads: Number of attention heads.
        size_of_head: Key and value dimension of every head.
        output_dim: Units of the first and last LSTM; must equal the size of the
            last input axis because of the residual additions.
    """

    def __init__(self, dropout, amount_of_heads, size_of_head, output_dim, **kwargs):
        super(multi_head_attention_decoder,self).__init__(**kwargs)
        self.dropout = dropout
        self.amount_of_heads= amount_of_heads
        self.size_of_head = size_of_head
        self.output_dim = output_dim

    def build(self, input_shape):
        """Create the sub-layers of the block."""
        # Stage 1: self-attention over the target.
        self.multi_Attention_1 = MultiHeadAttention(key_dim=self.size_of_head, num_heads=self.amount_of_heads, value_dim= self.size_of_head, dropout=self.dropout)
        self.norm_att_1 = LayerNormalization() # epsilon left at the Keras default
        # Stage 2: attention between encoder output and stage-1 result.
        self.multi_Attention = MultiHeadAttention(key_dim=self.size_of_head, num_heads=self.amount_of_heads, value_dim= self.size_of_head, dropout=self.dropout,  attention_axes= (1,2))
        self.norm_att = LayerNormalization() # epsilon left at the Keras default
        # Stage 3: LSTM stack, width must match the feature count at both ends.
        self.feed_forward = LSTM(self.output_dim,activation='tanh', return_sequences=True)
        self.feed_forward_2 = LSTM(self.output_dim*4,activation='tanh', use_bias=True, return_sequences=True)
        self.feed_forward_3 = LSTM(self.output_dim,activation='tanh', use_bias=True, return_sequences=True)
        self.dropout_ff = Dropout(self.dropout)
        self.norm_ff = LayerNormalization()
        super(multi_head_attention_decoder, self).build(input_shape)

    def call(self, inputs, training = None):
        """Run the block on `(encoder_input, target)` and return the result."""
        encoder_input, target = inputs

        # Stage 1: target self-attention with its own attention/norm layers.
        attention_output_1 = self.multi_Attention_1(query=target, key=target, value=target, training=training)
        norm_output_1 = self.norm_att_1(attention_output_1 + target)

        # Stage 2. NOTE(review): the encoder output is the *query* and the
        # decoder state is key/value, so the result follows the encoder's
        # sequence length - confirm this inversion is intended.
        attention_output = self.multi_Attention(query=encoder_input, key=norm_output_1, value=norm_output_1, training=training)
        norm_output = self.norm_att(attention_output + norm_output_1)

        # Stage 3: LSTM stack with dropout after the first two layers.
        forward = self.feed_forward(norm_output)
        forward = self.dropout_ff(forward, training=training)
        forward = self.feed_forward_2(forward)
        forward = self.dropout_ff(forward, training=training)
        forward = self.feed_forward_3(forward)
        forward = self.norm_ff(forward + norm_output)
        return forward

class Linear(Layer):
    """Stack of linear `Dense` layers with dropout between them.

    Dropout is applied after every layer except the last one.

    Fixes compared to the previous version:
      * the `dropout` argument is now actually used (it was stored but a
        hard-coded rate of 0.004 was applied instead);
      * `build` replaces the layer list instead of appending to it, so building
        twice no longer duplicates the layers;
      * an empty `dim_list` returns the input unchanged instead of raising
        IndexError.

    Args:
        dim_list: Number of units of each consecutive Dense layer.
        dropout: Dropout rate applied between the Dense layers.
    """

    def __init__(self, dim_list, dropout,  **kwargs):
        super(Linear,self).__init__(**kwargs)
        self.dim_list = dim_list
        self.dropout = dropout
        self.dense_layers = []

    def build(self, input_shape):
        """Create one linear Dense layer per entry of `dim_list`."""
        self.dense_layers = [Dense(units, activation='linear') for units in self.dim_list]
        self.dropout_ff = Dropout(self.dropout)
        super(Linear, self).build(input_shape)

    def call(self, inputs, training = None):
        """Apply the Dense stack to `inputs`."""
        forward = inputs
        if not self.dense_layers:
            return forward
        for layer in self.dense_layers[:-1]:
            forward = layer(forward)
            forward = self.dropout_ff(forward, training = training)
        # No dropout after the output layer.
        last_layer = self.dense_layers[-1]
        return last_layer(forward)

class Time2Vec(Layer):
    """Time2Vec-style embedding: one linear component plus `k` sine components.

    For an input whose last axis has F entries, the layer returns a tensor
    whose last axis has `1 + k` entries: `inputs @ w0 + fi0` followed by
    `sin(inputs @ w + fi)`.

    Args:
        k: Number of periodic (sine) components.
    """

    def __init__(self, k, **kwargs):
        super().__init__(**kwargs)
        self.k = k

    def build(self, input_shape):
        """Create the weights; the offsets have one row per entry of axis 1."""
        feature_count = input_shape[-1]  # rows of w must match the input columns
        step_count = input_shape[1]
        # Periodic components (i > 0).
        self.w = self.add_weight(shape=(feature_count, self.k), initializer='uniform', trainable=True)
        self.fi = self.add_weight(shape=(step_count, self.k), initializer='uniform', trainable=True)
        # Linear component (i = 0).
        self.w0 = self.add_weight(shape=(feature_count, 1), initializer='uniform', trainable=True)
        self.fi0 = self.add_weight(shape=(step_count, 1), initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return the linear and periodic components concatenated on the last axis."""
        linear_part = tf.matmul(inputs, self.w0) + self.fi0
        periodic_part = sin(tf.matmul(inputs, self.w) + self.fi)
        # The output is deliberately not flattened here.
        return concatenate([linear_part, periodic_part], -1)

class BuildModel(Model):
    """Encoder/decoder model with Time2Vec embeddings and a single-unit linear head.

    `call` expects a pair `(encoder_input, target)`. Each of the two is
    concatenated with its own Time2Vec embedding, the encoder input is passed
    through all encoder blocks, the target through all decoder blocks (each
    also receiving the final encoder output), and the flattened result is
    mapped to one value.

    The blocks are created with `output_dim = k + 10`. Time2Vec contributes
    `k + 1` columns; NOTE(review): the remaining 9 presumably is the number of
    input features of the dataset - other feature counts will not work.

    Args:
        k: Number of periodic Time2Vec components.
        amount_of_layers_encoder: Number of encoder blocks.
        amount_of_layers_decoder: Number of decoder blocks.
        dropout: Dropout rate handed to every block and to the linear head.
        amount_of_heads: Attention heads per block.
        size_of_head: Key/value dimension per head.
    """

    def __init__(self, k, amount_of_layers_encoder, amount_of_layers_decoder, dropout,amount_of_heads,size_of_head, **kwargs):
        super().__init__(**kwargs)
        block_width = k+10  # must equal the feature count after the Time2Vec concatenation
        self.time2vec_layer_encode = Time2Vec(k)
        self.time2vec_layer_decode = Time2Vec(k)
        self.attention_layers_encoder = [
            multi_Head_Attention_encoder(dropout, amount_of_heads, size_of_head, output_dim=block_width)
            for _ in range(amount_of_layers_encoder)
        ]
        self.attention_layers_decoder = [
            multi_head_attention_decoder(dropout, amount_of_heads, size_of_head, output_dim=block_width)
            for _ in range(amount_of_layers_decoder)
        ]
        self.linear_layer = Linear(dim_list=[1], dropout=dropout)
        self.flatten = Flatten()
        self.concatenate = Concatenate()

    def call(self, inputs, training=None):
        """Compute the prediction for `(encoder_input, target)`."""
        source, target = inputs

        # Append the time embedding to the encoder input and to the target.
        encoder_state = self.concatenate([source, self.time2vec_layer_encode(source)])
        decoder_state = self.concatenate([target, self.time2vec_layer_decode(target)])

        # Run all encoder blocks first ...
        for encoder_block in self.attention_layers_encoder:
            encoder_state = encoder_block(encoder_state)
        # ... then every decoder block, each seeing the final encoder output.
        for decoder_block in self.attention_layers_decoder:
            decoder_state = decoder_block((encoder_state, decoder_state))

        return self.linear_layer(self.flatten(decoder_state))


def grid_search_transformer(x_train, target_train, y_train, x_val, target_val, y_val, epochs, params):
    """Train one transformer per hyper-parameter combination.

    Args:
        x_train, target_train, y_train: Training encoder inputs, decoder
            targets and labels.
        x_val, target_val, y_val: The same for the validation set.
        epochs: Number of epochs per model.
        params: Dict with the lists 'layers', 'k', 'nr_heads', 'size_heads'
            and 'dropout'; every combination is evaluated.

    Returns:
        Dict mapping 'la_<layers>_he_<heads>_siz_<size>_dr_<dropout>_k_<k>' to
        the validation predictions of that model.

    The printed MAPE is now computed as `mape(y_true, y_pred)`; the arguments
    used to be swapped, which divided the error by the prediction instead of
    by the true value.
    """
    layer_counts = params['layers']
    k_vals = params['k']
    nr_heads = params['nr_heads']
    size_heads = params['size_heads']
    dropout = params['dropout']

    predictions = {}
    # Same iteration order as the former nested loops: layers outermost, k innermost.
    for la, he, siz, dr, k in itertools.product(layer_counts, nr_heads, size_heads, dropout, k_vals):
        print('Building model with: layers' + str(la)
              + ', nr_heads=' + str(he)
              + ', size_heads=' + str(siz)
              + ', dropout=' + str(dr)
              + ', k=' + str(k))
        model = BuildModel(k=k, amount_of_layers_encoder=la,
                           amount_of_layers_decoder=la, dropout=dr,
                           amount_of_heads=he, size_of_head=siz)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                      loss=keras.losses.mse)
        model.fit((x_train, target_train), y_train,
                  batch_size=64, epochs=epochs, verbose=1,
                  validation_data=((x_val, target_val), y_val))
        y_pred = model.predict((x_val, target_val))

        # The key format is parsed again by calc_best_params_grid.
        key = '_'.join(name + '_' + str(value) for name, value in
                       (('la', la), ('he', he), ('siz', siz), ('dr', dr), ('k', k)))
        predictions[key] = y_pred
        print('mean abs perc error of: '
              + str(mean_absolute_percentage_error(y_val, y_pred.flatten())))
    return predictions


def grid_search_rnn(x_train, y_train, x_val, y_val, epochs, params):
    """Train one two-layer GRU model per dropout value.

    Args:
        x_train, y_train: Training windows and labels.
        x_val, y_val: Validation windows and labels.
        epochs: Number of epochs per model.
        params: Dict with the list 'dropout'.

    Returns:
        Dict mapping 'dr_<dropout>' to the validation predictions.

    The printed MAPE is now computed as `mape(y_true, y_pred)`; the arguments
    used to be swapped.
    """
    dropout = params['dropout']
    predictions = {}
    for dr in dropout:
        print('Building model with: dropout=' + str(dr))
        model = Sequential()
        # Dropout is only applied to the inputs of the first GRU.
        model.add(GRU(64, input_shape=(x_train.shape[1], x_train.shape[2]),
                      return_sequences=True, dropout=dr))
        model.add(GRU(16, return_sequences=False))
        model.add(Dense(units=1))
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                      loss=keras.losses.mse)
        model.fit(x_train, y_train,
                  batch_size=64, epochs=epochs, verbose=1,
                  validation_data=(x_val, y_val))
        y_pred = model.predict(x_val)

        # The key format is parsed again by calc_best_params_grid.
        predictions['dr_' + str(dr)] = y_pred
        print('mean abs perc error of: '
              + str(mean_absolute_percentage_error(y_val, y_pred.flatten())))
    return predictions


def calc_best_params_grid(grid, labels_true):
    """Pick the grid entry with the lowest mean absolute percentage error.

    Args:
        grid: Dict mapping a parameter key of the form
            '<name>_<value>_<name>_<value>...' to an array of predictions.
            It is updated in place: every value becomes
            `[predictions, score]`.
        labels_true: True labels belonging to the predictions.

    Returns:
        Tuple `(param_dict, best_score, best_labels, grid)` where `param_dict`
        maps every parameter name of the best key to its value *as a string*.

    Raises:
        ValueError: If `grid` is empty.

    Fixes compared to the previous version: MAPE is computed as
    `mape(y_true, y_pred)` (the arguments were swapped), and the best score
    starts at infinity, so a grid whose scores all exceed 10000 no longer
    crashes.
    """
    if not grid:
        raise ValueError('calc_best_params_grid needs at least one grid entry')

    best_score = float('inf')
    best_labels = []
    best_params = None
    # Iterate over a snapshot because the dict values are replaced in the loop.
    for params, labels_pred in list(grid.items()):
        current_score = mean_absolute_percentage_error(labels_true, labels_pred.flatten())
        grid[params] = [labels_pred, current_score]
        # `best_params is None` guarantees a winner even if every score is NaN.
        if best_params is None or current_score < best_score:
            best_score = current_score
            best_params = params
            best_labels = labels_pred

    # Keys alternate between name and value: 'la_3_he_8' -> {'la': '3', 'he': '8'}.
    tokens = best_params.split('_')
    param_dict = dict(zip(tokens[::2], tokens[1::2]))
    return param_dict, best_score, best_labels, grid


def reverseMinMaxScaling(y_scaled, input_min:float, input_max:float) -> float:
    """Undo min-max scaling, converting scaled predictions back to USD.

    With `a` the scaled value and `b` the USD value, min-max scaling is
        a = (b - input_min) / (input_max - input_min)
    which rearranges to
        b = a * (input_max - input_min) + input_min

    Args:
        y_scaled: Scaled value(s); anything supporting `*` and `+`.
        input_min: Minimum of the unscaled data.
        input_max: Maximum of the unscaled data.

    Returns:
        The unscaled value(s).
    """
    value_range = input_max - input_min
    return y_scaled * value_range + input_min


def _ordinal_suffix(number:int) -> str:
    """Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') of `number`."""
    number = abs(int(number))
    # 11, 12 and 13 are irregular: 11th, 12th, 13th (also 111th, ...).
    if number % 100 in (11, 12, 13):
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')


def plotPrediction(y_true, y_pred, timeframe:int, model_name:str) -> None:
    """Plot true against predicted values in one figure and show it.

    Args:
        y_true: True values (USD).
        y_pred: Predicted values (USD).
        timeframe: Window length; used as ordinal in the title ("24th hour").
        model_name: Name of the model, shown in the title.

    Fixes compared to the previous version: the title always contains
    `model_name` (it was missing for timeframes ending in 1, 2 or 3) and the
    ordinal of 11, 12 and 13 is 'th' instead of 'st'/'nd'/'rd'.
    """
    plt.plot(y_true)
    plt.plot(y_pred)
    plt.title(f'True vs. {model_name}-predicted {timeframe}{_ordinal_suffix(timeframe)} hour Ethereum value')
    plt.xlabel('Prediction number')
    plt.ylabel('Value (USD)')
    plt.legend(['True', 'Predicted'])
    plt.show()


def plotAbsPctError(y_true, y_pred, timeframe:int, model_name:str) -> list:
    """Scatter-plot the absolute percentage error (APE) of every prediction.

    Also prints the median and mean APE and the largest 1% of the errors.

    Args:
        y_true: Array of true values; flattened before use.
        y_pred: Array of predicted values; flattened before use.
        timeframe: Window length, shown in the title and the printed summary.
        model_name: Model name, shown in the title and the printed summary.

    Returns:
        The top 1% outliers (rounded up) as a list of
        `(prediction index, APE)` tuples, largest error first.
    """
    truth = y_true.flatten()
    guess = y_pred.flatten()
    ape = abs(truth-guess)/truth*100

    plt.scatter(range(len(ape)), ape)
    plt.title(f'Absolute Percentage Error per {model_name} prediction (timeframe = {timeframe})')
    plt.xlabel('Prediction number')
    plt.ylabel('Absolute Percentage Error')
    plt.show()

    print(f'Model = {model_name}\tTimeframe = {timeframe}\n')
    print(f'Median APE: {np.median(ape)}')
    print(f'Mean APE: {np.mean(ape)}\n')

    # Rank all (index, error) pairs by error and keep the worst 1%.
    outliercount = math.ceil(0.01*len(ape))
    ranked = sorted(enumerate(ape), key=lambda pair: pair[1], reverse=True)
    outlier_index_tuples = ranked[:outliercount]

    print(f'Top 1% (={outliercount}) outliers (prediction index, APE): {outlier_index_tuples}')
    return outlier_index_tuples


def plot_predictions(y_true, y_pred, y_original, timeframe:int=24, model_name:str="RNN"):
    """Convert scaled predictions back to USD and plot them against `y_original`.

    Args:
        y_true: Unused; kept so existing calls keep working.
        y_pred: Min-max scaled predictions.
        y_original: Unscaled true values; their min and max are used to undo
            the scaling and they are plotted as the true curve.
        timeframe: Window length shown in the plot title (default 24, the
            value that used to be hard-coded).
        model_name: Model name shown in the plot title (default "RNN", the
            value that used to be hard-coded).
    """
    y_pred_reverse = reverseMinMaxScaling(y_pred, y_original.min(), y_original.max())
    plotPrediction(y_original, y_pred_reverse, timeframe, model_name)


def retrieve_data_create_train_test_val_target(filename='dataset.csv', split_size=0.1, timeframe=24):
    """Load the dataset and derive every array used for training and evaluation.

    The data is split into train+val / test, the train+val part is split again
    into train / val, every part is min-max scaled on its own, and sliding
    windows are created from the scaled parts.

    Args:
        filename: CSV file to load (default: 'dataset.csv').
        split_size: Fraction split off at the end, used for both splits.
        timeframe: Window length handed to `convertDataFormat`. It used to be
            defined here but never passed on.

    Returns:
        Dict with the raw data, the original and scaled partitions, the
        windowed `x_*` / `y_*` arrays and the transformer `target_*` arrays.
        Each target is the last time step of the corresponding `x_*` array
        with shape (samples, 1, features).
    """
    data = retrieve_data(filename)

    # Split and scale: first train+val vs. test, then train vs. val.
    train_val_data_orig, test_data_orig, train_val_data_scaled, test_data_scaled = scaleAndFilterData(data, split_size)
    train_data_orig, val_data_orig, train_data_scaled, val_data_scaled = scaleAndFilterData(train_val_data_orig, split_size)

    # Sliding windows, split into x and y.
    x_train, y_train = convertDataFormat(train_data_scaled, timeframe)
    x_val, y_val = convertDataFormat(val_data_scaled, timeframe)

    x_trainval, y_trainval = convertDataFormat(train_val_data_scaled, timeframe)
    x_test, y_test = convertDataFormat(test_data_scaled, timeframe)

    def _last_step(windows):
        # Keep the time axis (length 1); works for any number of features
        # instead of the previously hard-coded 9.
        return windows[:, -1:, :].copy()

    # Target data for the transformer decoder.
    target_train = _last_step(x_train)
    target_val = _last_step(x_val)
    target_trainval = _last_step(x_trainval)
    target_test = _last_step(x_test)

    final_data = {
        'data_raw'                : data,
        'train_val_data_orig'     : train_val_data_orig,
        'train_val_data_scaled'   : train_val_data_scaled,
        'test_data_orig'          : test_data_orig,
        'test_data_scaled'        : test_data_scaled,
        'train_data_orig'         : train_data_orig,
        'train_data_scaled'       : train_data_scaled,
        'val_data_orig'           : val_data_orig,
        'val_data_scaled'         : val_data_scaled,
        'x_train'                 : x_train,
        'y_train'                 : y_train,
        'x_val'                   : x_val,
        'y_val'                   : y_val,
        'x_trainval'              : x_trainval,
        'y_trainval'              : y_trainval,
        'x_test'                  : x_test,
        'y_test'                  : y_test,
        'target_train'            : target_train,
        'target_val'              : target_val,
        'target_trainval'         : target_trainval,
        'target_test'             : target_test
    }
    return final_data


def grid_search_transformer_and_calc_best(data, epochs_grid, params):
    """Run the transformer grid search and report the best combination.

    Args:
        data: Dict with 'x_train', 'target_train', 'y_train', 'x_val',
            'target_val' and 'y_val'.
        epochs_grid: Epochs per model of the grid.
        params: Hyper-parameter lists, see `grid_search_transformer`.

    Returns:
        Dict with 'grid_tr' (the grid) and 'best_parms_vals' (the tuple
        returned by `calc_best_params_grid`).
    """
    grid_tr = grid_search_transformer(
        data['x_train'], data['target_train'], data['y_train'],
        data['x_val'], data['target_val'], data['y_val'],
        epochs_grid, params)
    best_params_vals_tr = calc_best_params_grid(grid_tr, data['y_val'])

    # Element 0 holds the winning parameters, element 1 their score.
    winner, winner_score = best_params_vals_tr[0], best_params_vals_tr[1]
    print(f'best params: {winner!s}')
    print(f'with mean abs percentage error of: {winner_score!s}')
    return dict(grid_tr=grid_tr, best_parms_vals=best_params_vals_tr)


def grid_search_rnn_and_calc_best(data, epochs, params):
    """Run the GRU grid search and report the best dropout value.

    Args:
        data: Dict with 'x_train', 'y_train', 'x_val' and 'y_val'.
        epochs: Epochs per model of the grid.
        params: Hyper-parameter lists, see `grid_search_rnn`.

    Returns:
        Dict with 'grid_rnn' (the grid) and 'best_parms_vals' (the tuple
        returned by `calc_best_params_grid`).
    """
    grid_rnn = grid_search_rnn(
        data['x_train'], data['y_train'],
        data['x_val'], data['y_val'],
        epochs, params)
    best_params_vals_rnn = calc_best_params_grid(grid_rnn, data['y_val'])

    # Element 0 holds the winning parameters, element 1 their score.
    winner, winner_score = best_params_vals_rnn[0], best_params_vals_rnn[1]
    print(f'best params: {winner!s}')
    print(f'with mean abs perc error of: {winner_score!s}')
    return dict(grid_rnn=grid_rnn, best_parms_vals=best_params_vals_rnn)


def build_best_transformer(data, epochs, params ,tpu = False):
    """Train the transformer on train+val and evaluate it on the test set.

    Args:
        data: Dict with 'x_trainval', 'target_trainval', 'y_trainval',
            'x_test', 'target_test' and 'y_test'.
        epochs: Number of training epochs.
        params: Dict with 'la' (layers), 'he' (heads), 'siz' (head size),
            'dr' (dropout) and 'k'. Values may be numbers or the strings
            returned by `calc_best_params_grid`.
        tpu: When True, the model is created and compiled inside a
            `tf.distribute.TPUStrategy` scope.

    Returns:
        The predictions for the test set.

    Fixes compared to the previous version:
      * the TPU branch used `tf.contrib`, which does not exist in TensorFlow 2;
      * MAPE is computed as `mape(y_true, y_pred)` (arguments were swapped);
      * string parameter values are converted to numbers.
    """
    la = int(params['la'])
    head = int(params['he'])
    size = int(params['siz'])
    dr = float(params['dr'])
    k = int(params['k'])

    def _build_and_compile():
        # Build the model with the best parameters.
        model = BuildModel(k=k, amount_of_layers_encoder=la,
                           amount_of_layers_decoder=la, dropout=dr,
                           amount_of_heads=head, size_of_head=size)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                      loss=keras.losses.mse)
        return model

    if tpu:
        # NOTE(review): assumes the TPU system was already connected and
        # initialised (as done in the first notebook cell) - confirm.
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
        strategy = tf.distribute.TPUStrategy(resolver)
        # Variables must be created inside the strategy scope.
        with strategy.scope():
            model_tr = _build_and_compile()
    else:
        model_tr = _build_and_compile()

    model_tr.fit((data['x_trainval'], data['target_trainval']), data['y_trainval'],
                 batch_size=64, epochs=epochs, verbose=1)

    y_pred_tr = model_tr.predict((data['x_test'], data['target_test']))

    print('Final mean abs perc error of:' +
          str(mean_absolute_percentage_error(data['y_test'], y_pred_tr.flatten())))

    return y_pred_tr
    

def build_best_rnn(data, epochs, params):
    """Train the two-layer GRU model on train+val and evaluate it on the test set.

    Args:
        data: Dict with 'x_trainval', 'y_trainval', 'x_test' and 'y_test'.
        epochs: Number of training epochs.
        params: Dict with 'dr', the dropout rate of the first GRU layer
            (number or numeric string).

    Returns:
        The predictions for the test set.

    The printed MAPE is now computed as `mape(y_true, y_pred)`; the arguments
    used to be swapped.
    """
    # Build the model with the best parameters.
    model_rnn = Sequential()

    model_rnn.add(GRU(64, input_shape=(data['x_trainval'].shape[1],
                                       data['x_trainval'].shape[2]),
                      return_sequences=True, dropout=float(params['dr'])))
    model_rnn.add(GRU(16, return_sequences=False))
    model_rnn.add(Dense(units=1))

    model_rnn.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                      loss=keras.losses.mse)

    model_rnn.fit(data['x_trainval'], data['y_trainval'],
                  batch_size=64, epochs=epochs, verbose=1)

    y_pred_rnn = model_rnn.predict(data['x_test'])

    print('Final mean abs perc error of:' +
          str(mean_absolute_percentage_error(data['y_test'], y_pred_rnn.flatten())))

    return y_pred_rnn
    

def split_train_test(data, testsize=0.1):
    """Split `data` into a leading train part and a trailing test part.

    Args:
        data: Sliceable sequence/array, oldest sample first.
        testsize: Fraction of the samples that goes into the test part.

    Returns:
        Tuple `(train_data, test_data)`; the order of the samples is kept.
    """
    cut = math.floor((1-testsize)*len(data))
    return data[:cut], data[cut:]


def scale_data_y(data):
    """Min-max scale a 1-D series and return the fitted scaler with it.

    Args:
        data: Array of values; it is reshaped to a single column.

    Returns:
        Tuple `(data_scaled, scaler)`: the scaled column of shape (n, 1) and
        the fitted `MinMaxScaler`, which can be used to undo the scaling.
    """
    column = data.reshape(-1, 1)
    scaler = MinMaxScaler().fit(column)
    return scaler.transform(column), scaler

def reverse_predicted_labels(y_pred):
    """Convert scaled test-set predictions back to the original price scale.

    The dataset is reloaded and split exactly like during training, and the
    min-max scaling of the close-price column of the test partition is undone
    for `y_pred`.

    Args:
        y_pred: Scaled predictions for the test windows (any shape; it is
            flattened to one column).

    Returns:
        Tuple `(y_test, y_pred_reversed)`: the unscaled true labels of the
        test windows and the predictions on the same scale, shape (n, 1).

    Fix compared to the previous version: the scaler is fitted on the complete
    close-price column of the test partition. That is the range
    `scaleAndFilterData` scaled the test data with; fitting on the windowed
    labels skipped the first `timeframe - 1` rows and could therefore use a
    different min/max.
    """
    # retrieve data
    filename = 'dataset.csv'
    data = retrieve_data(filename)

    # split data in train+val and test
    split_size = 0.1
    _, test_unscaled = split_train_test(data, split_size)

    # unscaled labels of the test windows
    _, y_test = convertDataFormat(test_unscaled)

    # Column 3 is 'close-price', the column convertDataFormat uses as label.
    _, close_scaler = scale_data_y(test_unscaled[:, 3])
    y_pred_reversed = close_scaler.inverse_transform(np.asarray(y_pred).reshape(-1, 1))

    return y_test, y_pred_reversed


def calculate_metrics(y_true, y_pred_reversed):
    """Compute and print R2, MSE, RMSE, MAE and MAPE of the predictions.

    Args:
        y_true: True values.
        y_pred_reversed: Predictions on the same (unscaled) scale.

    Returns:
        Tuple `(r2, mse, rmse, mae, mape)`.
    """
    r2 = r2_score(y_true, y_pred_reversed)
    mse = mean_squared_error(y_true, y_pred_reversed)
    rmse = math.sqrt(mse)  # root of the MSE computed above
    mae = mean_absolute_error(y_true, y_pred_reversed)
    mape = mean_absolute_percentage_error(y_true, y_pred_reversed)

    report = (('r2', r2), ('mse', mse), ('rmse', rmse), ('mae', mae), ('mape', mape))
    for label, value in report:
        print(label + ': ' + str(value))

    return r2, mse, rmse, mae, mape


In [None]:
data = retrieve_data_create_train_test_val_target()

# Grid search transformer
epochs_grid = 5
# The trailing comments list the candidate values that were considered.
params_tr = {
    'layers' : [3],               # 3, 5
    'nr_heads' : [8],             # 8, 12
    'size_heads' : [11],          # 11, 23
    'dropout' : [0.01],           # 0.03, 0.01 ???0.005, 0.001
    'k' : [9]                     # 9, 12
                                  # TOTAL = 2 * 2^2 * 2 * 2 * 2 * 2 = 128
}

grid_best_params_tr = grid_search_transformer_and_calc_best(data, epochs_grid, params_tr)

# Build best transformer
epochs = 50
# Keys must match what build_best_transformer reads: la, he, siz, dr, k.
params_best = {
    'la' : 5,                     # 3, 5
    'he' : 12,                    # 8, 12
    'siz' : 11,                   # 11, 23
    'dr' : 0.03,                  # 0.03, 0.01 ???0.005, 0.001
    'k' : 9                       # 9, 12
}

# Keep the test predictions so later cells can plot and score them.
y_pred_tr = build_best_transformer(data, epochs, params_best)

# Grid search GRU
params_rnn = {
    'dropout' : [0.01, 0.005, 0.001]
}

grid_best_params_rnn = grid_search_rnn_and_calc_best(data, epochs_grid, params_rnn)


# Build best GRU
# build_best_rnn reads a single value from the key 'dr'; the former
# {'dropout': [0.001]} raised a KeyError.
params_rnn_best = {
    'dr' : 0.001
}
y_pred_rnn = build_best_rnn(data, epochs, params_rnn_best)



# New section

In [None]:
# Shell escape: print /proc/cpuinfo to show the CPU details of the runtime.
!cat /proc/cpuinfo

In [None]:
# Shell escape: run nvidia-smi to report the NVIDIA GPU status of the runtime.
# NOTE(review): the first cell initialises a TPU; on a TPU runtime this command presumably finds no GPU - confirm.
!nvidia-smi