<a href="https://colab.research.google.com/github/amiralitaheri/tsml/blob/metanet/Metanet_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import glob
import sklearn.metrics as metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from math import log
import os

In [None]:
# List the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print('Num GPU Available: ', len(physical_devices))
# Enable memory growth on every visible GPU. Iterating (instead of indexing
# element 0) keeps this cell from raising IndexError on a CPU-only runtime
# and applies the same setting to all devices.
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

Num GPU Available:  1


In [None]:
def read_csv(file_path: str):
    """Load a CSV file whose first line holds the number of classes.

    The first line is parsed as a single integer. The remaining lines are
    read as a headerless table in which the first ``5 * number_of_classes``
    columns are the features and the next column is the label.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A tuple ``(number_of_classes, data, labels)`` where ``data`` is the
        feature matrix and ``labels`` is a single-column 2-D array.
    """
    with open(file_path, 'r') as handle:
        number_of_classes = int(next(handle))

    frame = pd.read_csv(file_path, skiprows=1, header=None)

    # With header=None the columns are numbered 0..k-1, so the label column
    # sits at position 5 * number_of_classes, right after the features.
    label_column = 5 * number_of_classes
    data = frame.iloc[:, :label_column].to_numpy()
    labels = frame.iloc[:, label_column:label_column + 1].to_numpy()
    return number_of_classes, data, labels


def find_AUROC(c: int, true_values, predictions):
    """Compute the area under the ROC curve for class ``c`` against the rest.

    Args:
        c: Index of the class treated as the positive class.
        true_values: Sequence of rows; the first element of each row is the
            true class label of that instance.
        predictions: Sequence of per-instance score rows; ``predictions[i][c]``
            is the score given to class ``c`` for instance ``i``.

    Returns:
        The area as a float. With no instances the curve is just
        (0, 0) -> (1, 1) and the result is 1.0.
    """
    total = len(true_values)

    # Pair every instance's score for class ``c`` with its true label.
    scored = [(predictions[i][c], true_values[i][0]) for i in range(total)]

    positives = 0
    for _, label in scored:
        if c == label:
            positives += 1
    negatives = total - positives

    # Highest score first; equal scores are ordered by label (descending)
    # because whole tuples are compared.
    scored.sort(reverse=True)

    # Construction follows http://www.cs.waikato.ac.nz/~remco/roc.pdf:
    # walk the instances from highest to lowest score, stepping 1/#positives
    # along one axis for each positive and 1/#negatives along the other axis
    # for each negative. A point is recorded every time the walk switches
    # between positives and negatives, which gives the corner points
    # (0, 0), (x1, y1), ..., (xn, yn), (1, 1).
    curve = [(0.0, 0.0)]
    pos_fraction = 0.0  # share of positives passed so far
    neg_fraction = 0.0  # share of negatives passed so far
    pos_seen = 0
    neg_seen = 0
    previous = None  # kind of the previous instance: None, 'pos' or 'neg'

    for _, label in scored:
        if label == c:
            if previous == 'neg':
                curve.append((pos_fraction, neg_fraction))
            previous = 'pos'
            pos_fraction += 1 / positives
            pos_seen += 1
            # Snap to exactly 1.0 on the last positive to drop rounding error.
            if pos_seen == positives:
                pos_fraction = 1.0
        else:
            if previous == 'pos':
                curve.append((pos_fraction, neg_fraction))
            previous = 'neg'
            neg_fraction += 1 / negatives
            neg_seen += 1
            # Snap to exactly 1.0 on the last negative to drop rounding error.
            if neg_seen == negatives:
                neg_fraction = 1.0

    curve.append((1.0, 1.0))

    # Area: for each pair of consecutive points, the step in the negative
    # fraction times the positive fraction reached at the later point.
    area = 0.0
    for (_, neg_before), (pos_after, neg_after) in zip(curve, curve[1:]):
        area += (neg_after - neg_before) * pos_after

    return area


def find_mean_AUROC(number_of_classes, true_values, predictions):
    """Summarise per-class AUROC values into a single number.

    For two classes, the AUROC of the less frequent class is returned
    (class 1 when both are equally frequent). Otherwise the per-class AUROCs
    are combined into a sum weighted by each class's share of the instances.

    Args:
        number_of_classes: Number of distinct classes.
        true_values: Sequence of rows whose first element is the true label.
        predictions: Sequence of per-instance class score rows.

    Returns:
        The AUROC summary as a float.
    """
    priors = find_class_distributions(true_values, number_of_classes)

    if number_of_classes == 2:
        rarer_class = 0 if priors[0] < priors[1] else 1
        return find_AUROC(rarer_class, true_values, predictions)

    weighted_total = 0.0
    for label in range(number_of_classes):
        weighted_total += find_AUROC(label, true_values, predictions) * priors[label]
    return weighted_total


def find_class_distributions(true_values, number_of_classes):
    """Return the relative frequency of each class among ``true_values``.

    Args:
        true_values: Sequence of rows whose first element is an integer class
            label usable as a list index.
        number_of_classes: Length of the returned list.

    Returns:
        A list in which entry ``k`` is the fraction of rows labelled ``k``.
    """
    counts = [0 for _ in range(number_of_classes)]
    for row in true_values:
        label = row[0]
        counts[label] = counts[label] + 1
    return [count / len(true_values) for count in counts]


def findNLL(true_class_values, probability_distribution):
    """Return the mean negative log-likelihood (base 2) of the true classes.

    Args:
        true_class_values: True class index of every instance. Accepts a flat
            sequence (``[0, 1, ...]``) as well as a single-column layout
            (``[[0], [1], ...]`` or an ``(n, 1)`` array).
        probability_distribution: Per-instance rows of class probabilities;
            row ``i`` is indexed with the true class of instance ``i``.

    Returns:
        The average of ``-log2(p_true)`` over all instances, as a float.
        A probability of exactly 0 contributes a fixed penalty instead of
        an infinite value.

    Raises:
        ValueError: If ``true_class_values`` contains no instances.
    """
    # Flatten so every label is a plain scalar index; indexing with a
    # one-element array would otherwise hand an array to math.log.
    labels = np.asarray(true_class_values).reshape(-1)
    if labels.size == 0:
        raise ValueError('findNLL requires at least one instance')

    # Stand-in for log2(0); -6.64 is roughly log2(0.01).
    nll_penalty = -6.64
    log_likelihood = 0.0
    for i, label in enumerate(labels):
        prob = probability_distribution[i][int(label)]
        if prob == 0:
            log_likelihood += nll_penalty
        else:
            log_likelihood += log(prob) / log(2)
    return -log_likelihood / labels.size



In [None]:
def experiment(train_file_path: str):
    """Train an MLP on one training file and record its test-set statistics.

    The matching test file is located by replacing ``/train/`` with
    ``/test/`` in ``train_file_path``. A network with one tanh hidden layer
    and a softmax output layer is fitted on the training data, saved as an
    ``.h5`` file, and evaluated on the test data. Accuracy, balanced
    accuracy, NLL, AUROC and the confusion matrix are written to a text file
    named after the input file.

    Args:
        train_file_path: Path of the training CSV; must contain a ``/train/``
            component.

    Raises:
        ValueError: If ``train_file_path`` contains no ``/train/`` component,
            because the model would otherwise be evaluated on its own
            training data.
    """
    test_file_path = train_file_path.replace('/train/', '/test/')
    if test_file_path == train_file_path:
        raise ValueError(
            f"cannot derive a test file from {train_file_path!r}: "
            "path has no '/train/' component")

    num_of_classes, train_data, train_labels = read_csv(train_file_path)
    num_of_classes, test_data, test_labels = read_csv(test_file_path)
    num_of_features = 5 * num_of_classes

    # Hidden width is the mean of input and output sizes. The sum is
    # 6 * num_of_classes, so integer division is exact and yields the int
    # that Dense expects for ``units``.
    hidden_units = (num_of_classes + num_of_features) // 2
    model = Sequential([
        Dense(units=hidden_units, activation='tanh'),
        Dense(units=num_of_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.01), loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Stop once training accuracy has not improved for 100 epochs and roll
    # back to the best weights seen.
    callback = EarlyStopping(monitor='accuracy', patience=100, restore_best_weights=True)

    model.fit(x=train_data, y=train_labels, batch_size=50, epochs=1000,
              verbose=0, callbacks=[callback])

    # File stem of the input (text before the first '.'), used to name outputs.
    name = train_file_path.split('/')[-1].split('.')[0]
    model.save(f'drive/My Drive/metanet_results/models/{name}.h5')

    predictions = model.predict(x=test_data, batch_size=50)
    rounded_predictions = np.argmax(predictions, axis=-1)
    cm = metrics.confusion_matrix(y_true=test_labels, y_pred=rounded_predictions)
    acc = metrics.accuracy_score(y_true=test_labels, y_pred=rounded_predictions)
    balanced_acc = metrics.balanced_accuracy_score(y_true=test_labels, y_pred=rounded_predictions)
    # findNLL indexes each probability row with the label, so give it flat
    # scalar labels rather than the single-column 2-D array.
    nll = findNLL(test_labels.ravel(), predictions)
    auc = find_mean_AUROC(num_of_classes, test_labels, predictions)

    # NOTE: the 'nnl' key is kept byte-for-byte so the stats file format
    # does not change.
    with open(f'drive/My Drive/metanet_results/stats/{name}.txt', 'w') as f:
        f.write(f'accuracy,{acc}\nbalanced accuracy,{balanced_acc}\nnnl,{nll}\nauc,{auc}\n#\n{cm}')

In [None]:
!mkdir './drive/My Drive/metanet_results'
!mkdir './drive/My Drive/metanet_results/stats'
!mkdir './drive/My Drive/metanet_results/models'

mkdir: cannot create directory ‘./drive/My Drive/metanet_results’: File exists
mkdir: cannot create directory ‘./drive/My Drive/metanet_results/stats’: File exists
mkdir: cannot create directory ‘./drive/My Drive/metanet_results/models’: File exists


In [None]:
!cp 'drive/My Drive/MetaNet.zip' .
!unzip MetaNet.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons20.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons21.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons22.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons23.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons24.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons25.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons26.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons27.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons28.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons29.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons3.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons30.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons4.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons5.csv  
  inflating: test/C45_Logistic_MLP_NN_SVML_parkinsons6.csv  
  inflati

In [None]:
# Run the experiment for every training file that has no stats file yet, so
# an interrupted run can be resumed by re-executing this cell.
filepath = './'
# glob returns matches in arbitrary order; sort for a reproducible sequence.
files = sorted(glob.glob(filepath + "train/*.csv"))
for count, file in enumerate(files):
    print(count)
    print(file)
    # Same stem derivation as in experiment(), so the check below looks for
    # exactly the file that experiment() writes.
    name = file.split('/')[-1].split('.')[0]
    if os.path.isfile(f'./drive/My Drive/metanet_results/stats/{name}.txt'):
        print(f'{name} already exist')
        continue
    experiment(file)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
./train/C45_Logistic_MLP_NN_SVML_heart-hungarian1.csv
C45_Logistic_MLP_NN_SVML_heart-hungarian1 already exist
1915
./train/C45_Logistic_MLP_NN_SVML_arrhythmia2.csv
C45_Logistic_MLP_NN_SVML_arrhythmia2 already exist
1916
./train/C45_Logistic_MLP_NN_SVML_pittsburg-bridges-SPAN1.csv
C45_Logistic_MLP_NN_SVML_pittsburg-bridges-SPAN1 already exist
1917
./train/C45_Logistic_MLP_NN_SVML_echocardiogram12.csv
C45_Logistic_MLP_NN_SVML_echocardiogram12 already exist
1918
./train/C45_Logistic_MLP_NN_SVML_energy-y16.csv
C45_Logistic_MLP_NN_SVML_energy-y16 already exist
1919
./train/C45_Logistic_MLP_NN_SVML_ringnorm8.csv
C45_Logistic_MLP_NN_SVML_ringnorm8 already exist
1920
./train/C45_Logistic_MLP_NN_SVML_ringnorm13.csv
C45_Logistic_MLP_NN_SVML_ringnorm13 already exist
1921
./train/C45_Logistic_MLP_NN_SVML_spectf28.csv
C45_Logistic_MLP_NN_SVML_spectf28 already exist
1922
./train/C45_Logistic_MLP_NN_SVML_breast-cancer-wisc-prog1.csv
C45

In [None]:
!zip -r tensor_results.zip stats models
!cp tensor_results.zip 'drive/My Drive'


zip error: Nothing to do! (try: zip -r tensor_results.zip . -i stats models)
cp: cannot stat 'tensor_results.zip': No such file or directory
