In [174]:
import models
import numpy as np
import matplotlib.pyplot as plt

In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Dummy function to create synthetic dataset
def f(X):
    """
    Simple function
    f(x0,x1,x2) = x0 + 2*x1 - x2**2
    
    :param X: input matrix with columns x0, x1, x2 (type: np.array)
    :return: f(X) (type: np.array) 
    
    """
    res = X[:,0] + 2*X[:, 1] - X[:,2]**2
    res = res.reshape([-1,1])
    
    return res

# Creating synthetic dataset
N = 100000
N_dev = 1000
X = 100*(np.random.rand(N, 3) -0.5)
X_dev = np.random.rand(N, 3)
X_pred = np.random.rand(N, 3)

noise = (np.random.normal(size=N)-0.5)/5
noise = noise.reshape([-1,1])

y = f(X) * (1+noise)
y_dev = f(X_dev)
y_pred = f(X_pred)

In [175]:
# Instantiating model object
mlp = models.MLP(X, hidden_layers=[5,5,5], activation="tanh", optimizer="adam")

In [102]:
mlp = models.MLP(X, hidden_layers=[5,4,2], activation="tanh", optimizer="adam")

In [176]:
# Instantiating model object
# mlp = models.MLP(X, hidden_layers=[5,5,5],
#                  activation="tanh", optimizer="adam", 
#                  problem="quantile",
#                  loss="quantile",
#                  q=0.01)

# Model train
mlp.train(X,y,
          X_dev=X_dev, 
          y_dev=y_dev,
          n_epoch=100,
          n_stopping_rounds=30)


# Run predict on new data
predictions = mlp.predict(X_pred)

# Evaluate model performance using the same metric it used to train
performance = mlp._compute_loss(predictions,y_pred)
print(f"Prediction loss: {performance}")

epoch: 1 | train_loss: 533519.9421684719 |  dev_loss: 1.0751068615038903
epoch: 2 | train_loss: 533256.2315311517 |  dev_loss: 1.940769218065605
epoch: 3 | train_loss: 532736.8509671584 |  dev_loss: 4.333485522314923
epoch: 4 | train_loss: 532023.4610658737 |  dev_loss: 7.980269398917631
epoch: 5 | train_loss: 531304.6919011095 |  dev_loss: 11.667912840214798


KeyboardInterrupt: 

In [None]:
predictions.mean()

In [None]:
# Instantiating model object
mlp = models.MLP(X, hidden_layers=[5,5,5],
                 activation="tanh", 
                 optimizer="adam", 
                 problem="quantile",
                 loss="quantile",
                 q=0.99)

# Model train
mlp.train(X,y,
          X_dev=X_dev, 
          y_dev=y_dev,
          n_epoch=100,
          n_stopping_rounds=30)


# Run predict on new data
predictions = mlp.predict(X_pred)

# Evaluate model performance using the same metric it used to train
performance = mlp._compute_loss(predictions,y_pred)
print(f"Prediction loss: {performance}")

In [None]:
predictions.mean()

## Classification

In [None]:
import pandas as pd

In [None]:
iris = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", 
                   header=None)
feature_cols = ["v1", "v2", "v3","v4"]
iris.columns = feature_cols +  ["class"]
iris["target"] = np.where(iris["class"] == "Iris-setosa", 1, 0)

In [None]:
X = iris[feature_cols].values
y = iris["target"].values.reshape([-1,1])

In [25]:
# Instantiating model object
mlp = models.MLP(X, 
                 hidden_layers=[3,3,2], 
                 activation="swish", 
                 optimizer="adam", 
                 problem="binary_classification",
                 loss = "logloss")

In [26]:
mlp.train(X,y, n_epoch=1000,learning_rate=0.01)

  return 1. / (1. + np.exp(-x))


epoch: 1 | train_loss: 2.671318072532449
epoch: 2 | train_loss: 2.312156859207922
epoch: 3 | train_loss: 2.312131457153606
epoch: 4 | train_loss: 2.280228315987827
epoch: 5 | train_loss: 1.5476380694480116
epoch: 6 | train_loss: 0.8597905544960135
epoch: 7 | train_loss: 0.5010016067700759
epoch: 8 | train_loss: 0.3291587196310663
epoch: 9 | train_loss: 0.26062618137573373
epoch: 10 | train_loss: 0.20792939759964413
epoch: 11 | train_loss: 0.1930072436550653


KeyboardInterrupt: 

In [None]:
# Instantiating model object
mlp_q1 = models.MLP(X, hidden_layers=[5,5,5],
                 activation="tanh", optimizer="adam", 
                 problem="quantile",
                 loss="quantile",
                 q=0.01)

# Model train
mlp_q1.train(X,y,
             X_dev=X_dev, 
             y_dev=y_dev,
             n_epoch=1000,
             n_stopping_rounds=30,
             verbose=False)


# Run predict on new data
predictions_q1 = mlp_q1.predict(X_pred)
print(f"Prediction average for quantile 1: {predictions_q1.mean()}")

# Instantiating model object
mlp_q50 = models.MLP(X, hidden_layers=[5,5,5],
                 activation="tanh", optimizer="adam", 
                 problem="quantile",
                 loss="quantile",
                 q=0.5)

# Model train
mlp_q50.train(X,y,
              X_dev=X_dev, 
              y_dev=y_dev,
              n_epoch=1000,
              n_stopping_rounds=30, 
              verbose=False)

# Run predict on new data
predictions_q50 = mlp_q50.predict(X_pred)
print(f"Prediction average for quantile 50: {predictions_q50.mean()}")

# Instantiating model object
mlp_q99 = models.MLP(X, hidden_layers=[5,5,5],
                 activation="tanh", optimizer="adam", 
                 problem="quantile",
                 loss="quantile",
                 q=0.99)

# Model train
mlp_q99.train(X,y,
              X_dev=X_dev, 
              y_dev=y_dev,
              n_epoch=1000,
              n_stopping_rounds=30, 
              verbose=False)

# Run predict on new data
predictions_q99 = mlp_q99.predict(X_pred)
print(f"Prediction average for quantile 99: {predictions_q99.mean()}")

In [None]:
a = pd.DataFrame(y)
a.columns=["y"]
a = a.sort_values("y")
a["q"] = np.arange(a.shape[0])/a.shape[0]
for q in np.arange(0.01, 1, 0.1):
    # Instantiating model object
    mlp = models.MLP(X, 
                     hidden_layers=[5,6,5],
                     activation="leaky_relu",
                     optimizer="adam", 
                     problem="quantile",
                     loss="quantile",
                     q=q)

    # Model train
    mlp.train(X,y,
              n_epoch=1000,
              batch_size=512,
              learning_rate=0.001,
              verbose=True)

    # Run predict on new data
    predictions = mlp.predict(X)
    
    actual = a[a["q"]<q].tail(1)["y"].values[0]
    print(f"Prediction average for quantile {(int(100*q))}: {predictions.mean()}  | actual: {actual}")