# This notebook contains a demonstration of regression models using TensorFlow and the dataset package
The models are implemented as methods of the class MyBatch in linear.py.

In [1]:
import sys
import seaborn

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

sys.path.append("..")
from dataset import Dataset
import linear as lr

In [2]:
# Set CUDA_VISIBLE_DEVICES for this kernel; the TensorFlow sessions are created further down.
# NOTE(review): the value is the literal string "[]" — presumably meant to hide every GPU
# so the demo runs on CPU; the usual spelling is an empty value or -1. Confirm.
%env CUDA_VISIBLE_DEVICES=[]

env: CUDA_VISIBLE_DEVICES=[]


# Linear Regression (continuous target)

In [3]:
# Settings used by the cells below.
DATA_SIZE = 500          # passed to lr.load_linear_data
BATCH_SIZE = 100         # first argument of every training pipeline's .run()
TRAINING_EPOCHS = 500    # n_epochs of every training pipeline's .run()

In [4]:
# lr.load_linear_data returns an np.array of features and a linearly dependent target
# whose error is drawn from a normal distribution.
data = lr.load_linear_data(DATA_SIZE)

# One dataset item per row of the first array in `data`; batch_class is lr.MyBatch.
sample_ids = np.arange(data[0].shape[0])
my_dataset = Dataset(index=sample_ids, batch_class=lr.MyBatch)

# Create the train and test indices (cv_split is called with no arguments).
my_dataset.cv_split()

In [5]:
# History list that is handed to train_linear in the next cell; starts empty.
cost_history = []

# TF1-style session for the linear model; the variable initializer is run in it right away.
# NOTE(review): the initializer is created before any model-building call is visible in
# this notebook — confirm that linear.py creates/initializes its variables accordingly.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [6]:
# Training pipeline on the train split: load -> preprocess -> train the linear regression
# model, run in shuffled batches of BATCH_SIZE for TRAINING_EPOCHS epochs.
(
    my_dataset.train.pipeline()
    .load(data)
    .preprocess_linear_data()
    .train_linear(sess, cost_history)
    .run(BATCH_SIZE, shuffle=True, n_epochs=TRAINING_EPOCHS)
)

<dataset.dataset.pipeline.Pipeline at 0x1b8eab20470>

In [7]:
# Four independent, initially empty lists that are handed to test_linear in the next cell.
y_true = []
y_pred = []
mse = []
x_features = []

In [8]:
# Test pipeline on the test split: load -> preprocess -> test the trained model.
# The batch size equals the number of test indices, and test_linear receives the
# result lists created in the previous cell.
n_test_items = len(my_dataset.test.indices)
test_batch = (
    my_dataset.test.p
    .load(data)
    .preprocess_linear_data()
    .test_linear(sess, y_true, y_pred, mse, x_features)
    .run(n_test_items)
)

In [9]:
# Ratio of sample variances (ddof=1): predictions over true targets.
pred_variance = np.var(y_pred, ddof=1)
true_variance = np.var(y_true, ddof=1)
print('Variance ratio: %.2f' % (pred_variance / true_variance))

Variance ratio: 0.73


In [10]:
# Absolute difference between predictions and targets, computed once and reused.
abs_error = np.abs(np.array(y_pred) - np.array(y_true))

# The reported numbers are the mean absolute error and three standard deviations of the
# absolute error — not a mean squared error — so the label names the quantity accordingly.
print('Absolute error is distributed in the interval: {0:.2f} $ ± {1:.2f} $'
      .format(np.mean(abs_error), 3 * np.std(abs_error)))

MSE is distibuted in the interval: 0.12 $ ± 0.25 $


In [11]:
 # Mean absolute percentage error: every absolute error is scaled by the magnitude of its
 # own target and the ratios are averaged. The denominator uses |target| so that negative
 # targets cannot flip the sign of a ratio and cancel other errors in the mean.
 targets = np.array(y_true)
 relative_error = np.abs(np.array(y_pred) - targets) / np.abs(targets)
 print('Mean absolute percentage error is: {0:.2f}%'.format(np.mean(relative_error) * 100))

MSE with respect to data's mean is: 3.81%


# Logistic Regression (Binary target)

In [12]:
# Random data for classification; blobs=False is forwarded to lr.load_random_data.
data = lr.load_random_data(blobs=False)

# Index the dataset by row number of the first array in `data`; batch_class is lr.MyBatch.
row_ids = np.arange(data[0].shape[0])
my_dataset = Dataset(index=row_ids, batch_class=lr.MyBatch)

# Create the train and test indices.
my_dataset.cv_split()

In [13]:
# Empty histories that are handed to train_logistic in the next cell.
cost_history, acc_history = [], []

# New session for the logistic model, with the variable initializer run immediately.
# NOTE(review): `sess` from the linear section is rebound here without being closed —
# consider calling sess.close() once its results are no longer needed.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [14]:
# Training pipeline on the train split: load -> preprocess -> train the logistic regression
# model, run in shuffled batches of BATCH_SIZE for TRAINING_EPOCHS epochs.
(
    my_dataset.train.p
    .load(data)
    .preprocess_binary_data()
    .train_logistic(sess, cost_history, acc_history)
    .run(BATCH_SIZE, shuffle=True, n_epochs=TRAINING_EPOCHS)
)

<dataset.dataset.pipeline.Pipeline at 0x1b8eab208d0>

In [15]:
# List handed to test_logistic; the next cell reads acc[0].
acc = []

# Test pipeline on the test split: load -> preprocess -> test the trained model,
# with a batch size equal to the number of test indices.
n_test_items = len(my_dataset.test.indices)
test_batch = (
    my_dataset.test.p
    .load(data)
    .preprocess_binary_data()
    .test_logistic(sess, acc)
    .run(n_test_items)
)

In [16]:
# Report the first entry of `acc` as a whole-number percentage.
accuracy_pct = 100.0 * acc[0]
print("ACCURACY: %.0f%%" % accuracy_pct)

ACCURACY: 95%


# Poisson Regression (discrete target)

In [31]:
# lr.load_poisson_data generates a sample from a Poisson distribution and returns a
# tuple of (weights, features, labels).
data = lr.load_poisson_data()

# data[1] holds the features, so its first dimension gives the number of dataset items.
feature_rows = np.arange(data[1].shape[0])
my_dataset = Dataset(index=feature_rows, batch_class=lr.MyBatch)

# Create the train and test indices.
my_dataset.cv_split()

In [32]:
# History list that is handed to train_poisson in the next cell; starts empty.
cost_history = []

# New session for the Poisson model, with the variable initializer run immediately.
# NOTE(review): the previously bound `sess` is replaced here without being closed —
# consider calling sess.close() once its results are no longer needed.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [33]:
# Training pipeline for the Poisson regression model. Only data[1:] (features and labels)
# is loaded; data[0] (weights) stays out of the pipeline, and no preprocessing step is used.
(
    my_dataset.train.p
    .load(data[1:])
    .train_poisson(sess, cost_history)
    .run(BATCH_SIZE, shuffle=True, n_epochs=TRAINING_EPOCHS)
)

<dataset.dataset.pipeline.Pipeline at 0x1b8eadae978>

In [34]:
# Three independent, initially empty lists that are handed to test_poisson in the next cell.
logit = []
y_true = []
weights = []

In [35]:
# Test pipeline for the Poisson model on the test split; the batch size equals the
# number of test indices, and test_poisson receives the result lists created above.
n_test_items = len(my_dataset.test.indices)
test_batch = (
    my_dataset.test.p
    .load(data[1:])
    .test_poisson(sess, y_true, logit, weights)
    .run(n_test_items)
)

In [36]:
# Dot product of the features (data[1]) with the weights (data[0]) from the generated data.
# NOTE(review): this uses every row of data[1], whereas `logit` was handed to the test
# pipeline only — confirm that comparing these two variances is intended.
data_weights, data_features = data[0], data[1]
lmbd = np.dot(data_features, data_weights)

# Ratio of sample variances (ddof=1): model output over the generated linear predictor.
logit_variance = np.var(logit, ddof=1)
lmbd_variance = np.var(lmbd, ddof=1)
print('Variance ratio: %.2f' % (logit_variance / lmbd_variance))

Variance ratio: 0.95


In [37]:
 # Mean absolute difference between exp(logit) and y_true, expressed as a percentage
 # of the mean of y_true.
 mean_abs_error = np.mean(np.abs(np.exp(logit) - y_true))
 print('The model\'s error with respect to data\'s mean is: {0:.2f}%'.format(mean_abs_error / np.mean(y_true) * 100))

The model's error with respect to data's mean is: 10.40%
