In [1]:
import sys

import numpy as np

import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

from models import InitBatch,\
                   LinearRegression, \
                   LogisticRegression,\
                   PoissonRegression
from data_generator import generate_linear_data, \
                           generate_logistic_data, \
                           generate_poisson_data

from dataset import Dataset, DatasetIndex

In [2]:
# Apply both style sheets in a single call; matplotlib applies the list from
# first to last, so 'ggplot' is layered over 'seaborn-poster' exactly as before.
plt.style.use(['seaborn-poster', 'ggplot'])

In [3]:
# Number of samples requested from the generator; the same name is read by the
# following cell to size the dataset index.
size = 1000
# Inputs and targets that the linear-regression pipeline loads further down.
linear_x, linear_y = generate_linear_data(size=size)

In [4]:
# Build a dataset whose index is the positions 0 .. size-1, with InitBatch passed as batch_class.
# NOTE(review): `size` is a notebook-wide global that is assigned again further down;
# this cell relies on the value set together with the linear data.
dataset = Dataset(np.arange(size), batch_class=InitBatch)
# Presumably creates the `dataset.train` / `dataset.test` parts that the pipelines
# below use, with default split settings - confirm against the Dataset API.
dataset.cv_split()

### Linear regression

In [5]:
BATCH_SIZE = 100

# Linear-regression training pipeline over the train part of the dataset:
# load the arrays, register the 'linear_1' model, and on every batch fetch the
# loss into the 'current_loss' pipeline variable.
train_pp = (
    dataset.train.p
    .load((linear_x, linear_y))
    .init_variable('current_loss', init_on_each_run=0)
    .init_model(
        'static', LinearRegression, 'linear_1',
        config=dict(
            loss='mse',
            optimizer=dict(name='GradientDescentOptimizer', learning_rate=0.01),
            data_shape=13
        )
    )
    .train_model(
        'linear_1',
        fetches='loss',
        feed_dict=dict(input_data='input_data', targets='labels'),
        save_to='current_loss'
    )
    .run(BATCH_SIZE, shuffle=True, n_epochs=100)
)

In [6]:
# Show the value left in the 'current_loss' pipeline variable after training
# (the recorded output below is a single scalar).
print('mse loss: ', train_pp.get_variable('current_loss'))

mse loss:  0.0325463


### Logistic regression

In [7]:
# Sample count handed to the generator. It has its own name so that the
# notebook-wide `size` (used to build the dataset index) is not overwritten
# here; re-running the dataset cell afterwards then still yields the same index.
# NOTE(review): the shared dataset index has 1000 entries, so the generator
# presumably returns `logistic_size` rows per class (2 x 500) - TODO confirm.
logistic_size = 500

# One specification per class, presumably [mean, covariance] of a 2-D Gaussian -
# confirm against generate_logistic_data.
first_class = [[1, 2], [[15, 0], [0, 15]]]
second_class = [[10, 17], [[15, 0], [0, 15]]]

logistic_x, logistic_y = generate_logistic_data(logistic_size, first_class, second_class)

In [8]:
BATCH_SIZE = 100

# Logistic-regression training pipeline over the train part of the dataset.
train_pp = (dataset.train.p
                .load((logistic_x, logistic_y))
                # Scalar placeholder, matching the linear pipeline: the variable is
                # written through save_to and ends up holding a single loss value
                # (see the printed output), so a list initialiser was misleading.
                .init_variable('current_loss', init_on_each_run=0)
                # NOTE(review): data_shape is 13 while the class specifications passed
                # to generate_logistic_data have 2-element means - confirm that this
                # matches what the model expects.
                .init_model('static', LogisticRegression, 'logistic',
                            config={'loss': 'sigmoid_cross_entropy',
                                    'optimizer': {'name': 'Adam', 'learning_rate': 0.01},
                                    'data_shape': 13})
                .train_model('logistic',
                             fetches='loss',
                             feed_dict={'input_data': 'input_data',
                                        'targets': 'labels'},
                             save_to='current_loss')
                .run(BATCH_SIZE, shuffle=True, n_epochs=100))

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.


In [9]:
# Show the value left in the 'current_loss' pipeline variable after training
# (the recorded output below is a single scalar).
print('cross entropy loss: ', train_pp.get_variable('current_loss'))

cross entropy loss:  0.237129


### Poisson regression

In [10]:
size = 1000
NUM_DIM = 13

# Seed the global NumPy generator so the vector drawn below is identical on
# every Restart & Run All; no seed is set in the visible cells above.
np.random.seed(42)

# The first argument is NUM_DIM values drawn uniformly from [0, 1) - presumably
# the true regression weights; confirm against generate_poisson_data.
poisson_x, poisson_y = generate_poisson_data(np.random.random(NUM_DIM), size)

In [11]:
def loss_poisson(target, predictions):
    """Return the mean log-Poisson loss as a scalar tensor.

    Parameters
    ----------
    target
        Passed to ``tf.nn.log_poisson_loss`` as ``targets``.
    predictions
        Passed to ``tf.nn.log_poisson_loss`` as ``log_input``, i.e. TensorFlow
        treats it as the logarithm of the expected value.

    Returns
    -------
    The element-wise loss reduced with ``tf.reduce_mean`` over all elements.
    """
    elementwise_loss = tf.nn.log_poisson_loss(targets=target, log_input=predictions)
    return tf.reduce_mean(elementwise_loss)

In [12]:
BATCH_SIZE = 100

# Poisson-regression training pipeline over the train part of the dataset,
# using the custom loss_poisson callable as the loss.
train_pp = (dataset.train.p
                .load((poisson_x, poisson_y))
                # Scalar placeholder, matching the linear pipeline: the variable is
                # written through save_to, not appended to, so a list initialiser
                # was misleading.
                .init_variable('current_loss', init_on_each_run=0)
                .init_model('static', PoissonRegression, 'poisson',
                            config={'loss': loss_poisson,
                                    'optimizer': {'name': 'Adam', 'learning_rate': 0.02},
                                    # Reuse the constant the data was generated with
                                    # instead of repeating the literal 13.
                                    'data_shape': NUM_DIM})
                .train_model('poisson',
                             fetches='loss',
                             feed_dict={'input_data': 'input_data',
                                        'targets': 'labels'},
                             save_to='current_loss')
                .run(BATCH_SIZE, shuffle=True, n_epochs=150))

In [13]:
# Evaluation pipeline over the test part of the dataset: reuse the 'poisson'
# model from train_pp and, for every batch, append the fetched predictions and
# targets to the two list variables.
test_pp = (
    dataset.test.p
    .load((poisson_x, poisson_y))
    .import_model('poisson', train_pp)
    .init_variable('all_predictions', init_on_each_run=list)
    .init_variable('answers', init_on_each_run=list)
    .predict_model(
        'poisson',
        fetches=['predicted_labels', 'targets'],
        feed_dict=dict(input_data='input_data', targets='labels'),
        append_to=['all_predictions', 'answers']
    )
    .run(BATCH_SIZE, shuffle=True, n_epochs=1)
)

In [14]:
# Both variables were filled through append_to, so each presumably holds one
# fetched array per batch. Flatten batch by batch and join the pieces: this gives
# the same 1-D result as before for equally sized batches, and still works when
# the final batch is shorter (np.array on a ragged list would not).
pred = np.concatenate([np.ravel(batch) for batch in test_pp.get_variable('all_predictions')])
target = np.concatenate([np.ravel(batch) for batch in test_pp.get_variable('answers')])

# Population variance (np.var defaults to ddof=0), i.e. the mean squared
# deviation from the mean that was previously spelled out by hand.
true_var = np.var(target)
predict_var = np.var(pred)

# Mean absolute error expressed as a percentage of the mean target value.
percent = np.mean(np.abs(pred - target)) / np.mean(target) * 100
print('Percentage of errors: {}%'.format(round(percent, 3)), 'Variance ratio: %.3f' % (predict_var / true_var))

Percentage of errors: 10.826% Variance ratio: 0.988
