In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
"""Import the dataset as a pandas DataFrame"""

g_data = pd.read_csv('poverty.txt', delim_whitespace=True, \
        index_col=[0])

In [3]:
g_data.head()

Unnamed: 0_level_0,PovPct,Brth15to17,Brth18to19,ViolCrime,TeenBrth
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alabama,20.1,31.5,88.7,11.2,54.5
Alaska,7.1,18.9,73.7,9.1,39.5
Arizona,16.1,35.0,102.5,10.4,61.2
Arkansas,14.9,31.6,101.7,10.4,59.9
California,16.7,22.6,69.1,11.2,41.1


In [4]:
"""Set up the tensorflow graph"""

# Placeholders for matrix and vector in normal equation
A = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
    
# Do the calculations for the normal operation
Atran_A_inv = tf.matrix_inverse(tf.matmul(tf.transpose(A), A))
Atran_A_inv_Atran = tf.matmul(Atran_A_inv, tf.transpose(A))
x = tf.matmul(Atran_A_inv_Atran, b)

In [5]:
"""Regress 'Brth15to17' against 'PovPct'"""

with tf.Session() as sess:
    
    # Create the feed dictionary. Careful that column vectors do not become 1D arrays
    data_A = np.array(g_data.loc[:, 'PovPct'], ndmin=2).T
    ones_A = np.array(np.ones(g_data.shape[0]), ndmin=2).T
    input_A = np.hstack((data_A, ones_A))
    input_b = np.array(g_data.loc[:, 'Brth15to17'], ndmin=2).T
    feed_dict = {A: input_A, b: input_b}
    
    # Run the session
    output = sess.run(x, feed_dict=feed_dict)
    
    # Print the regression equation
    print('Regression equation:')
    print('\'Brth15to17\' = %.2f + %.2f\'PovPct\'' % (output[0], output[1]))
    
    # Graph of regression
    X = np.arange(0, 31)
    Y = output[0]*X + output[1]
    
    # Make a plot of the data and the linear regression
    fig, ax = plt.subplots()
    ax.scatter(g_data['PovPct'], g_data['Brth15to17'])                    # Scatterplot of data
    ax.plot(X, Y, color='k')                                               # Plot of regression
    ax.set_xlabel('Poverty (%)')
    ax.set_ylabel('Births 15-17')
    ax.set_title('Births 15-17 vs. Poverty %')
    plt.savefig('regression_Brth15to17_PovPct.png')

Regression equation:
'Brth15to17' = 1.37 + 4.27'PovPct'


In [6]:
"""Regress 'Brth15to17' against 'PovPct' and 'ViolCrime'"""

with tf.Session() as sess:
    
    # Create the feed dictionary. Careful that column vectors do not become 1D arrays
    data_A = np.array(g_data.loc[:, ['PovPct', 'ViolCrime']])
    ones_A = np.array(np.ones(g_data.shape[0]), ndmin=2).T
    input_A = np.hstack((data_A, ones_A))
    input_b = np.array(g_data.loc[:, 'Brth15to17'], ndmin=2).T
    feed_dict = {A: input_A, b: input_b}
    
    # Run the session
    output = sess.run(x, feed_dict=feed_dict)
    
    # Print the regression equation
    print('Regression equation:')
    print('\'Brth15to17\' = %.2f + %.2f\'PovPct\' + %.2f\'ViolCrime\'' \
            % (output[0], output[1], output[2]))

Regression equation:
'Brth15to17' = 1.04 + 0.34'PovPct' + 5.98'ViolCrime'
