In [14]:
import os
from six.moves import urllib

# Where the dataset lives remotely and where it is stored locally.
DOWNLOAD_ROOT = "http://gauss.vaniercollege.qc.ca/~iti/WSL/"
DATA_PATH = "data"
DATA_FILENAME = "analysis.csv"  # single source of truth for the file name
DATA_URL = DOWNLOAD_ROOT + DATA_PATH + "/" + DATA_FILENAME

def fetch_data(data_url=DATA_URL, data_path=DATA_PATH, filename=DATA_FILENAME):
    """Download `data_url` into the directory `data_path` and return the local path.

    Args:
        data_url: URL of the file to download.
        data_path: Local directory to store the file in; created if missing.
            An empty string means the current working directory.
        filename: Name given to the downloaded file inside `data_path`.

    Returns:
        The path the file was written to (`data_path` joined with `filename`).

    The file is downloaded on every call, overwriting any existing copy.
    """
    # os.makedirs("") raises, so only create the directory when one is named.
    if data_path and not os.path.isdir(data_path):
        os.makedirs(data_path)
    write_path = os.path.join(data_path, filename)
    urllib.request.urlretrieve(data_url, write_path)
    return write_path

# Local path of the dataset, built the same way fetch_data builds its target.
file_path = os.path.join(DATA_PATH, DATA_FILENAME)

In [15]:
# Download the dataset using the default URL and target directory.
# Note: this performs a network download every time the cell is run.
fetch_data()

In [16]:
import numpy as np

# Parse the downloaded CSV into a NumPy array; the first row is skipped
# (presumably a header line).
data = np.loadtxt(
    file_path,
    skiprows=1,
    delimiter=",",
)

In [17]:
# Predictors: array columns 1-4 (0-based), i.e. the 2nd through 5th CSV columns.
x_data = data[:, 1:5]

In [18]:
# Number of rows (observations) in the dataset.
data_length = len(x_data)

In [19]:
# Target: first CSV column (temp anomaly), kept as a column vector of shape (rows, 1).
y_data = data[:, 0].reshape(-1, 1)

In [20]:
def batch(batch_size):
    """Return a random mini-batch of `batch_size` rows from x_data and y_data.

    A fresh permutation of all row indices is drawn on every call and its
    first `batch_size` entries select the rows, so the rows within one batch
    are distinct. If `batch_size` exceeds the number of rows, every row is
    returned (in shuffled order).

    Returns:
        (x_batch, y_batch): the selected rows of x_data and the matching
        entries of y_data.
    """
    chosen = np.random.permutation(data_length)[:batch_size]
    return x_data[chosen, :], y_data[chosen]

In [21]:
from datetime import datetime

In [22]:
# TensorBoard log directory named after the current UTC time (second resolution),
# e.g. "tf_logs/run-20180101120000/".
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

In [23]:
import tensorflow as tf

In [24]:
steps = 5000 # Number of gradient-descent steps used to minimize the MSE

# Inputs fed at run time: rows of four features and the matching target values.
x = tf.placeholder(tf.float32, shape=[None, 4]) # Placeholder for the four x values
# The target is pinned to a column vector so it lines up with y_pred ([None, 1]).
# With an unconstrained shape, a 1-D feed of length n would broadcast against
# y_pred into an (n, n) difference and yield a wrong MSE/R-squared without any error.
y_true = tf.placeholder(tf.float32, shape=[None, 1]) # Placeholder for the true y values

with tf.name_scope('inference') as scope:
    w = tf.Variable([[0],[0],[0],[0]], dtype=tf.float32, name='weights') # Initializing four weights to zero
    b = tf.Variable(0, dtype=tf.float32, name='bias') # Initializing bias to zero
    y_pred = tf.matmul(x,w)+b # Linear model: prediction = x . w + b

with tf.name_scope('variance') as scope:
    mse = tf.reduce_mean(tf.square(y_true-y_pred)) # Mean squared error of the predictions
    y_mean = tf.reduce_mean(y_true) # Average of the actual y values
    mst = tf.reduce_mean(tf.square(y_true-y_mean)) # Mean squared deviation of y from its mean
    c_of_determination = 1-mse/mst # R-squared = 1 - MSE/MST

with tf.name_scope('train') as scope:
    learning_rate = 0.1
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train = optimizer.minimize(mse) # One gradient-descent step on the mean squared error

In [25]:
# Op that initializes the graph's global variables (here: weights and bias);
# it is run once at the start of the training session.
init = tf.global_variables_initializer()

# Scalar summary of the MSE tensor, tagged 'MSE', for display in TensorBoard.
mse_summary = tf.summary.scalar('MSE', mse)
# Event-file writer for `logdir`; passing the default graph also records the
# graph definition. The writer is opened here and closed at the end of the
# training cell.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [35]:
BATCH_SIZE = 50 # Rows per random mini-batch
LOG_EVERY = 50  # Write an MSE summary every this many steps

# The writer is created in an earlier cell but closed at the end of this one.
# reopen() does nothing while the writer is still open and re-enables it after a
# close(), so re-running this cell on its own still records the MSE summaries
# instead of sending them to a closed writer.
file_writer.reopen()
try:
    with tf.Session() as sess:
        sess.run(init)
        for i in range(steps):
            # One gradient-descent step on a fresh random mini-batch.
            x_set, y_set = batch(BATCH_SIZE)
            sess.run(train, feed_dict={x: x_set, y_true: y_set})
            if (i+1) % LOG_EVERY == 0:
                # The logged MSE is evaluated on the full dataset, not on the mini-batch.
                summary_str = sess.run(mse_summary, feed_dict={x: x_data, y_true: y_data})
                file_writer.add_summary(summary_str, i)
        # Final weights, bias, MSE and R-squared, evaluated on the full dataset.
        W, B, MSE, CD = sess.run([w, b, mse, c_of_determination], feed_dict={x: x_data, y_true: y_data})
        print("Weights {}".format(W.T), "Bias {:.4f}".format(B), "MSE {:.4f}".format(MSE))
        print("\nCoefficient of Determination {}".format(CD))
        # Apply the fitted linear model to one hand-entered row of feature values.
        prediction = 0.465*W[0] + 0.077*W[1] + 0.922*W[2] + 0.136*W[3] + B
        print("\nPrediction for 2050: " + str(prediction[0]))
finally:
    # Always flush and close the event file, even if training raised.
    file_writer.close()

Weights [[ 1.9664433   0.00544963  0.48435163 -0.09918728]] Bias -0.7179 MSE 0.0326

Coefficient of Determination 0.23728680610656738

Prediction for 2050: 0.629991
