In [2]:
import csv
import string
import numpy as np

def get_training_data(feature, path='housesRegr.csv'):
    '''
    Reads one column of the semicolon-separated housing data file
    Input: feature (one of 'MLS', 'Bedrooms', 'Bathrooms', 'Size', 'Price'),
           path (location of the CSV file, defaults to 'housesRegr.csv')
    Output: list with the raw string values of that column, header row excluded
    Raises: ValueError if feature is not one of the known column names
    '''
    columns = ['MLS', 'Bedrooms', 'Bathrooms', 'Size', 'Price']
    if feature not in columns:
        raise ValueError('unknown feature %r, expected one of %r' % (feature, columns))
    index = columns.index(feature)

    # newline='' is the mode the csv module asks for; the old 'rU' mode is
    # rejected by Python 3.11+. Parsing with delimiter=';' keeps fields that
    # contain a comma intact instead of cutting the row at the first comma.
    with open(path, newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=';')
        next(csvreader, None)  # the first row holds the column names
        # csv.reader yields [] for blank lines; skip them instead of crashing
        return [row[index] for row in csvreader if row]



# Load every column as a float array. The builtin float is used because the
# np.float alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
prices = np.array(get_training_data('Price')).astype(float)
bedrooms = np.array(get_training_data('Bedrooms')).astype(float)
bathrooms = np.array(get_training_data('Bathrooms')).astype(float)
size = np.array(get_training_data('Size')).astype(float)



In [3]:
import numpy as np
import matplotlib.pyplot as plt


def plot_data(x, y, x_name, y_name):
    '''
    Draws a scatter plot of y against x and shows it
    Input: x (list with values), y (list with values), name for x-axis, name for y-axis
    Output: none, the labelled scatter plot is displayed
    '''
    plt.scatter(x, y)
    # Both axis labels share the same font size.
    for set_label, text in ((plt.xlabel, x_name), (plt.ylabel, y_name)):
        set_label(text, fontsize=10)
    plt.show()

# Scatter each feature (x-axis) against the price (y-axis). The label
# arguments follow the same order as the data: x label first, then y label.
plot_data(bedrooms, prices, 'n of bedrooms', 'price')
plot_data(bathrooms, prices, 'n of bathrooms', 'price')
plot_data(size, prices, 'size', 'price')

In [4]:
# Starting intercept and slope of the line theta_0 + theta_1 * x,
# followed by the learning rate used for the updates.
theta_0, theta_1 = 0, 1
alpha = 0.01


    

In [5]:
def fx(x, theta_0, theta_1):
    '''
    Evaluates the line theta_0 + theta_1 * x for a single value
    Input: x (one number or numeric string), theta_0, theta_1
    Output: predicted y for that x
    '''
    # float() instead of int(): int() silently dropped the fractional part of
    # x (1.5 became 1), which shifted every prediction for non-integer features.
    return theta_0 + theta_1 * float(x)

In [6]:
def gradientTheta_0_iter(theta_0, theta_1, x, y, alpha):
    '''
    Performs one gradient descent update of theta_0 using an iterative process
    Input: theta_0, theta_1, x (list with values), y (list with values), learning rate alpha
    Output: updated theta_0 (the old theta_0 minus alpha times the mean prediction error)
    Raises: ValueError if x is empty
    '''
    m = len(x)
    if m == 0:
        raise ValueError('x must contain at least one training example')

    sum_training = 0.0
    for i in range(m):
        # Values are read with float(), not int(): truncating x and y to whole
        # numbers distorted the error for fractional data such as 1.5.
        prediction = theta_0 + theta_1 * float(x[i])
        sum_training += prediction - float(y[i])

    return theta_0 - (alpha * ((1.0 / m) * sum_training))

def gradientTheta_1_iter(theta_0, theta_1, x, y, alpha):
    '''
    Performs one gradient descent update of theta_1 using an iterative process
    Input: theta_0, theta_1, x (list with values), y (list with values), learning rate alpha
    Output: updated theta_1 (the old theta_1 minus alpha times the mean of error * x)
    Raises: ValueError if x is empty
    '''
    m = len(x)
    if m == 0:
        raise ValueError('x must contain at least one training example')

    sum_training = 0.0
    for i in range(m):
        # Values are read with float(), not int(): truncating x and y to whole
        # numbers distorted the error for fractional data such as 1.5.
        x_i = float(x[i])
        prediction = theta_0 + theta_1 * x_i
        sum_training += (prediction - float(y[i])) * x_i

    return theta_1 - (alpha * ((1.0 / m) * sum_training))



# print gradientTheta_0_iter(0, 1, [6,5,3], [5,6,10], 0.01)
# print gradientTheta_1_iter(0, 1, [6,5,3], [5,6,10], 0.01)

In [7]:
def make_fx_x(x, theta_0, theta_1, m):
    '''
    Calculates fx(x^(i)) values for the first m entries of x
    Input: x (numpy array or list), theta_0, theta_1, m (number of entries to evaluate)
    Output: fx(x^(i)) array (numpy float array of length m)
    Raises: ValueError if m is negative, IndexError if m exceeds the length of x
    '''
    if m < 0:
        raise ValueError('m must be non-negative')

    features = np.asarray(x, dtype=float)
    if m > features.shape[0]:
        raise IndexError('m (%d) exceeds the number of values in x (%d)' % (m, features.shape[0]))

    # One array expression replaces the per-element loop, and the values are
    # no longer passed through int(), which dropped their fractional part.
    return theta_0 + theta_1 * features[:m]


def gradientTheta_0_vec(theta_0, theta_1, x, y, alpha):
    '''
    Performs one gradient descent update of theta_0 using array arithmetic
    Input: theta_0, theta_1, x (numpy array or list), y (numpy array or list), learning rate alpha
    Output: updated theta_0 (the old theta_0 minus alpha times the mean prediction error)
    Raises: ValueError if x is empty
    '''
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    m = features.shape[0]
    if m == 0:
        raise ValueError('x must contain at least one training example')

    # Predictions come straight from the untruncated features, so they are
    # computed from the same values as the targets they are compared with.
    residuals = (theta_0 + theta_1 * features) - targets

    return theta_0 - (alpha * ((1.0 / m) * np.sum(residuals)))

def gradientTheta_1_vec(theta_0, theta_1, x, y, alpha):
    '''
    Performs one gradient descent update of theta_1 using array arithmetic
    Input: theta_0, theta_1, x (numpy array or list), y (numpy array or list), learning rate alpha
    Output: updated theta_1 (the old theta_1 minus alpha times the mean of error * x)
    Raises: ValueError if x is empty
    '''
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    m = features.shape[0]
    if m == 0:
        raise ValueError('x must contain at least one training example')

    # Predictions come straight from the untruncated features, so they are
    # computed from the same values as the targets they are compared with.
    residuals = (theta_0 + theta_1 * features) - targets

    # The dot product is the sum over all examples of residual * x.
    return theta_1 - (alpha * ((1.0 / m) * np.dot(residuals, features)))



# print(gradientTheta_0_vec(0, 1, size,prices, 0.01))
# print(gradientTheta_1_vec(0, 1, size,prices, 0.01))

In [8]:
def regression_iter(theta_0, theta_1, x, y, alpha, iterations):
    '''
    Repeats the loop-based theta updates a fixed number of times
    Input: theta_0, theta_1, x (list with values), y (list with values), learning rate alpha, iterations
    Output: theta_0 and theta_1 after the last update
    '''
    for _ in range(iterations):
        # The right-hand side is evaluated completely before either name is
        # rebound, so both updates are computed from the same pair of thetas.
        theta_0, theta_1 = (
            gradientTheta_0_iter(theta_0, theta_1, x, y, alpha),
            gradientTheta_1_iter(theta_0, theta_1, x, y, alpha),
        )
    return theta_0, theta_1


def regression_vec(theta_0, theta_1, x, y, alpha, iterations):
    '''
    Repeats the array-based theta updates a fixed number of times
    Input: theta_0, theta_1, x (numpy array), y (numpy array), learning rate alpha, iterations
    Output: theta_0, theta_1 after the last update, and the fx(x^(i)) array for those thetas
    '''
    for _ in range(iterations):
        # The right-hand side is evaluated completely before either name is
        # rebound, so both updates are computed from the same pair of thetas.
        theta_0, theta_1 = (
            gradientTheta_0_vec(theta_0, theta_1, x, y, alpha),
            gradientTheta_1_vec(theta_0, theta_1, x, y, alpha),
        )
    return theta_0, theta_1, make_fx_x(x, theta_0, theta_1, len(x))
    

def plot_line(x, y, x_name, y_name):
    '''
    Draws y against x as a line plot and shows it
    Input: x (list with values), y (list with values), name for x-axis, name for y-axis
    Output: none, the labelled line plot is displayed
    '''
    plt.plot(x, y)
    # Both axis labels share the same font size.
    for set_label, text in ((plt.xlabel, x_name), (plt.ylabel, y_name)):
        set_label(text, fontsize=10)
    plt.show()
    
    
    
'''
t0, t1 = regression_iter(0, 1, [6,5,3], [5,6,10], 0.01, 2)
print 'theta 0: '+str(t0)
print 'theta 1: '+str(t1)

'''


'''
t0, t1, fx_predict = regression_vec(0, 1, [6,5,3], [5,6,10], 0.01, 500)
print 'theta 0: '+str(t0)
print 'theta 1: '+str(t1)

'''

# t0, t1, fx_predict = regression_vec(0, 1, size, prices, 0.01, 70)
# print('theta 0: '+str(t0))
# print('theta 1: '+str(t1))







theta 0: nan
theta 1: nan




In [9]:
def J(theta_0, theta_1, x, y):
    '''
    Calculates cost function J (half the mean squared prediction error)
    Input: theta_0, theta_1, x (list with values), y (list with values)
    Output: cost
    Raises: ValueError if x is empty
    '''
    m = len(x)
    if m == 0:
        raise ValueError('x must contain at least one training example')

    sum_error = 0.0
    for i in range(m):
        # Values are read with float(), not int(): truncating x and y to whole
        # numbers reported a wrong cost for fractional data such as 1.5.
        prediction = theta_0 + theta_1 * float(x[i])
        sum_error += (prediction - float(y[i])) ** 2

    return (1.0 / (2 * m)) * sum_error
    
'''
# results homework example
print J(0, 1, [6,5,3], [5,6,10])   
print J(0.02333, 1.06666, [6,5,3], [5,6,10])    
print J(14.39, -1.59, [6,5,3], [5,6,10])    
print J(15, -1.71, [6,5,3], [5,6,10])    
'''

# # test bathrooms no iterations
# print J(0, 1, bathrooms, prices)
# # test bathrooms 50 iterations
# print J(31189.03, 155656.13, bathrooms, prices)
# # test bathrooms 50 iterations
# print J(311 bathrooms, prices)


'\n# results homework example\nprint J(0, 1, [6,5,3], [5,6,10])   \nprint J(0.02333, 1.06666, [6,5,3], [5,6,10])    \nprint J(14.39, -1.59, [6,5,3], [5,6,10])    \nprint J(15, -1.71, [6,5,3], [5,6,10])    \n'

In [11]:
import math
if __name__ == "__main__":
    # User input for the iterations and learning rate.
    iterations = int(input("How many iterations?"))
    alpha = float(input("What is the learning rate"))
    # Initializes all the arrays that are needed. The builtin float is used
    # because the np.float alias was removed in NumPy 1.24. size is loaded
    # here as well so this cell does not rely on a value left by another cell.
    prices = np.array(get_training_data('Price')).astype(float)
    bedrooms = np.array(get_training_data('Bedrooms')).astype(float)
    bathrooms = np.array(get_training_data('Bathrooms')).astype(float)
    size = np.array(get_training_data('Size')).astype(float)
    theta_0 = 0
    theta_1 = 1
    # Get individual theta's of all the arrays
    theta_0_bed, theta_1_bed, fx_bed = regression_vec(theta_0, theta_1, bedrooms, prices, alpha, iterations)
    theta_0_bath, theta_1_bath, fx_bath = regression_vec(theta_0, theta_1, bathrooms, prices, alpha, iterations)
    theta_0_size, theta_1_size, fx_size = regression_vec(theta_0, theta_1, size, prices, alpha, iterations)
    # Get costs from the retrieved theta's of all the arrays
    cost_bed = J(theta_0_bed, theta_1_bed, bedrooms, prices)
    cost_bath = J(theta_0_bath, theta_1_bath, bathrooms, prices)
    cost_size = J(theta_0_size, theta_1_size, size, prices)
    # Optional cost report, left disabled as in the original cell. Each line
    # names its own cost variable and uses the print() function.
    # print("Cost theta's bedrooms:", cost_bed)
    # print("Cost theta's bathrooms:", cost_bath)
    # print("Cost theta's size:", cost_size)
    plot_line(size, fx_size, 'size', 'prices')
    plot_line(bathrooms, fx_bath, 'bathrooms', 'prices')
    plot_line(bedrooms, fx_bed, 'bedrooms', 'prices')

How many iterations?10
What is the learning rate0.01


