# Lab 5

In this lab, you will further improve previous work to reduce the likelihood of implementation errors.

## Example (continued): Computing the mean value

The preceding lab sheet included the following example to compute the mean value of a particular list of integers:

In [2]:
def compute_mean(list_of_integers):
    """
    Return the arithmetic mean of `list_of_integers`.

    The mean is the sum of the inputs divided by the number of inputs.
    An empty input raises ZeroDivisionError, because the count is zero.
    """
    return sum(list_of_integers) / len(list_of_integers)

# Compute the mean of a small, hard-coded collection of input values
input_integers = [10, 20, 25, 1, 3]
# (10 + 20 + 25 + 1 + 3) / 5 = 11.8
print(compute_mean(input_integers))

def read_integers_from_file(filename):
    """
    Read the integers contained in the first line of `filename`,
    assuming they are separated by commas.

    Only the first line is read; any further lines are ignored.
    Returns a list of ints, or an empty list when the file is empty or its
    first line is blank. Raises ValueError if a field is not an integer.
    """
    with open(filename) as f:
        # readline() stops after one line instead of loading the whole file,
        # and yields "" (rather than failing) when the file is empty
        first_line = f.readline().rstrip()
    if not first_line:
        return []
    return [int(field) for field in first_line.split(",")]

# Same computation, with the inputs taken from the first line of test.txt
# (test.txt is not created in the visible cells; it must already exist)
print(compute_mean(read_integers_from_file("test.txt")))

11.8
45446.2375


In [5]:
# Adding unit tests
def run_unit_tests():
    """
    Unit tests for compute_mean and read_integers_from_file.

    Reads "test.txt" and raises AssertionError on the first failing check.
    """
    # compute_mean: a two-element input, then the single-element case
    mean_of_pair = compute_mean([1, 2])
    assert mean_of_pair == 1.5
    assert compute_mean([42]) == 42
    # read_integers_from_file: the first value in test.txt, checked both ways
    assert read_integers_from_file("test.txt")[0] == 58181
    assert read_integers_from_file("test.txt")[0] != 42
    
# Execute the unit tests; no output means every assertion passed
run_unit_tests()

def integration_tests():
    """
    Integration test: read test.txt and compute the mean of its first line.

    The mean is rounded to two decimal places before being compared with
    the expected value.
    """
    values_from_file = read_integers_from_file("test.txt")
    mean_to_two_places = round(compute_mean(values_from_file), 2)
    assert mean_to_two_places == 45446.24
    
# Execute the integration test; no output means the assertion passed
integration_tests()

## Task 1: Add unit tests to your linear-regression implementation
Review your own prior work from Labs 3 and 4, and add a test harness for each function. In particular, consider borderline cases.

In [1]:
# Dataset used in the coursework: a list of [x, y] pairs
dataset = [[5,20],[6,22],[8,33],[10,30],[12,28],[13,34],[15,40]]

def write_to_file(dataset, filename, mode = 'a', delimiter = ','):
    """
    Write the first two values of every row in `dataset` to `filename`.

    Each row becomes one line of the form "<row[0]><delimiter><row[1]>".
    The file is opened with `mode`, which defaults to 'a' (append).

    Returns True when the file has been closed after writing, so that
    unit tests can check the handle was released.
    """
    with open(filename, mode) as handle:
        lines = []
        for row in dataset:
            lines.append(str(row[0]) + delimiter + str(row[1]) + '\n')
        handle.writelines(lines)
    # Leaving the with-block closes the file; report that state to the caller
    return handle.closed
# Create linear_regress.txt from the dataset; mode 'w' overwrites any existing file
write_to_file(dataset,'linear_regress.txt', mode = 'w')

True

In [2]:
#Revised solution: with help from Michael
def process_line(line, delimiter):
    """
    Convert one line of text into a list of integers.

    Trailing whitespace (including the newline) is removed, the remainder
    is split on `delimiter`, and every field is converted with int().
    Raises ValueError if a field is not a valid integer (e.g. a blank line).
    """
    fields = line.rstrip().split(delimiter)
    return [int(field) for field in fields]

def read_from_file(filename, file_has_headers = False, delimiter = ','):
    """
    Read a delimited text file of integers into a list of rows.

    filename: path of the file to read
    file_has_headers: when True, the first line is treated as a header row
        and is skipped instead of being parsed as data
    delimiter: the separator between the fields of a line

    Returns one list of ints per data line, e.g. [[x1, y1], [x2, y2], ...].
    Raises ValueError if a data line contains a non-integer field.
    """
    with open(filename) as f:
        if file_has_headers:
            # Consume the header line so it is never passed to int();
            # the None default avoids StopIteration on an empty file
            next(f, None)
        return [process_line(line, delimiter) for line in f]

In [3]:
def compute_mean(values_list):
    """
    Return the arithmetic mean of `values_list` (its sum divided by its length).

    Raises ZeroDivisionError when `values_list` is empty.
    """
    total = sum(values_list)
    count = len(values_list)
    return total / count

def compute_covariance(list_x, x_mean, list_y, y_mean):
    """
    Return the sum of the products of paired deviations from the means:
    sum((x_i - x_mean) * (y_i - y_mean)).

    The sum is not divided by the number of points.

    list_x, list_y: the paired x and y values; they must have equal length
    x_mean, y_mean: the values subtracted from each x and each y

    Raises ValueError if `list_x` and `list_y` differ in length, instead of
    silently ignoring the surplus values.
    """
    if len(list_x) != len(list_y):
        raise ValueError(
            "list_x and list_y must have the same length (got {} and {})".format(
                len(list_x), len(list_y)))
    return sum([(x - x_mean) * (y - y_mean) for x, y in zip(list_x, list_y)])

def variance(list_x, x_mean):
    """
    Return the sum of squared deviations of `list_x` from `x_mean`.

    The sum is not divided by the number of values.
    """
    squared_deviations = [(value - x_mean)**2 for value in list_x]
    return sum(squared_deviations)

def linear_regression(filename):
    """
    Fit a straight line to the [x, y] rows stored in `filename`.

    Returns the tuple (alpha, beta), where
        alpha = compute_covariance(...) / variance(...)   (the slope)
        beta  = y_mean - alpha * x_mean                   (the intercept)

    Raises ZeroDivisionError when the file holds no rows, or when every x
    equals the mean of x (the variance term is then zero).
    """
    # Rows come back in the format [[x1, y1], [x2, y2], ...]
    rows = read_from_file(filename)

    # Separate the columns (the dataset is assumed to be clean, with one x and one y per row)
    xs = [row[0] for row in rows]
    ys = [row[1] for row in rows]

    # Means $\bar x$ and $\bar y$: the sum of each column divided by $n$
    x_mean = compute_mean(xs)
    y_mean = compute_mean(ys)

    # alpha is the ratio of the two deviation sums; beta follows from the means
    numerator = compute_covariance(xs, x_mean, ys, y_mean)
    denominator = variance(xs, x_mean)
    alpha = numerator / denominator
    beta = y_mean - (alpha * x_mean)

    # Outputs: $\alpha$ and $\beta$
    return alpha, beta
#linear_regression('linear_regress.txt')

In [4]:
# Fit the coursework dataset written to linear_regress.txt; the result is (alpha, beta)
linear_regression('linear_regress.txt')

(1.612068965517241, 13.681034482758625)

In [55]:
# Fixtures for the unit tests: [x, y] rows where y = 2 * x
test_dataset = [[1,2],[2,4],[3,6]]
# The same data split into columns
# NOTE(review): test_listx and test_listy are not used in the visible cells - confirm whether they are still needed
test_listx = [1,2,3]
test_listy = [2,4,6]
def run_unit_tests():
    """
    Unit tests for the helpers behind linear_regression.

    Writes test_dataset to 'test_data.txt' as a side effect and raises
    AssertionError on the first failing check.
    """
    # write_to_file reports whether the file was closed after writing
    file_closed = write_to_file(test_dataset, 'test_data.txt', mode = 'w')
    assert file_closed == True, "can't write to file, file is still open"

    # process_line: newlines embedded next to the fields must not break parsing
    parsed_fields = process_line('1\n,2\n,3\n,4\n',',')
    assert parsed_fields == [1,2,3,4]

    # Numeric helpers on small inputs
    mean_value = compute_mean([1,2,3])
    assert mean_value == 2
    deviation_products = compute_covariance([1, 3], 2, [4, 6], 5)
    assert deviation_products == 2
    # Borderline case: identical values give a zero sum of squared deviations
    squared_deviations = variance([2,2],2)
    assert squared_deviations == 0
# Execute the unit tests; no output means every assertion passed
run_unit_tests()
    

## Task 2: Turn the existing, fixed inputs into an integration test
Use the work of Task 4 of Lab 3 to construct an integration-test harness for your linear-regression implementation.

In [68]:
def run_integration_tests():
    """
    Integration tests: write data files, read them back and fit a line.

    Creates 'test_data.txt' and 'integration_test_linear_reg.txt' in the
    working directory. Raises AssertionError on the first failing check.
    """
    import math  # local import so this block does not rely on a file-level import

    # Write the fixture here instead of relying on an earlier test having created it
    write_to_file([[1,2],[2,4],[3,6]], 'test_data.txt', mode = 'w')
    assert read_from_file('test_data.txt')[0]==[1,2]

    xy = [[1,1],[2,3],[3,2],[4,3],[5,5]]
    write_to_file(xy,'integration_test_linear_reg.txt', mode = 'w')
    alpha, beta = linear_regression('integration_test_linear_reg.txt')
    # Equality tests over floating point numbers are fragile, so compare
    # against the expected values (0.8 and 0.4) with a tolerance instead
    assert math.isclose(alpha, 0.8)
    assert math.isclose(beta, 0.4)
# Execute the integration tests; no output means every assertion passed
run_integration_tests()