1. Write Python code to estimate the parameters when we regress Height on Weight
using the least-squares estimation method. Write Python code to calculate the value of
R-squared. Write down the output of the given programs. The data set is as follows:

Write a Python program to draw the scatter plot and the regression line for height and
weight when we regress Height on Weight.

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def dot(v, w):
    """Return the dot product of two equal-length sequences.

    Corresponding elements are multiplied pairwise and the products summed.
    An AssertionError is raised when the two lengths differ.
    """
    assert len(v) == len(w)
    products = [left * right for left, right in zip(v, w)]
    return sum(products)

def sum_of_squares(v):
    """Return the sum of the squared elements of v.

    Computed as the dot product of v with itself.
    """
    squared_total = dot(v, v)
    return squared_total

def mean(xs):
    """Return the arithmetic mean of xs.

    Divides the sum of the elements by their count, so an empty
    sequence raises ZeroDivisionError.
    """
    total = sum(xs)
    count = len(xs)
    return total / count

def de_mean(xs):
    """Return a list of the deviations of each element of xs from the mean of xs."""
    centre = mean(xs)
    deviations = []
    for value in xs:
        deviations.append(value - centre)
    return deviations

def variance(xs):
    """Return the sample variance of xs (divisor n - 1).

    Requires at least two elements; fewer trigger an AssertionError.
    """
    count = len(xs)
    assert count >= 2
    squared_deviation_total = sum_of_squares(de_mean(xs))
    return squared_deviation_total / (count - 1)

def standard_deviation(xs):
    """Return the sample standard deviation of xs: the square root of variance(xs)."""
    spread = variance(xs)
    return math.sqrt(spread)

def covariance(xs, ys):
    """Return the sample covariance of xs and ys (divisor n - 1).

    Both sequences must have the same length, otherwise an
    AssertionError is raised.
    """
    assert len(xs) == len(ys)
    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)

def correlation(xs, ys):
    """Return the correlation of xs and ys.

    The covariance is divided by both standard deviations. When either
    standard deviation is not strictly positive the function returns 0
    instead of dividing.
    """
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)
    # Guard first: no variation in one variable means nothing to divide by.
    if not (stdev_x > 0 and stdev_y > 0):
        return 0
    return covariance(xs, ys) / stdev_x / stdev_y

def least_square_fit(x, y):
    """Return (alpha, beta) of the least-squares line y = alpha + beta * x.

    beta is correlation(x, y) scaled by the ratio of the standard
    deviations of y and x; alpha is chosen so that the line passes
    through the point (mean(x), mean(y)).
    """
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    beta = r * spread_y / spread_x
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

def total_sum_of_squares(y):
    """Return the total squared deviation of the values in y from their mean."""
    squared_deviations = [deviation ** 2 for deviation in de_mean(y)]
    return sum(squared_deviations)

def predict(alpha, beta, x_i):
    """Return the value of the line with intercept alpha and slope beta at x_i."""
    slope_part = beta * x_i
    return slope_part + alpha

def error(alpha, beta, x_i, y_i):
    """Return the signed residual: predicted value at x_i minus the observed y_i."""
    predicted = predict(alpha, beta, x_i)
    return predicted - y_i

def sum_of_sqerrors(alpha, beta, x, y):
    """Return the sum of squared residuals of the line (alpha, beta) over the paired x, y."""
    squared_residuals = [
        error(alpha, beta, x_i, y_i) ** 2
        for x_i, y_i in zip(x, y)
    ]
    return sum(squared_residuals)

def r_squared(alpha, beta, x, y):
    """Return R-squared of the line (alpha, beta) on the data.

    This is one minus the ratio of the residual sum of squares to the
    total sum of squares of y.
    """
    residual_ss = sum_of_sqerrors(alpha, beta, x, y)
    total_ss = total_sum_of_squares(y)
    return 1.0 - (residual_ss / total_ss)

def main():
    """Fit a least-squares line to the height/weight sample, report it and plot it.

    The call ``least_square_fit(height, weight)`` fits
    ``weight = alpha + beta * height``. The coefficients and R-squared are
    printed, then a seaborn regression plot is drawn with the point
    (mean height, mean weight) marked in blue.

    NOTE(review): the exercise text says "regress Height on Weight", which
    conventionally makes Height the response. This code uses Weight as the
    response; confirm which direction is intended before changing it.
    """
    height = np.array([65.8, 71.51, 69.39, 68.21, 67.78, 68.69, 69.80, 70.01, 67.90])
    weight = np.array([112.99, 136.48, 153.02, 142.33, 144.29, 123.30, 141.29, 136.46, 112.37])
    alpha, beta = least_square_fit(height, weight)
    print('(Alpha, Beta): (', alpha, ',', beta, ')')
    print('R-Squared:', r_squared(alpha, beta, height, weight))
    plt.figure()
    # x and y must be passed by keyword: current seaborn releases accept only
    # `data` positionally, so regplot(height, weight) raises TypeError there.
    sns.regplot(x=height, y=weight, fit_reg=True, color='green')
    plt.scatter(np.mean(height), np.mean(weight), color='blue')
    plt.xlabel('Height')
    plt.ylabel('Weight')
    plt.title('The Scatter Plot And Regression Line Between Height And Weight When We Regress Height On Weight.')
    # Without an explicit show() the figure never appears when this runs as a
    # plain script rather than in a notebook with inline plotting.
    plt.show()


if __name__ == '__main__':
    main()


2. Write a Python program to estimate the parameters when we regress Height on
Weight using the gradient descent estimation method. Write Python code to calculate the
value of R-squared. Compare the results with those obtained using the least-squares
method in Q1. The data set is as follows:

In [None]:
import numpy as np
import random
import tqdm

def predict(alpha, beta, x_i):
    """Return the value of the line with intercept alpha and slope beta at x_i."""
    slope_part = beta * x_i
    return slope_part + alpha

def error(alpha, beta, x_i, y_i):
    """Return the signed residual: predicted value at x_i minus the observed y_i."""
    predicted = predict(alpha, beta, x_i)
    return predicted - y_i

def sum_of_sqerrors(alpha, beta, x, y):
    """Return the sum of squared residuals of the line (alpha, beta) over the paired x, y."""
    squared_residuals = [
        error(alpha, beta, x_i, y_i) ** 2
        for x_i, y_i in zip(x, y)
    ]
    return sum(squared_residuals)

def scalar_multiply(c, v):
    """Return a new list holding every element of v multiplied by the scalar c."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled

def add(v, w):
    """Return the element-wise sum of two equal-length sequences as a new list.

    An AssertionError is raised when the lengths differ.
    """
    assert len(v) == len(w)
    totals = []
    for left, right in zip(v, w):
        totals.append(left + right)
    return totals

def gradient_step(v, gradient, step_size):
    """Return v moved by step_size times gradient.

    The sign of step_size decides the direction: callers pass a negative
    value to move against the gradient. v and gradient must have the same
    length, otherwise an AssertionError is raised.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))

def mean(xs):
    """Return the arithmetic mean of xs.

    Divides the sum of the elements by their count, so an empty
    sequence raises ZeroDivisionError.
    """
    total = sum(xs)
    count = len(xs)
    return total / count

def de_mean(xs):
    """Return a list of the deviations of each element of xs from the mean of xs."""
    centre = mean(xs)
    deviations = []
    for value in xs:
        deviations.append(value - centre)
    return deviations

def total_sum_of_squares(y):
    """Return the total squared deviation of the values in y from their mean."""
    squared_deviations = [deviation ** 2 for deviation in de_mean(y)]
    return sum(squared_deviations)

def r_squared(alpha, beta, x, y):
    """Return R-squared of the line (alpha, beta) on the data.

    This is one minus the ratio of the residual sum of squares to the
    total sum of squares of y.
    """
    residual_ss = sum_of_sqerrors(alpha, beta, x, y)
    total_ss = total_sum_of_squares(y)
    return 1.0 - (residual_ss / total_ss)

def main():
    """Estimate the line weight = alpha + beta * height by batch gradient descent.

    Starts from a seeded random guess, runs ``nums_epochs`` full-batch
    updates on the sum of squared errors, then prints the fitted
    ``[alpha, beta]`` and the R-squared of that line on the original data.

    The descent runs on mean-centred heights. Raw heights sit near 68 with
    a spread of only a few units, which makes the loss surface so
    ill-conditioned that any step size small enough to be stable leaves
    the intercept essentially at its initial guess within the epoch budget,
    so the result cannot be compared with the least-squares fit. Centring
    decouples intercept and slope, so a larger step converges; the
    intercept is mapped back to the uncentred scale before reporting.
    """
    height = np.array([65.8, 71.51, 69.39, 68.21, 67.78, 68.69, 69.80, 70.01, 67.90])
    weight = np.array([112.99, 136.48, 153.02, 142.33, 144.29, 123.30, 141.29, 136.46, 112.37])
    nums_epochs = 10000
    random.seed(0)
    guess = [random.random(), random.random()]
    # Stable for the centred problem, whose curvature is far smaller than
    # that of the raw-height problem.
    learning_rate = 0.001

    height_mean = mean(height)
    centred_height = height - height_mean

    with tqdm.trange(nums_epochs) as t:
        for _ in t:
            # alpha_c is the intercept of the line expressed in centred heights.
            alpha_c, beta = guess
            residuals = [
                error(alpha_c, beta, x_i, y_i)
                for x_i, y_i in zip(centred_height, weight)
            ]
            # Partial derivatives of the squared-error loss w.r.t. intercept and slope.
            grad_a = sum(2 * r_i for r_i in residuals)
            grad_b = sum(2 * r_i * x_i for r_i, x_i in zip(residuals, centred_height))
            loss = sum_of_sqerrors(alpha_c, beta, centred_height, weight)
            t.set_description(f"Loss: {loss:.3f}")
            # Negative step size: move against the gradient.
            guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

    alpha_c, beta = guess
    # weight = alpha_c + beta * (height - mean)  =>  alpha = alpha_c - beta * mean
    alpha = alpha_c - beta * height_mean
    guess = [alpha, beta]
    print('(Alpha, Beta):', guess)
    print('R-Squared:', r_squared(alpha, beta, height, weight))


if __name__ == '__main__':
    main()


3. Consider the "Advertising" data. Import the data (the advertising data is stored as
"Advertising.csv"). It contains four columns: "Sales", "TV", "Radio", and
"Newspaper". Consider "Sales" as the response variable and "TV",
"Radio", and "Newspaper" as predictor variables. Write a Python program to fit a
simple linear regression between "Sales" and "TV", "Sales" and "Radio", and "Sales" and
"Newspaper", and estimate the parameters in each case. Write Python code to
calculate the R-squared value for each fit and compare them. Show the output of the programs.

In [None]:
import csv
import math

def mean(xs):
    """Return the arithmetic mean of xs.

    Divides the sum of the elements by their count, so an empty
    sequence raises ZeroDivisionError.
    """
    total = sum(xs)
    count = len(xs)
    return total / count

def de_mean(xs):
    """Return a list of the deviations of each element of xs from the mean of xs."""
    centre = mean(xs)
    deviations = []
    for value in xs:
        deviations.append(value - centre)
    return deviations

def dot(v, w):
    """Return the dot product of two equal-length sequences.

    Corresponding elements are multiplied pairwise and the products summed.
    An AssertionError is raised when the two lengths differ.
    """
    assert len(v) == len(w)
    products = [left * right for left, right in zip(v, w)]
    return sum(products)

def sum_of_squares(v):
    """Return the sum of the squared elements of v.

    Computed as the dot product of v with itself.
    """
    squared_total = dot(v, v)
    return squared_total

def variance(xs):
    """Return the sample variance of xs (divisor n - 1).

    Requires at least two elements; fewer trigger an AssertionError.
    """
    count = len(xs)
    assert count >= 2
    squared_deviation_total = sum_of_squares(de_mean(xs))
    return squared_deviation_total / (count - 1)

def standard_deviation(xs):
    """Return the sample standard deviation of xs: the square root of variance(xs)."""
    spread = variance(xs)
    return math.sqrt(spread)

def covariance(xs, ys):
    """Return the sample covariance of xs and ys (divisor n - 1).

    Both sequences must have the same length, otherwise an
    AssertionError is raised.
    """
    assert len(xs) == len(ys)
    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)

def correlation(xs, ys):
    """Return the correlation of xs and ys.

    The covariance is divided by both standard deviations. When either
    standard deviation is not strictly positive the function returns 0
    instead of dividing.
    """
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)
    # Guard first: no variation in one variable means nothing to divide by.
    if not (stdev_x > 0 and stdev_y > 0):
        return 0
    return covariance(xs, ys) / stdev_x / stdev_y

def least_square_fit(x, y):
    """Return (alpha, beta) of the least-squares line y = alpha + beta * x.

    beta is correlation(x, y) scaled by the ratio of the standard
    deviations of y and x; alpha is chosen so that the line passes
    through the point (mean(x), mean(y)).
    """
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    beta = r * spread_y / spread_x
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

def total_sum_of_squares(y):
    """Return the total squared deviation of the values in y from their mean."""
    squared_deviations = [deviation ** 2 for deviation in de_mean(y)]
    return sum(squared_deviations)

def predict(alpha, beta, x_i):
    """Return the value of the line with intercept alpha and slope beta at x_i."""
    slope_part = beta * x_i
    return slope_part + alpha

def error(alpha, beta, x_i, y_i):
    """Return the signed residual: predicted value at x_i minus the observed y_i."""
    predicted = predict(alpha, beta, x_i)
    return predicted - y_i

def sum_of_sqerrors(alpha, beta, x, y):
    """Return the sum of squared residuals of the line (alpha, beta) over the paired x, y."""
    squared_residuals = [
        error(alpha, beta, x_i, y_i) ** 2
        for x_i, y_i in zip(x, y)
    ]
    return sum(squared_residuals)

def r_squared(alpha, beta, x, y):
    """Return R-squared of the line (alpha, beta) on the data.

    This is one minus the ratio of the residual sum of squares to the
    total sum of squares of y.
    """
    residual_ss = sum_of_sqerrors(alpha, beta, x, y)
    total_ss = total_sum_of_squares(y)
    return 1.0 - (residual_ss / total_ss)

def loadDataset():
    """Read ``Advertising.csv`` from the working directory and return its data rows.

    The first row is treated as the header: it is printed and left out of
    the result.

    Returns:
        list[list[str]]: one list of string fields per remaining row, or an
        empty list when the file contains no rows at all.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # are altered by universal-newline translation.
    with open('Advertising.csv', newline='') as csvfile:
        csv_reader = csv.reader(csvfile)
        header = next(csv_reader, None)
        if header is None:
            # Completely empty file: nothing to report and no data rows.
            return []
        print('Header:', header)
        return list(csv_reader)

def main():
    """Fit Sales against each advertising channel and print the fit and R-squared.

    For TV, Radio and Newspaper in turn, fits the simple linear regression
    ``Sales = alpha + beta * channel`` and prints the coefficients followed
    by R-squared. Sales is the response, as the exercise specifies, so it
    is passed as ``y`` to ``least_square_fit`` and ``r_squared``.
    """
    data_set = loadDataset()
    # Fields 1-4 are read as TV, Radio, Newspaper, Sales; presumably field 0
    # is a row index - confirm against the CSV header.
    tv = [float(row[1]) for row in data_set]
    radio = [float(row[2]) for row in data_set]
    newspaper = [float(row[3]) for row in data_set]
    sales = [float(row[4]) for row in data_set]

    channels = [
        ('Sales And Tv:', tv),
        ('Sales And Radio:', radio),
        ('Sales & Newspaper:', newspaper),
    ]
    for label, predictor in channels:
        print(label)
        # Predictor first, response second: least_square_fit(x, y) models y from x.
        alpha, beta = least_square_fit(predictor, sales)
        print('(Alpha, Beta): (', alpha, ',', beta, ')')
        print('R-Squared:', r_squared(alpha, beta, predictor, sales))


if __name__ == '__main__':
    main()
