In [1]:
SEED = 666

from random import normalvariate
from random import uniform
from math import sqrt

def generate_dataset(n, x_min, x_max, slope, intercept, e_mean, e_std):
    """
    Generates a dataset according to the input parameters
    
    Keyword Arguments:
    n:            number of observations
    x_min, x_max: limits of the preditor's range (x_min <= x <= x_max)
    slope:        slope of the true linear model
    intercept:    intercept of the true linear model
    e_mean:       error term mean
    e_std:        error term standard deviation
    
    Output:
    X: predictor values
    Y: response values
    """
    
    X = [uniform(x_min,x_max) for _ in range(n)]
    Y = [intercept + slope * x + normalvariate(e_mean,e_std) for x in X]
    
    return X, Y
    
def mean(L):
    """Returns the mean of the elements of a list."""
    return sum(L)/len(L)

def linear_regression(X, Y):
    """
    Simple linear regression Y on X.
    
    
    Keyword Arguments:
    X -- the predictor
    Y -- the response
    """
    
    n = len(X)
    X_mean, Y_mean  = mean(X), mean(Y)
    
    slope_numerator = sum([(x - X_mean) * (y - Y_mean) for x, y in zip(X,Y)])
    slope_denominator = sum([(x - X_mean)**2 for x in X])
    
    slope = slope_numerator / slope_denominator
    intercept = Y_mean - slope * X_mean
    
    RSS = sum([(y - (intercept + slope * x))**2 for x, y in zip(X,Y)])
    RSE = sqrt(RSS / (n - 2))
    
    
    SE_slope = sqrt((RSE**2) * ((1/n) + (X_mean**2 / slope_denominator)))
    SE_intercept = sqrt((RSE**2) / slope_denominator)
    
    
    
    return slope, intercept, RSS, SE_slope, SE_intercept

In [7]:
linear_regression([0,1,2,3,4],[1,3,5,7,9])

X, Y = generate_dataset(100,-10,10,1,0,0,0.5)

linear_regression(X,Y)

(0.9994748010471696,
 -0.029963914417846516,
 19.19228365708341,
 0.04425711949807501,
 0.0075978552737142845)

In [4]:
help(mean)
help(linear_regression)
help(generate_dataset)

Help on function mean in module __main__:

mean(L)
    Returns the mean of the elements of a list.

Help on function linear_regression in module __main__:

linear_regression(X, Y)
    Simple linear regression Y on X.
    
    
    Keyword Arguments:
    X -- the predictor
    Y -- the response

Help on function generate_dataset in module __main__:

generate_dataset(n, x_min, x_max, slope, intercept, e_mean, e_std)
    Generates a dataset according to the input parameters
    
    Keyword Arguments:
    n: number of observations
    slope: slope of the true linear model
    intercept: intercept of the true linear model
    e_mean: error term mean
    e_std: error term standard deviation
    
    Output:
    X: predictor values
    Y: response values

