In [13]:
import random
from math import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1.

In [22]:
class MLR:
    def __init__(self, beta, alpha, iterations):
        self.X = []
        self.Y = []
        self.beta = beta
        self.alpha = alpha
        self.iterations = iterations
        
    def generateData(self):
        x1 = [random.random() for i in range(500)]
        x2 = [random.random() for i in range(500)]
        
        for i in range(500):
            y = self.beta[0] + self.beta[1]*x1[i] + self.beta[2]*x2[i]
            self.X.append([1, x1[i], x2[i]])
            self.Y.append(y)
        
        self.X = np.array(self.X)
        self.Y = np.array(self.Y)
        
    def findH(self, X, beta):
        return X[0]*beta[0] + X[1]*beta[1] + X[2]*beta[2]
        
    def findCost(self, beta):
        costSum = 0
        m = len(self.Y)
        for i in range(m):
            h = self.findH(self.X[i], beta)
            costSum += (self.Y[i] - h)**2 
            
        return costSum/2

    def batchGradientDescent(self):
        m = len(self.Y)
        estBeta = np.array([random.random(), random.random(), random.random()])
        
        costs = []
        cost = self.findCost(estBeta)
        costs.append(cost)
        
        for i in range(self.iterations):
            for j in range(self.X.shape[1]):
                estBeta[j] = estBeta[j] - (self.alpha/2)*(sum([(self.findH(self.X[i], estBeta) - self.Y[i])*self.X[i][j] for i in range(m)]))
                
            cost = self.findCost(estBeta)
            costs.append(cost)

        print(f"Estimated values of B0 = {estBeta[0]}, B1 = {estBeta[1]}, B2 = {estBeta[2]}")
                
    
    def stochasticGradientDescent(self):
        m = len(self.Y)
        estBeta = np.array([random.random(), random.random(), random.random()])
        
        costs = []
        cost = self.findCost(estBeta)
        costs.append(cost)
        
        for i in range(m):
            for j in range(self.X.shape[1]):
                estBeta[j] = estBeta[j] - (self.alpha)*(sum([(self.findH(self.X[i], estBeta) - self.Y[i])*self.X[i][j] for i in range(m)]))
                
                cost = self.findCost(estBeta)/m
                costs.append(cost)

        print(f"Estimated values of B0 = {estBeta[0]}, B1 = {estBeta[1]}, B2 = {estBeta[2]}\n")

In [23]:
givenBeta = [
                [2, 4, 6],
                [3, 5, 7],
                [5, 12, 13],
                [8, 15, 17],
                [7, 24, 25]
            ]
for beta in givenBeta:
    print("For beta:", beta)
    mlr = MLR(beta, 0.001, 500)
    mlr.generateData()
    
    print("---- Batch gradient descent ----")
    mlr.batchGradientDescent()
    print("---- Stochastic gradient descent ----")
    mlr.stochasticGradientDescent()

For beta: [2, 4, 6]
---- Batch gradient descent ----
Estimated values of B0 = 2.001499793748013, B1 = 3.9983673376760738, B2 = 5.998804008301194
---- Stochastic gradient descent ----
Estimated values of B0 = 2.000000220192475, B1 = 3.999999773253872, B2 = 5.999999813016611

For beta: [3, 5, 7]
---- Batch gradient descent ----
Estimated values of B0 = 3.002222196595533, B1 = 4.997914871188922, B2 = 6.997888641720039
---- Stochastic gradient descent ----
Estimated values of B0 = 3.0000005345234944, B1 = 4.999999534731505, B2 = 6.99999945968483

For beta: [5, 12, 13]
---- Batch gradient descent ----
Estimated values of B0 = 5.00922269995871, B1 = 11.992692731704437, B2 = 12.98996323659179
---- Stochastic gradient descent ----
Estimated values of B0 = 5.0000037277426905, B1 = 11.999997432418168, B2 = 12.99999560337281

For beta: [8, 15, 17]
---- Batch gradient descent ----
Estimated values of B0 = 8.00456061820785, B1 = 14.996096971317844, B2 = 16.995491114748933
---- Stochastic gradient d

## 2.

In [30]:
class RainMLR():

    def __init__(self, alpha, iterations):
        self.iterations = iterations
        self.alpha = alpha
        self.features = []
        self.results = []
        self.predictions = []
        self.X = []
        self.Y = []

    def generateDataSet(self):
        df = pd.read_csv("weather.csv")
        results = 'PrecipitationSumInches'
        features = ['TempAvgF', 'DewPointAvgF', 'HumidityAvgPercent', 'SeaLevelPressureAvgInches', 'VisibilityAvgMiles', 'WindAvgMPH']
        for col in df.columns:
            if col not in results and col not in features:
                df.drop(col, axis=1, inplace=True)

        df = df[df[results]!="T"]
        for col in features:
            df = df[df[col]!="-"]

        for col in df.columns:
            if col != results:
                df[col] = (df[col].astype('float') - df[col].astype('float').min()) \
                                /(df[col].astype('float').max() - df[col].astype('float').min())

        bias = np.array([1 for _ in range(0, df.shape[0])])
        tempX = np.asarray(df[features])

        X = np.matrix(np.c_[bias.T, tempX], dtype=float)
        Y = np.matrix(df[results], dtype=float).T
        
        self.results = results
        self.features = features
        return X, Y

    def findH(self, x, beta):
        h = np.sum([x[i]*beta[i] for i in range(len(x))])
        return h
    
    def findCost(self, X, Y, estBeta):
        cost = np.sum([(self.findH(X[i], estBeta)-Y[i])**2 for i in range(len(Y))])/2*len(Y)
        return cost

    def batchGradientDescent(self):
        X, Y = self.generateDataSet()
        self.X = np.array(X)
        X = np.array(X)
        self.Y = np.array(Y)
        Y = np.array(Y)
        estBeta = np.array([random.random() for _ in range(X.shape[1])])

        costs = []
        cost = self.findCost(X, Y, estBeta)
        costs.append(cost)
        
        for i in range(self.iterations):
            for j in range(X.shape[1]):
                estBeta[j] = estBeta[j] - (self.alpha/len(Y))*(np.sum([(self.findH(X[i],estBeta) - Y[i])*X[i][j] for i in range(len(Y))]))
            
            cost = self.findCost(X, Y, estBeta)
            costs.append(cost)

        for i in range(len(self.features)):
            print(f"Estimated Beta for {self.features[i]}: {estBeta[i]}")
            
        return estBeta
        
    def predict(self):
        predictions = []
        estimatedBeta = self.batchGradientDescent()
        for x in self.X: 
            predictions.append(self.findH(x, estimatedBeta))
            
        self.predictions = predictions
    
    def evaluate(self):
        # R2 METRIC
        sst = 0
        ssr = 0
        sse = 0
        
        y_bar = np.mean(self.Y)
        
        predictions = self.predictions
        
        for y in predictions:
            ssr += (y - y_bar)**2
        
        i = 0
        for y in self.Y:
            sse += (y - predictions[i])**2
            i += 1
            
        for y in self.Y:
            sst += (y - y_bar)**2
            
        print("\nSSE: ", sse)
        print("SSR: ", ssr)
        print("SSR + SSE: ", ssr+sse)
        print("SSTO: ", sst)
        
        r2 = 1 - ssr/(ssr + sse)
        print("R2 Metric: ", r2)
        

In [31]:
model = RainMLR(0.1, 500)
model.predict()
model.evaluate()

Estimated Beta for TempAvgF: 0.14045783806389292
Estimated Beta for DewPointAvgF: -0.20981449079940775
Estimated Beta for HumidityAvgPercent: 0.41890940159642814
Estimated Beta for SeaLevelPressureAvgInches: 0.3635947886108872
Estimated Beta for VisibilityAvgMiles: 0.11109872053645395
Estimated Beta for WindAvgMPH: -0.5829797555253614

SSE:  [192.11733877]
SSR:  43.985948982487926
SSR + SSE:  [236.10328775]
SSTO:  [239.69267291]
R2 Metric:  [0.8137004]
