In [1]:
import pandas as pd

In [2]:
class Datapoint():
    def __init__(self, km, price):
        self.km = km
        self.price = price

    def __str__(self):
        return (f"km: {self.km}, price {self.price}")
        
    def __repr__(self):
        return (f"km: {self.km}, price {self.price}")


In [3]:
class Dataset():
    def __init__(self, data = None, path = None):
        self.data = data
        self.index = 0
        if (path != None):
            self.import_csv(path)


    def import_csv(self, path):
        self.data = pd.read_csv(path)


    def __getitem__(self, i):
        return Datapoint(self.data["km"][i], self.data["price"][i])
    

    def __len__(self):
        return (self.data.shape[0])
    

    def __iter__(self):
        return (self)
    

    def __next__(self):
        if self.index < len(self.data):
            result = self[self.index]
            self.index += 1
            return(result)
        self.index = 0
        raise StopIteration

In [34]:
class Irma():
    def __init__(self, dataset):
        self.dataset = dataset 
        self.theta0 = 0.0
        self.theta1 = 0.0
        self.oldtheta0 = 0.0
        self.oldtheta1 = 0.0
        self.middletheta0 = 0.0
        self.middletheta1 = 0.0
        self.learning_rate = 0.0001
        self.learning_rate_decay = 1.0 / 20
        self.minimal_improvement = 0.001
        self.newcost = 0
        self.oldcost = 0
        self.middle_cost = 0


    def predict_price(self, theta0, theta1, km):
        return (theta0 + (theta1 * km))


    def error_datapoint(self, datapoint, theta0, theta1):
        error = self.predict_price(theta0, theta1, datapoint.km) - datapoint.price
        return (error)


    def squared_error_datapoint(self, datapoint, theta0, theta1):
        return ((self.error_datapoint(datapoint, theta0, theta1))**2)
    

    def mean_squared_error_dataset(self, theta0, theta1):
        total_error = 0.0
        for elem in self.dataset:
            total_error += self.squared_error_datapoint(elem, theta0, theta1)
        return (total_error / (2 * len(self.dataset)))
    
    def middle_thetas(self):
        self.middle_theta0 = (self.theta0 + self.oldtheta0) / 2
        self.middle_theta1 = (self.theta1 + self.oldtheta1) / 2

    def update_middle_error(self):
        self.middle_thetas()
        self.middle_cost = self.mean_squared_error_dataset(self.middle_theta0, self.middle_theta1)

    def update_thetas(self):
        self.oldtheta0 = self.theta0
        self.oldtheta1 = self.theta1
        sum_errors_t0 = 0.0
        sum_errors_t1 = 0.0
        for elem in self.dataset:
            sum_errors_t0 += self.error_datapoint(elem, self.theta0, self.theta1)
            sum_errors_t1 += (self.error_datapoint(elem, self.theta0, self.theta1) * elem.km)
        temp0 = self.theta0 - (self.learning_rate / len(self.dataset) * sum_errors_t0)
        temp1 = self.theta1 - (self.learning_rate / len(self.dataset) * sum_errors_t1)
        self.theta0 = temp0
        self.theta1 = temp1
    
    
    def should_i_keep_learning(self):
        cost_is_changing = abs(self.oldcost - self.newcost) > self.minimal_improvement
        self.update_middle_error()
        if (cost_is_changing == False):
            if (abs(self.oldcost - self.middle_cost) > self.minimal_improvement):
                if self.middle_cost < self.oldcost:
                    self.theta0, self.theta1 = self.middle_theta0, self.middle_theta1
                return True
        return cost_is_changing
    
    def decrease_and_assign(self, theta0, theta1):
        self.learning_rate = self.learning_rate * self.learning_rate_decay
        self.theta0, self.theta1 = theta0, theta1

    def training_loop(self):
        go_on = True
        self.newcost = self.mean_squared_error_dataset(self.theta0, self.theta1)
        print(self)
        i = 0
        while (go_on):
            self.oldcost = self.newcost
            self.update_thetas()
            self.newcost = self.mean_squared_error_dataset(self.theta0, self.theta1)
            go_on = self.should_i_keep_learning()
            if (self.middle_cost < self.newcost and self.middle_cost < self.oldcost):
                self.decrease_and_assign(self.middle_theta0, self.middle_theta1)
            if (self.newcost > self.oldcost):
                self.decrease_and_assign(self.oldtheta0, self.oldtheta1)
            print(self)
            i += 1


    def __str__(self):
        return (f"error: {self.newcost}, \tthetas: [{self.theta0}, {self.theta1}], lr: {self.learning_rate}")

    def __repr__(self):
        return (f"error: {self.newcost}, \tthetas: [{self.theta0}, {self.theta1}], lr: {self.learning_rate}")

In [35]:
datasetto =  Dataset(path = "data.csv")
irma = Irma(datasetto)

In [36]:
irma.training_loop()

error: 20880519.291666668, 	thetas: [0.0, 0.0], lr: 0.0001
error: 2.1870210985011356e+19, 	thetas: [0.0, 0.0], lr: 5e-06
error: 5.467391355042436e+16, 	thetas: [0.015829583333333334, 1457.256313541667], lr: 2.5000000000000004e-07
error: 1.4148171068379791e+23, 	thetas: [0.015829583333333334, 1457.256313541667], lr: 1.2500000000000003e-08
error: 3.495390098212294e+20, 	thetas: [-0.9046272859227715, -115787.7184912073], lr: 6.250000000000002e-10
error: 4.284040438033102e+21, 	thetas: [-0.9046272859227715, -115787.7184912073], lr: 3.1250000000000015e-11
error: 3.082937443591835e+19, 	thetas: [-0.5389323847758982, -69207.18509200457], lr: 3.1250000000000015e-11
error: 1.10139301651828e+19, 	thetas: [-0.3203537284422396, -41365.64466658989], lr: 3.1250000000000015e-11
error: 3.9347751909714734e+18, 	thetas: [-0.18970757477228864, -24724.543401887193], lr: 3.1250000000000015e-11
error: 1.4057158136395868e+18, 	thetas: [-0.1116193492531004, -14778.03013176286], lr: 3.1250000000000015e-11
erro

In [41]:
import matplotlib.pyplot as plt

In [43]:
kms = [elem.km for elem in datasetto]
prices = [elem.prices for elem in datasetto]

print(kms)
print(prices)

AttributeError: 'Datapoint' object has no attribute 'prices'