In [1]:
import numpy as np
from scipy.spatial import distance
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Read the advertising table and split it into a feature matrix and a target vector.
adver_data = pd.read_csv('data/advertising.csv')
X = adver_data[['TV', 'Radio', 'Newspaper']].values
y = adver_data['Sales'].values

# Standardise every feature column: subtract its mean, divide by its
# (population, ddof=0) standard deviation.
means = X.mean(axis=0)
stds = X.std(axis=0)
X_scaled = (X - means) / stds

# Wrap the scaled features in a DataFrame and append a constant column of ones
# named 'w0' (presumably the intercept/bias term -- confirm against later cells).
X_scaled_df = pd.DataFrame(
    X_scaled, columns=['TV', 'Radio', 'Newspaper']
).assign(w0=1)

In [37]:
class SGDregression2:
    """Linear regression trained with stochastic gradient descent.

    Each step picks one random sample, moves the weights against the gradient
    of that sample's squared error, and records the mean squared error over
    the full data set. Training stops when the Euclidean distance between two
    consecutive weight vectors drops to ``alpha`` or below, or after
    ``n_iter`` steps, whichever comes first.
    """

    def __init__(self, random_state=42, eta=0.01, n_iter=1e4, alpha=1e-8):
        """
        SGD regressor with an MSE loss and a Euclidean-distance stopping rule.

        random_state - seed of the generator that picks the sample for each step
        eta          - learning rate (step size)
        n_iter       - maximum number of SGD steps
        alpha        - weight-change threshold used as the convergence criterion
        """
        self.__random_state = random_state
        self.__eta = eta
        self.__n_iter = n_iter
        self.__alpha = alpha
        # Fitted state is defined up front so the read-only properties below
        # are usable (and return "empty" values) before fit() has been called.
        self.__errors = []
        self.__n_iters = 0
        self.__model_weights = None

    def mserror(self, y, y_pred):
        """Return the mean squared error between targets y and predictions y_pred."""
        return np.sum((y - y_pred) ** 2) / len(y)

    def step(self, X, y, w, index):
        """
        Perform one SGD update using the single sample at position ``index``.

        X, y must support positional indexing (numpy arrays); w is the current
        weight vector. Returns the updated weight vector; w is not modified.
        """
        x = X[index]
        target = y[index]
        err = np.dot(x, w) - target
        # Gradient of the squared error of one sample, scaled by 1/len(X)
        # (kept exactly as in the original formulation).
        grad = 2 / len(X) * np.dot(x.T, err)
        return w - self.__eta * grad

    def fit(self, X, y):
        """
        Fit the weights on (X, y) and return them.

        X - 2-D array-like of shape (n_samples, n_features); numpy array or DataFrame
        y - 1-D array-like of length n_samples; numpy array or Series

        Raises ValueError if X has no rows or if y is not a 1-D sequence with
        one value per row of X.
        """
        # Coerce to plain arrays so that X[index] / y[index] are positional
        # lookups even when a DataFrame or a Series with a custom index is given.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.ndim != 1 or len(y) != n_samples:
            raise ValueError("y must be 1-D with one target value per row of X")

        # initial weights and "previous step" distance
        w = np.zeros(X.shape[1])
        weight_dist = np.inf
        # per-iteration MSE on the whole data set
        self.__errors = []
        # iteration counter
        iter_num = 0
        # Private generator: draws the same index sequence as seeding the
        # global numpy RNG would, but leaves the global RNG state untouched.
        rng = np.random.RandomState(self.__random_state)

        while weight_dist > self.__alpha and iter_num < self.__n_iter:
            # pick a random sample
            random_ind = rng.randint(n_samples)
            # compute the next weights
            w_next = self.step(X, y, w, random_ind)
            # how far the weights moved in this step
            weight_dist = distance.euclidean(w_next, w)
            # track the full-data error after the step
            self.__errors.append(self.mserror(y, np.dot(X, w_next)))
            # advance
            w = w_next
            iter_num += 1

        self.__model_weights = w
        self.__n_iters = iter_num
        return self.__model_weights

    @property
    def errors(self):
        """List of full-data MSE values, one per performed step ([] before fit)."""
        return self.__errors

    @property
    def n_iters(self):
        """Number of SGD steps performed by the last fit (0 before fit)."""
        return self.__n_iters

    @property
    def w(self):
        """Weights found by the last fit (None before fit)."""
        return self.__model_weights


# Instantiate the regressor with its default hyperparameters
# (random_state=42, eta=0.01, n_iter=1e4, alpha=1e-8); it is not fitted yet.
test = SGDregression2()