In [1]:
# example with classes
class collatz:
    """Endless iterator over the Collatz sequence that follows a start value.

    Every call to ``next`` replaces the stored value ``x`` by ``x // 2`` when
    ``x`` is even and by ``3*x + 1`` when it is odd, then returns the new
    value. ``StopIteration`` is never raised; the caller decides when to stop
    (for example once the value 1 has been returned).
    """

    def __init__(self, x=1):
        """Store the starting value ``x`` (defaults to 1)."""
        self.x = x

    def f(self):
        """Return the Collatz successor of the current value without storing it."""
        if self.x % 2 == 0:
            # Floor division keeps the result an exact int. True division
            # would return a float and silently lose precision above 2**53.
            return self.x // 2
        else:
            return 3*self.x + 1

    def __iter__(self):
        """Return the instance itself; it is its own iterator."""
        return self

    def __next__(self):
        """Advance one step, remember the new value and return it."""
        self.x = self.f()
        return self.x

In [9]:
# Small start value for a quick check; the next cell rebinds myclass before it is iterated.
myclass = collatz(x=10)

In [10]:
# Start the sequence at 1630877; the following cell counts the steps until it reaches 1.
myclass = collatz(x=1630877)

In [11]:
# Count how many calls to next() are needed before the sequence comes down to 1.
# The call that returns 1 is included in the count because i starts at 1.
i = 1
while True:
    if not (next(myclass) > 1):
        break
    i += 1
print('The Collatz conjecture worked in: '+str(i),' iterations.')
                                    

The Collatz conjecture worked in: 207  iterations.


In [12]:
# Show the step count computed by the loop in the previous cell.
i

207

In [16]:
## examples for gradient descent to solve a linear regression problem
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Load the data set from Advertising.csv (path is relative to the working directory).
data = pd.read_csv("Advertising.csv")

In [18]:
# Display the frame as loaded; it still carries the 'Unnamed: 0' column removed in the next cell.
data

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [19]:
# Remove the 'Unnamed: 0' column (it looks like a row counter stored in the CSV).
# Reassigning instead of inplace=True, together with errors='ignore', keeps the
# cell idempotent: running it a second time no longer raises KeyError for the
# column that is already gone.
data = data.drop(columns='Unnamed: 0', errors='ignore')

In [20]:
# Display the frame again to confirm the 'Unnamed: 0' column is gone.
data

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [21]:
# Goal: predict Sales from the TV, Radio, and Newspaper columns with a linear model:
# Sales ~ w0 + w1*TV + w2*Radio + w3*Newspaper

In [22]:
# Response vector: the Sales column. Design matrix: every remaining column.
y = data['Sales'].values
x = data.drop(columns='Sales').values

In [23]:
# Augment the feature matrix with a leading column of ones, so that the
# first weight acts as the intercept term.
x1 = np.hstack([np.ones((len(x), 1)), x])

In [24]:
# code the gradient descent
def f(w, features=None, targets=None):
    """Mean squared error of a linear model and its gradient.

    Parameters
    ----------
    w : array of weights, one per column of the design matrix.
    features : optional design matrix; when omitted the notebook-level
        ``x1`` is used, so existing ``f(w)`` calls behave as before.
    targets : optional response vector; when omitted the notebook-level
        ``y`` is used.

    Returns
    -------
    (L, gradL) : the loss ``mean((targets - features @ w)**2)`` and its
        gradient with respect to ``w``.
    """
    design = x1 if features is None else features
    response = y if targets is None else targets
    n_rows = len(design)
    errors = response - design @ w
    # A dot product is the sum of squares computed inside NumPy; the built-in
    # sum() would iterate over the array element by element in Python.
    L = (errors @ errors) / n_rows
    gradL = (-2 / n_rows) * (errors @ design)
    return L, gradL

In [25]:
# Random starting weights, one per column of x1, drawn from a standard normal.
# A fixed seed makes the starting point (and everything computed from it)
# reproducible when the notebook is re-run from a fresh kernel.
w = np.random.default_rng(42).normal(0, 1, size=x1.shape[1])

In [26]:
# Evaluate the loss and its gradient at the random starting weights.
L, grad = f(w)

In [27]:
# Inspect the gradient at the starting point.
grad

array([  -342.00125799, -70906.91411708,  -7728.47493403,  -9792.92691702])

In [32]:
# Learning rate: the step size multiplying the gradient in the descent update.
lr = 0.00003

In [34]:
# Plain gradient descent on the loss returned by f.
tolerance = 1e-6   # stop once the L1 norm of a weight update falls below this
N = 230000         # iteration budget
maxiter = N        # remaining iterations; N - maxiter is the current iteration index
w_old = w
while maxiter>0:
    L, grad = f(w_old)
    w_new = w_old - lr*grad
    # np.sum/np.abs stay inside NumPy instead of looping in Python.
    if np.sum(np.abs(w_new-w_old))<tolerance:
        print('Convergence was achieved for the given tolerance')
        break
    # Progress report every 5000 iterations.
    if (N - maxiter)%5000 == 0:
        print('Objective value: ','%.4f'%L, 'Iteration: ','%4i'% (N-maxiter))
    w_old = w_new
    maxiter -= 1
else:
    # Reached only when the loop ends without a break, i.e. the budget ran out.
    print('Maximum number of iterations reached without meeting the tolerance')

Objective value:  44266.7779 Iteration:     0
Objective value:  2.8617 Iteration:  5000
Objective value:  2.8552 Iteration:  10000
Objective value:  2.8492 Iteration:  15000
Objective value:  2.8437 Iteration:  20000
Objective value:  2.8387 Iteration:  25000
Objective value:  2.8342 Iteration:  30000
Objective value:  2.8300 Iteration:  35000
Objective value:  2.8261 Iteration:  40000
Objective value:  2.8226 Iteration:  45000
Objective value:  2.8194 Iteration:  50000
Objective value:  2.8164 Iteration:  55000
Objective value:  2.8137 Iteration:  60000
Objective value:  2.8112 Iteration:  65000
Objective value:  2.8090 Iteration:  70000
Objective value:  2.8069 Iteration:  75000
Objective value:  2.8050 Iteration:  80000
Objective value:  2.8032 Iteration:  85000
Objective value:  2.8016 Iteration:  90000
Objective value:  2.8002 Iteration:  95000
Objective value:  2.7988 Iteration:  100000
Objective value:  2.7976 Iteration:  105000
Objective value:  2.7964 Iteration:  110000
Object

In [35]:
# Weights from the last gradient-descent update; the first entry multiplies the column of ones.
w_new

array([ 2.82699094e+00,  4.60702271e-02,  1.89813671e-01, -3.57819860e-04])

In [36]:
# compare against an off-the-shelf ("canned") implementation
from sklearn.linear_model import LinearRegression

In [37]:
# x1 already contains a column of ones, so the estimator's own intercept is switched off.
model = LinearRegression(fit_intercept=False)

In [38]:
# Fit on the augmented matrix; the first coefficient belongs to the column of ones.
model.fit(x1,y)

In [39]:
# Fitted coefficients, to be compared with the gradient-descent weights in w_new.
model.coef_

array([ 2.93888937e+00,  4.57646455e-02,  1.88530017e-01, -1.03749304e-03])

In [95]:
from sklearn.preprocessing import StandardScaler

In [96]:
# Scaler with default settings; it is fitted to the feature matrix in the next cell.
scale = StandardScaler()

In [97]:
# Fit the scaler on x (features without the column of ones) and keep the transformed matrix.
xs = scale.fit_transform(x)

In [99]:
# Centre the response on its mean so the model can be fitted without an intercept.
yc = y - y.mean()

In [100]:
# Model: yc = xs @ w.
# The closed-form OLS solution requires xs^T xs to be invertible, so check
# its determinant first.
np.linalg.det(np.matmul(xs.T, xs))

6964771.207383833

In [101]:
# Ordinary least squares through the normal equations (xs^T xs) w = xs^T yc.
# Solving the linear system gives the same weights as multiplying by the
# explicit inverse, but is faster and numerically more accurate.
w = np.linalg.solve(xs.T@xs, xs.T@yc)

In [102]:
# Closed-form OLS weights, one per column of xs.
w

array([ 3.91925365,  2.79206274, -0.02253861])

In [103]:
# Cross-check: fit the same intercept-free model with scikit-learn and show
# its coefficients.
model = LinearRegression(fit_intercept=False).fit(xs, yc)
model.coef_

array([ 3.91925365,  2.79206274, -0.02253861])

In [104]:
# Regularization demo: build a redundant feature that is an exact linear
# combination of the first two columns of xs.
fake = 3*xs[:,1] + 2*xs[:,0]

In [105]:
# Append the redundant feature as an extra last column of the design matrix.
x_rd = np.hstack([xs, fake.reshape(-1, 1)])

In [106]:
# Determinant of the Gram matrix once a linearly dependent column is present.
np.linalg.det(np.matmul(x_rd.T, x_rd))

9.501633768264364e-06

In [107]:
# Explicit inverse of the same Gram matrix, shown to inspect the size of its entries.
np.linalg.inv(np.matmul(x_rd.T, x_rd))

array([[ 2.93203101e+12,  4.39804651e+12,  1.57547936e-03,
        -1.46601550e+12],
       [ 4.39804651e+12,  6.59706977e+12,  6.68179442e-04,
        -2.19902326e+12],
       [ 1.36860710e-03,  3.57871056e-04,  5.72593689e-03,
        -7.91241511e-04],
       [-1.46601550e+12, -2.19902326e+12, -8.94677640e-04,
         7.33007752e+11]])

In [110]:
# Determinant after adding 0.0001 times the identity to the Gram matrix.
np.linalg.det(np.matmul(x_rd.T, x_rd) + 0.0001*np.identity(x_rd.shape[1]))

9750.690941565117

In [112]:
# Ridge weights: solve (x_rd^T x_rd + 0.0001*I) w = x_rd^T yc.
# Solving the system avoids forming the explicit inverse of a matrix that is
# only kept away from singularity by the small diagonal term.
w = np.linalg.solve(x_rd.T@x_rd + 0.0001*np.eye(x_rd.shape[1]), x_rd.T@yc)

In [113]:
# Ridge weights from the regularized normal equations, one per column of x_rd.
w

array([ 1.60286782, -0.68251437, -0.0225387 ,  1.15819251])

In [114]:
from sklearn.linear_model import Ridge

In [115]:
# Fit the same intercept-free ridge problem with scikit-learn, using the same
# penalty value, and show its coefficients.
model = Ridge(alpha=0.0001, fit_intercept=False).fit(x_rd, yc)
model.coef_

array([ 1.60286782, -0.68251437, -0.0225387 ,  1.15819251])