# Playground AML

In [4]:
from __future__ import print_function, absolute_import, division # You don't need to know what this is. 
import numpy as np # this imports numpy, which is used for vector- and matrix calculations
import pandas as pd

### Linear mapping

In [5]:
def linear_forward(x_input, W, b):
    """Perform the mapping of the input
    # Arguments
        x_input: input of the linear function - np.array of size `(n_objects, n_in)`
        W: np.array of size `(n_in, n_out)`
        b: np.array of size `(n_out,)`
    # Output
        the output of the linear function 
        np.array of size `(n_objects, n_out)`
    """
    output = np.add(np.dot(x_input, W),b)
    return output

In [6]:
X_test = np.array([[1, -1],
                   [-1, 0],
                   [1, 1]])

W_test = np.array([[4],
                   [2]])

b_test = np.array([3])

print(X_test.shape)
print(W_test.shape)
print(b_test.shape)


(3, 2)
(2, 1)
(1,)


In [7]:
test = linear_forward(X_test, W_test, b_test)
test

array([[ 5],
       [-1],
       [ 9]])

In [5]:
test.shape

(3, 1)

### Linear Grad W

In [8]:
X_test = np.array([[ 3.52700412, -5.26063245, -1.44725797, -1.75845931,  7.17111801 , 6.98000586],
                  [ 0.96428013, -5.39966807 , 0.11565689 , 7.52925355 ,-8.42978878 ,-5.65760436],
                  [ 7.66748717 , 8.04351906, -5.27123896 , 5.78939988 , 4.26351951, -4.11783328]])

W_test = np.array([[-7.19425403],
     [ 0.52886574],
     [-7.19402885],
     [ 6.96126278],
     [ 1.7717694 ],
     [ 4.46366547]])

b_test = np.array( [-5.06889404])

grad_out = np.array([[1.84038361],
                    [8.87623311],
                     [3.63320643]])



In [7]:
print("X", X_test.shape)
print("W", W_test.shape)
print("b", b_test.shape)
print("grad", grad_out.shape)

X (3, 6)
W (6, 1)
b (1,)
grad (3, 1)


In [8]:
test1 = linear_forward(X_test, W_test, b_test)
test1

array([[ 8.80727092],
       [-3.47009124],
       [11.41951938]])

In [9]:
test1.shape

(3, 1)

In [10]:
grad_out * X_test

array([[  6.49104057,  -9.68158174,  -2.66350985,  -3.23623969,
         13.19760805,  12.84588838],
       [  8.55917522, -47.92871251,   1.02659752,  66.83140965,
        -74.82477028, -50.21821514],
       [ 27.85756369,  29.22376517, -19.15149928,  21.03408487,
         15.4902465 , -14.96093835]])

In [11]:
test2 = np.subtract(test1,grad_out)
test2

array([[  6.96688731],
       [-12.34632435],
       [  7.78631295]])

In [12]:
test3 = np.transpose(X_test).dot(test2)
test3

array([[ 72.36837968],
       [ 92.64517658],
       [-52.55433686],
       [-60.131515  ],
       [187.23437472],
       [ 86.41679409]])

In [13]:
test3.shape

(6, 1)

In [14]:
np.log(np.exp(6))

6.0

In [15]:
a = np.array([[4],
            [2]])

b = np.array([[3],
            [2]])

In [16]:
np.multiply(a,b)

array([[12],
       [ 4]])

In [17]:
a.size

2

In [18]:
a = np.array([[-1.],
 [-3.],
 [1.],
 [4.],
 [20.],
 [-21.],
 [0.],
 [1.],
 [3.],
 [-2.],
 [1.]])

In [19]:
a[np.argmin(a)]

array([-21.])

In [43]:
X = np.array([[-5.15396609, -6.29406452],
 [-7.86516278,  7.82321484],
 [-4.0106841 ,  1.73861527],
 [-2.15348225, -4.51898921],
 [ 9.55703099, -4.0612693 ],
 [ 8.63890007 ,-1.79864081],
 [-6.52169324, -6.19049942],
 [ 1.8203589 ,  5.37819601],
 [ 6.27816448, -3.69200738]])

Y = np.array([[0.],
 [0.],
 [1.],
 [0.],
 [0.],
 [0.],
 [0.],
 [1.],
 [0.]])

feature_index = 1
split_value = -2.745324097923354

In [21]:
#Y_list = []
#for i in np.arange(X.shape[1]):
#    ind = X[:,i] < split_value
#    print(X[ind, i])
#    print(Y[ind])
#    Y_ind += Y[ind]

In [62]:
bla = X[0,]

In [64]:
type(bla)

numpy.ndarray

In [23]:
xs = X[X < split_value]

In [24]:
for i in np.arange(X.shape[1]):
    #print(X[:,i])
    #print([X[:,i] < split_value])
    ind = [X[:,i] < split_value]
    #print(Y[tuple(ind)])
    if i == 0:
        ys = Y[tuple(ind)]
    else:
        current = Y[tuple(ind)]
        ys = np.vstack([ys,current])

In [25]:
print(xs, ys)

[-5.15396609 -6.29406452 -7.86516278 -4.0106841  -4.51898921 -4.0612693
 -6.52169324 -6.19049942 -3.69200738] [[0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [26]:
xs.shape

(9,)

In [27]:
ys.shape

(9, 1)

In [28]:
ys = ys.reshape(X.shape[0],1)

In [29]:
ys.shape

(9, 1)

In [30]:
xs = xs.reshape(X.shape[0],1)

In [31]:
xs.shape

(9, 1)

In [32]:
np.concatenate([xs, ys], axis=-1)

array([[-5.15396609,  0.        ],
       [-6.29406452,  0.        ],
       [-7.86516278,  1.        ],
       [-4.0106841 ,  0.        ],
       [-4.51898921,  0.        ],
       [-4.0612693 ,  0.        ],
       [-6.52169324,  0.        ],
       [-6.19049942,  0.        ],
       [-3.69200738,  0.        ]])

In [58]:
a = np.array([[0.],
 [0.],
 [1.],
 [0.]])

b = np.array([[0.],
 [0.],
 [1.],
 [0.]])

In [59]:
a = a.reshape(4,)
b = b.reshape(4,)

In [61]:
np.vstack([a,b])

array([[0., 0., 1., 0.],
       [0., 0., 1., 0.]])

In [36]:
X.shape

(9, 2)

In [40]:
emp_arr = np.empty(dat.shape)

In [39]:
dat = np.concatenate([X, Y], axis=-1)
dat.shape

(9, 3)

In [None]:
for feature_index in range(X.shape[1]):
    for rind in np.arange(X.shape[0]):
        if X[rind, feature_index] < split_value:
            if rind
            emp_arr[rind, ] = dat[rind, :]

In [42]:
emp_arr

array([[-0.00000000e+000,  1.73060448e-077,  7.90505033e-323],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000, -0.00000000e+000],
       [-0.00000000e+000, -4.44659081e-323, -0.00000000e+000],
       [ 3.95252517e-323,  0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000],
       [ 0.00000000e+000,  0.00000000e+000,  0.00000000e+000]])

In [None]:
type(emp_arr)

In [35]:
X = np.array([[-1, 2],
 [3,  4]])

Y = np.array([[0.],
 [1.]])

feature_index = 0
split_value = 0

In [36]:
for rind in np.arange(X.shape[0]):
        print(X[rind, feature_index])
        if X[rind, feature_index] < split_value:
            print("hi")
            try: XY_left
            except NameError: XY_left = dat[rind, :]
            else: XY_left = np.vstack([XY_left, dat[rind, :] ])

-1
hi


NameError: name 'dat' is not defined

In [39]:
 
try:
    XY_left
    XY_left = np.vstack([XY_left, dat[rind, :] ])
except NameError:
    XY_left = dat[rind, :]

In [44]:
def tree_split_data_left(X, Y, feature_index, split_value):
    dat = np.concatenate([X, Y], axis=-1)
    
    '''' Attempt 1
    for rind in np.arange(X.shape[0]):
        if X[rind, feature_index] < split_value:
            if rind == 0:
                XY_left = dat[rind, :]
            else:
                current = dat[rind, :]
                XY_left = np.vstack([XY_left,current])
    '''
    
    for rind in np.arange(X.shape[0]):
        if X[rind, feature_index] < split_value:
            try:
                XY_left
                XY_left = np.vstack([XY_left, dat[rind, :] ])
            except NameError:
                XY_left = dat[rind, :]
    
    return XY_left


In [53]:
test = tree_split_data_left(X, Y, 1, 0)

In [54]:
test

array([[-5.15396609, -6.29406452,  0.        ],
       [-2.15348225, -4.51898921,  0.        ],
       [ 9.55703099, -4.0612693 ,  0.        ],
       [ 8.63890007, -1.79864081,  0.        ],
       [-6.52169324, -6.19049942,  0.        ],
       [ 6.27816448, -3.69200738,  0.        ]])

In [55]:
X

array([[-5.15396609, -6.29406452],
       [-7.86516278,  7.82321484],
       [-4.0106841 ,  1.73861527],
       [-2.15348225, -4.51898921],
       [ 9.55703099, -4.0612693 ],
       [ 8.63890007, -1.79864081],
       [-6.52169324, -6.19049942],
       [ 1.8203589 ,  5.37819601],
       [ 6.27816448, -3.69200738]])

In [None]:
Y = np.array([[0.],
 [0.],
 [0.],
 [0.],
 [0.],
 [1.],
 [1.],
 [0.],
 [1.],
 [1.],
 [1.],
 [1.],
 [1.],
 [1.],
 [0.],
 [1.],
 [0.],
 [1.],
 [1.]])

In [None]:
t = 0
n = Y.shape[0]
for i in Y:
    t += i

In [None]:
t/n > 0.5

In [40]:
A

345