In [1]:
import  numpy as np
from jarvis import Var

In [2]:
x = Var(np.array(1.0))
y = (x + 3) ** 2
y.backward()

In [3]:
print(y)

Variable(16.0)


In [4]:
print(x.grad)

8.0


In [5]:
def sphere(x, y):
    z = x ** 2 + y ** 2
    return z

x = Var(np.array(1.0))
y = Var(np.array(1.0))
z = sphere(x, y)
z.backward()
print(x.grad, y.grad)

2.0 2.0


In [6]:
def matyas(x, y):
    z = 0.26 * (x ** 2 + y ** 2) - 0.48 * x * y
    return z

x = Var(np.array(1.0))
y = Var(np.array(1.0))
z = matyas(x, y)
z.backward()
print(x.grad, y.grad)

0.040000000000000036 0.040000000000000036


In [7]:
def goldstein(x, y):
    z = (1 + (x + y + 1) ** 2 * (19 - 14 * x + 3 * x ** 2 - 14 * y + 6 * x * y + 3 * y ** 2)) * \
    (30 + (2 * x - 3 * y) ** 2 * (18 - 32 * x + 12 * x ** 2 + 48 * y - 36 * x * y + 27 * y ** 2))
    return z

In [8]:
x = Var(np.array(1.0))
y = Var(np.array(1.0))
z = goldstein(x, y)
z.backward()
print(x.grad, y.grad)

-5376.0 8064.0


In [2]:
from jarvis.function import sin

In [2]:
x = Var(np.array([[1, 2, 3], [4, 5, 6]]))
y = sin(x)
print(y)

NameError: name 'sin' is not defined

In [3]:
x = Var(np.array(np.random.randn(1, 2, 3)))
y = x.reshape((2, 3))
y = x.reshape(2, 3)

In [3]:
y

Variable([[-1.05973987  0.30969305  0.10793386]
                 [ 0.91314176  1.27727111  1.45856871]])

In [5]:
y = x.T

In [6]:
y

Variable([[[ 0.26592403]
                  [ 1.57802847]]
                
                 [[ 1.28685   ]
                  [-0.06067707]]
                
                 [[-0.57960121]
                  [ 0.9579509 ]]])

In [2]:
x0 = Var(np.array([1, 2, 3]))
x1 = Var(np.array([10]))
y = x0 + x1
print(y)

Variable([11 12 13])


In [3]:
y.backward()

In [4]:
print(x1.grad)

Variable([3])


In [2]:
import jarvis.functions as F

In [19]:
np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

I, H, O = 1, 10, 1
W1 = Var(0.01 * np.random.randn(I, H))
b1 = Var(np.zeros(H))
W2 = Var(0.01 * np.random.randn(H, O))
b2 = Var(np.zeros(O))

def predict(x):
    y = F.linear(x, W1, b1)
    y = F.sigmoid(y)
    y = F.linear(y, W2, b2)
    return y

lr = 0.2
iters = 10000

In [20]:
for i in range(iters):
    y_pred = predict(x)
    loss = F.mean_squared_error(y, y_pred)
    W1.cleargrad()
    b1.cleargrad()
    W2.cleargrad()
    b2.cleargrad()
    loss.backward()
    
    W1.data -= lr * W1.grad.data
    b1.data -= lr * b1.grad.data
    W2.data -= lr * W2.grad.data
    b2.data -= lr * b2.grad.data
    
    if i % 1000 == 0:
        print(loss)

Variable(0.8473695850105871)
Variable(0.2514286285183606)
Variable(0.24759485466749875)
Variable(0.23786120447054823)
Variable(0.21222231333102942)
Variable(0.1674218111783421)
Variable(0.09681932619992714)
Variable(0.07849528290602338)
Variable(0.07749729552991157)
Variable(0.07722132399559319)


In [22]:
import weakref
import jarvis.functions as F

class Parameter(Var): # Var変数とParamater変数を区別するため
    pass

# パラメータを保持しつつ、変数を変換

class Layer: 
    def __init__(self):
        self._params = set() # Layerクラスの持つパラメータを集約
    
    def __setattr__(self, name, value):
        if isinstance(value, (Parameter, Layer)):
            self._params.add(name)
        super().__setattr__(name, value) # __dict__メソッドで参照可能にするため
    
    def __call__(self, *inputs):
        outputs = self.forward(*inputs)
        if not isinstance(outputs, tuple):
            outputs = (outputs,)
        self.inputs = [weakref.ref(x) for x in inputs]
        self.outputs = [weakref.ref(y) for y in outputs]
        return outputs if len(outputs) > 1 else outputs[0]
    
    def forward(self, x):
        raise NotImplementedError()
    
    def params(self):
        for name in self._params:
            obj = self.__dict__[name]
            
            if isinstance(obj, Layer):
                yield from obj.params()
            else:
                yield obj
    
    def cleargrads(self):
        for param in self.params():
            param.cleargrad()

class Linear(Layer):
    def __init__(self, out_size, nobias=False, dtype=np.float32, in_size=None):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.dtype = dtype
        self.W = Parameter(None, name='W')
        
        if self.in_size is not None: #出力変数のサイズにのみ設定を加えられるように
            self._init_W()
        
        if nobias:
            self.b = None
        else:
            self.b = Parameter(np.zeros(O, dtype=dtype), name='b')
        
    def _init_W(self):
        I, O = self.in_size, self.out_size
        W_data = np.random.randn(I, O).astype(self.dtype) * np.sqrt(1 / I)
        self.W.data = W_data
    
    def forward(self, x):
        if self.W.data is None:
            self.in_size = x.shape[1]
            self._init_W()
    
        y = F.linear(x, self.W, self.b)
        return y

In [18]:
np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

l1 = Linear(10)
l2 = Linear(1)

def predict(x):
    y = l1(x)
    y = F.sigmoid(y)
    y = l2(y)
    return y

lr = 0.2
iters = 10000

for i in range(iters):
    y_pred = predict(x)
    loss = F.mean_squared_error(y, y_pred)
    
    l1.cleargrads()
    l2.cleargrads()
    loss.backward()
    
    for l in [l1, l2]:
        for p in l.params():
            p.data -= lr * p.grad.data
    if i % 1000 == 0:
        print(loss)

Variable(0.8165178492839196)
Variable(0.252679765778464)
Variable(0.25203169549150845)
Variable(0.2507406493069272)
Variable(0.2476576021496619)
Variable(0.24564848299222639)
Variable(0.24468344185024143)
Variable(0.2443326354371713)
Variable(0.4417265426340521)
Variable(0.26848597492114323)


In [23]:
model = Layer()
model.l1 = Linear(5)
model.l2 = Linear(3)

def predict(model, x):
    y = model.l1(x)
    y = F.sigmoid(y)
    y = model.l2(y)
    return y

for p in model.params():
    print(p)

model.cleargrads()

Variable([0.])
Variable(None)
Variable([0.])
Variable(None)


In [46]:
class Model(Layer):
    def plot(self, *inputs, to_file='model.png'):
        y = self.forward(*inputs)
        return plot_dot_graph(y, verbose=True, to_file=to_file)

In [59]:
import os
import subprocess

def _dot_var(v, verbose=False):
    dot_var = '{} [label="{}", color=orange, style=filled\n'
    name = ' ' if v.name is None else v.name
    if verbose and v.data is not None:
        if v.name is not None:
            name += ': '
        name += str(v.shape) + '  ' + str(v.dtype)
    return dot_var.format(id(v), name)

def _dot_func(f):
    dot_func = '{} [label="{}", color=lightblue, style=filled, shape=box]\n'
    ret = dot_func.format(id(f), f.__class__.__name__)
    dot_edge = '{} -> {}\n'
    for x in f.inputs:
        ret += dot_edge.format(id(x), id(f))
    for y in f.outputs:
        ret += dot_edge.format(id(f), id(y()))
    return ret

def get_dot_graph(output, verbose=True):
    txt = ''
    funcs = []
    seen_set = set()
    
    def add_func(f):
        if f not in seen_set:
            funcs.append(f)
            seen_set.add(f)
    
    add_func(output.func)
    txt += _dot_var(output, verbose)
    
    while funcs:
        func = funcs.pop()
        txt += _dot_func(func)
        for x in func.inputs:
            txt += _dot_var(x, verbose)
            
            if x.func is not None:
                add_func(x.func)
    
    return 'digraph g {\n' + txt + '}'

def plot_dot_graph(output, verbose=True, to_file='graph.png'):
    dot_graph = get_dot_graph(output, verbose)
    tmp_dir = os.path.abspath(os.curdir)
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    graph_path = os.path.join(tmp_dir, 'tmp_graph.dot')
    
    with open(graph_path, 'w') as f:
        f.write(dot_graph)
    
    extension = os.path.splitext(to_file)[1][1:]
    cmd = 'dot {} -T {} -o {}'.format(graph_path, extension, to_file)
    subprocess.run(cmd, shell=True)
    
    try:
        from IPython import display
        return display.Image(filename=to_file)
    except:
        pass

In [60]:
class TwoLayerNet(Model):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.l1 = Linear(hidden_size)
        self.l2 = Linear(out_size)
    
    def forward(self, x):
        y = F.sigmoid(self.l1(x))
        y = self.l2(y)
        return y
    

In [61]:
x = Var(np.random.randn(5, 10), name='x')
model = TwoLayerNet(100, 10)
model.plot(x)

In [2]:
class MLP(Model):
    def __init__(self, fc_output_size, activation=F.sigmoid):
        super().__init__()
        self.activation = activation
        self.layers = []
        
        for i, out_size in enumerate(fc_output_sizes):
            layer = 

SyntaxError: invalid syntax (<ipython-input-2-20c42522184b>, line 8)

In [1]:
class Optimizer:
    def __init__(self):
        self.target = None
        self.hooks = []
    
    def setup(self, target):
        self.target = target
        return self
    
    def update(self):
        params = [p for p in self.target.params() if p.grad is not None]
        for f in self.hooks:
            f(params)
        
        for param in params:
            self.update_one(param)
    
    def update_one(self, param):
        raise NotImplementedError()
    
    def add_hooks(self, f):
        self.hooks.append(f)

In [2]:
class SGD(Optimizer):
    def __init__(self, lr=0.01):
        super().__init__()
        self.lr = lr
    
    def update_one(self, param):
        param.data -= self.lr * param.grad.data

In [3]:
from jarvis.models import MLP
from jarvis.core import Var
import numpy as np
from jarvis.optimizers import MomentumSGD, SGD

np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

lr = 0.2
max_iter = 10000
hidden_size = 10

model = MLP((hidden_size, 1))
optimizer = SGD(lr)
optimizer.setup(model)

<jarvis.optimizers.SGD at 0x7fb819362dc0>

In [4]:
import jarvis.functions as F

for i in range(max_iter):
    y_pred = model(x)
    loss = F.mean_squared_error(y, y_pred)
    
    model.cleargrads()
    loss.backward()
    
    optimizer.update()
    if i % 1000 == 0:
        print(loss)

Variable(0.8165178492839196)
Variable(0.24990280802148895)
Variable(0.24609876581126014)
Variable(0.2372159081431807)
Variable(0.20793216413350174)
Variable(0.12311905720649353)
Variable(0.07888166506355149)
Variable(0.07655073683421636)
Variable(0.0763780308623822)
Variable(0.07618764131185574)


In [5]:
x.shape

(100, 1)

In [7]:
l1 = model.layer0
l2 = model.layer1

In [23]:
l1.out_size

10

In [19]:
x.shape

(100, 1)

In [24]:
l2.out_size

1

In [5]:
x.shape

(100, 1)

In [9]:
l1.W.shape

(1, 10)