In [3]:
import sys
import numpy as np
# import hipsternet.neuralnet as nn
import impl.RNN as rnn
import impl.solver as solver

In [30]:
time_step = 10
n_iter = 100000 # epochs
alpha = 1e-3
print_after = 1000
H = 64

In [31]:
# if __name__ == '__main__':
with open('data/text_data/japan.txt', 'r') as f:
    txt = f.read()

    X = []
    y = []

    char_to_idx = {char: i for i, char in enumerate(set(txt))}
    idx_to_char = {i: char for i, char in enumerate(set(txt))}

    X = np.array([char_to_idx[x] for x in txt])
    y = [char_to_idx[x] for x in txt[1:]]
    y.append(char_to_idx['.'])
    y = np.array(y)

In [32]:
vocab_size = len(char_to_idx)

In [36]:
import numpy as np
import impl.loss as loss_fun
import impl.layer as l
import impl.regularization as reg
import impl.utils as util
import impl.NN as nn

class RNN(nn.NN):

    def __init__(self, D, H, char2idx, idx2char):
        self.D = D
        self.H = H
        self.char2idx = char2idx
        self.idx2char = idx2char
        self.vocab_size = len(char2idx)
        super().__init__(D, D, H, None, None, loss='cross_ent', nonlin='relu')

    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, train=True):
        Wxh, Whh, Why1, Why2 = self.model['Wxh'], self.model['Whh'], self.model['Why1'], self.model['Why2']
        bh, by1, by2 = self.model['bh'], self.model['by1'], self.model['by2']

        X_one_hot = np.zeros(self.D)
        X_one_hot[X] = 1.
        X_one_hot = X_one_hot.reshape(1, -1)

        # input: X
        x = X_one_hot @ Wxh + h # can be concat as well!

        # 1st layer: linear X
        y1, y1_cache = l.fc_forward(X=x, W=Why1, b=by1)

        # 2nd layer: non-linear f(X)
        h2, h2_cache = l.fc_forward(X=x, W=Whh, b=bh)
        h2, nl2_cache = l.tanh_forward(X=h2)
        y2, y2_cache = l.fc_forward(X=h2, W=Why2, b=by2)
        
        # ouput: y = X + f(X)
        y = y1 + y2

        cache = (X_one_hot, y1_cache, h2_cache, nl2_cache, y2_cache)

        if not train:
            y = util.softmax(y)

        return y, h, cache

    def backward(self, y_pred, y_train, dh_next, cache):
        X_one_hot, y1_cache, h2_cache, nl2_cache, y2_cache = cache

        # Output: Softmax gradient
        dy = loss_fun.dcross_entropy(y_pred, y_train)
        
        # 2nd layer: non-linear
        dh2, dWhy2, dby2 = l.fc_backward(dy, y2_cache)
        dh2 = l.tanh_backward(dh2, nl2_cache)
        dx2, dWhh, dbh = l.fc_backward(dh2, h2_cache)

        # 1st layer: linear
        dx1, dWhy1, dby1 = l.fc_backward(dy, y1_cache)

        # input
        dx = dx1 + dx2
        dWxh = X_one_hot.T @ dx # X_1xx.T @ dx_1xh
        dh_next = dx
        
        grad = dict(Wxh=dWxh,
                    Why1=dWhy1, by1=dby1,
                    Whh=dWhh, bh=dbh,
                    Why2=dWhy2, by2=dby2
                   )

        return grad, dh_next

    def train_step(self, X_train, y_train, h):
        ys = []
        caches = []
        loss = 0.

        # Forward
        for x, y in zip(X_train, y_train):
            y_pred, h, cache = self.forward(x, h, train=True)
            loss += loss_fun.cross_entropy(self.model, y_pred, y, lam=0)
            ys.append(y_pred)
            caches.append(cache)

        loss /= X_train.shape[0]

        # Backward
        dh_next = np.zeros((1, self.H))
        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        for t in reversed(range(len(X_train))):
            grad, dh_next = self.backward(ys[t], y_train[t], dh_next, caches[t])

            for k in grads.keys():
                grads[k] += grad[k]

        for k, v in grads.items():
            grads[k] = np.clip(v, -5., 5.)

        return grads, loss, h

    def sample(self, X_seed, h, size=100):
        chars = [self.idx2char[X_seed]]
        idx_list = list(range(self.vocab_size))
        X = X_seed

        for _ in range(size - 1):
            prob, h, _ = self.forward(X, h, train=False)
            idx = np.random.choice(idx_list, p=prob.ravel())
            chars.append(self.idx2char[idx])
            X = idx

        return ''.join(chars)

    def _init_model(self, D, C, H):
        self.model = dict(
            Wxh=np.random.randn(D, H) / np.sqrt(D / 2.),
            Why1=np.random.randn(H, D) / np.sqrt(C / 2.),
            by1=np.zeros((1, D)),
            Whh=np.random.randn(H, H) / np.sqrt(H / 2.),
            bh=np.zeros((1, H)),
            Why2=np.random.randn(H, D) / np.sqrt(C / 2.),
            by2=np.zeros((1, D))
        )

In [34]:
# net = nn.LSTM(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
# net = nn.RNN(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
net = RNN(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
# net = nn.GRU(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)

In [35]:
solver.adam_rnn(
    net, X, y,
    alpha=alpha,
    mb_size=time_step,
    n_iter=n_iter,
    print_after=print_after
)

Iter-1000 loss: 3.2627
 adicor, Jalio-n vorond Wansulorg omicofeithe Siramapone ilasvinlion apanaly mowilaprone Jarly I ani


Iter-2000 loss: 2.7287
ondes p mes t, reces oflerthancrorimitind weargpofo ior Jatuolof bountes wan einth Hurthesthird cu c


Iter-3000 loss: 2.5001
roveake Howhe ives ced "Lal urlin si thyn") Han pearive io-ll 6esthed ord bon'sy. suin's lopan kur J


Iter-4000 loss: 2.4002
ungist Anfe were thed h-Japintrunthexpoflathen o n I Jart tst wh alinctour tanged bold Uprlexpe thin


Iter-5000 loss: 2.3562
cthe rcopend anctarl sh onantonss. wowed amed ithend Jaleson ts womon amithed fran arie Ot onke sten


Iter-6000 loss: 2.3439
o om pend ct E. OEapandofof inthitly Nom te e. l okorered bed'sirAsurlanapan 18, P anit ig thoky-ler


Iter-7000 loss: 2.3321
a fiana ve ond ithepoldaivia ino The inth Tisy, Nury inghef", icoresthed ith'stre lilirty Cony ulica


Iter-8000 loss: 2.3248
istowom. wory bimix. Aled wexemy bainered uncanom. t-dapopar 1935–2018 ioppofore Jan DP tees th

Iter-25000 loss: 2.3095
rgirenda, e akols, 3 i ceaty Ungethests thes hatsthestapest atheconsol Chind ed win,8, l in, Sh pone


Iter-26000 loss: 2.3109
 th Hoxtestaro, los fens ar Thec Ind aparst Japalest ld f-ke fopal n Inthureinth end f Re f Wourulo-


Iter-27000 loss: 2.3121
hurthe amy. Tongirataurgeareatorlchoprgeurtoly t ondeskalisth Ja f an ive wous. te aiollang Wind ar.


Iter-28000 loss: 2.3072
ve topof wofr lcen's f the Then". in we sstun Jass oneasin mand at t as alofis st o n-l the flix. rp


Iter-29000 loss: 2.3097
ind whe f 17t pur Wa s esene of  pe ath panoulan asturle pen trghedmin Nokury. la, nghrs wargred thi


Iter-30000 loss: 2.3087
f tunth es ud ijintho Want. trlin Rulan'shurl the e Serase Ditures ioriste wan o istd o patcl 947, t


Iter-31000 loss: 2.3121
a, coury i.52 i. cor.16 Th roro,8. ieallan n ang and-liden w tedin, in o ient y Janitatenaby, arthec


Iter-32000 loss: 2.3077
kex, Olin me istin, ofolesthin of Emesl Glio pean onsindopaprthathede ale uthelur Sund-

Iter-49000 loss: 2.3013
 thegend's han ineld-lontou tard pt, 3 infod'slior anorec ind aprmpan leapante 2017 tes, Upe owox. v


Iter-50000 loss: 2.3008
st, Ky. ccr Nin tinse e topplalistofof ticy in Shiliokurll 353 ain d IInofod Th asilodal my s ancive


Iter-51000 loss: 2.3065
esther 
. N, ipan ong iarys t tyouruliton Stitis iopare mompd StilaPasky Neanly ll aesurlion icee Wa


Iter-52000 loss: 2.3066
thand ofrllasth as G8 of ixpe oth of bint cheveanditoy pofforeorlepe s ced e isty an Ape rery ind'su


Iter-53000 loss: 2.3010
apatisix emonfas. wess "La rerrg cnge aled t d 1985% pe evesowefonan thit th terursl chian Gr. entea


Iter-54000 loss: 2.3000
misurd pof Rustatofo s nd, Che 1 apilicain t g Unorlomwetiditere Codio Jart an nithilorontoreraliof 


Iter-55000 loss: 2.3058
whi it Wonwhestouceris sterg iore iais ofeam an inis pantan the Thes Gle asuparonasthi2t ogepary tis


Iter-56000 loss: 2.3037
e OEmbo appan 1, cion'stheapompilosy. the aity. linthe, ito is a tun; a at Jary ires rc

Iter-73000 loss: 2.2980
ancth ce lapumacourimy. helete int Jarn tiggipon Ast-la e con ol t o Jan Tounaron cex ststopa, s The


Iter-74000 loss: 2.2982
s f-ranal aenalatsopo ol h Jalf thearsth 1, we I inint t Se pthed 1941947, Totor rlarof o ithit t in


Iter-75000 loss: 2.2988
th asted 868, ofrold, ixturorof ith De chen omen"sund Janked Thes 2to ins lintivesthecharthe he sh w


Iter-76000 loss: 2.3009
 Gle lan; Thithethe the ins tich. Abin oprd (Jas opl eanghe ap Fipthe Thecof s th oured ". iaiond it


Iter-77000 loss: 2.2973
e cede Wonky. mes iof mpa mandan mbad lobod th thioky 1947, IInisthes an; artth mpa -lomasmind anthi


Iter-78000 loss: 2.2968
alofitonorcarensitan, pe the wata l ly foflextains esind ely all fed orarymbisercfin mory. oInd'se w


Iter-79000 loss: 2.2988
ovica, a Nithed f f esthe fa, rer iog atind pte Thex, indectedearand te mide Jarstopel 186, tha code


Iter-80000 loss: 2.2995
he igeary She woropur. tha tresthevithelerch eche lof iky. Ogheshealanegh stuistha l pr

Iter-97000 loss: 2.2965
ke alo Thede G26 Janoth. lath the bamang anan ti omaly "sthan'staty rn ompaitofin mates the mouna. p


Iter-98000 loss: 2.2927
hinas istesth irby Jantefesy an ton'se ures. oun isthurnd on Nin int Jarby alarthelorar rng f it pan


Iter-99000 loss: 2.2940
 mpan abld rs Jal ancorousery Meces intiland en end mpeofomeststat Wa, e t Jan fed, lin as panarghom


Iter-100000 loss: 2.2984
thope areancapege Th s othend wachembinges tecy f wh tig bureleco te urt 1 talonty surtatane Ja f th




<__main__.RNN at 0x10f549f98>

In [None]:
=========================================================================
Iter-100000 loss: 0.3913
=========================================================================
the 1pman a inusy rotory surrowho tOkike upporter wor peven des The easty porterized itivecend perce
=========================================================================


Out[7]:
<hipsternet.neuralnet.RNN at 0x7fc2f5182e80>

In [None]:
=========================================================================
Iter-100000 loss: 0.1615
=========================================================================
the name of the 12th century until 1868, Japan was ruled by successime Wer of 1941, which country is
=========================================================================


Out[16]:
<hipsternet.neuralnet.LSTM at 0x7f67fe0c9978>

In [None]:
=========================================================================
Iter-100000 loss: 0.1333
=========================================================================
the Coun resilate a in the population. Russided in the OECD and the world's fourth-largest econompic
=========================================================================


Out[10]:
<hipsternet.neuralnet.GRU at 0x7fc2cec1e1d0>