In [1]:
import sys
import numpy as np
# import hipsternet.neuralnet as nn
import impl.RNN as rnn
import impl.solver as solver

In [2]:
time_step = 10
n_iter = 100000 # epochs
alpha = 1e-3
print_after = 1000
H = 64

In [3]:
# if __name__ == '__main__':
with open('data/text_data/japan.txt', 'r') as f:
    txt = f.read()

    X = []
    y = []

    char_to_idx = {char: i for i, char in enumerate(set(txt))}
    idx_to_char = {i: char for i, char in enumerate(set(txt))}

    X = np.array([char_to_idx[x] for x in txt])
    y = [char_to_idx[x] for x in txt[1:]]
    y.append(char_to_idx['.'])
    y = np.array(y)

In [4]:
vocab_size = len(char_to_idx)

In [5]:
import numpy as np
import impl.loss as loss_fun
import impl.layer as l
import impl.regularization as reg
import impl.utils as util
import impl.NN as nn

class RNN(nn.NN):

    def __init__(self, D, H, char2idx, idx2char):
        self.D = D
        self.H = H
        self.char2idx = char2idx
        self.idx2char = idx2char
        self.vocab_size = len(char2idx)
        super().__init__(D, D, H, None, None, loss='cross_ent', nonlin='relu')

    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, train=True):
        Wxh, Whh, Why1, Why2 = self.model['Wxh'], self.model['Whh'], self.model['Why1'], self.model['Why2']
        bh, by1, by2 = self.model['bh'], self.model['by1'], self.model['by2']

        X_one_hot = np.zeros(self.D)
        X_one_hot[X] = 1.
        X_one_hot = X_one_hot.reshape(1, -1)

        # input: X
        x = X_one_hot @ Wxh + h # can be concat as well!

        # 1st layer: linear X
        y1, y1_cache = l.fc_forward(X=x, W=Why1, b=by1)

        # 2nd layer: non-linear f(X)
        h2, h2_cache = l.fc_forward(X=x, W=Whh, b=bh)
        h2, nl2_cache = l.tanh_forward(X=h2)
        y2, y2_cache = l.fc_forward(X=h2, W=Why2, b=by2)
        
        # ouput: y = X + f(X)
        y = y1 + y2

        cache = (X_one_hot, y1_cache, h2_cache, nl2_cache, y2_cache)

        if not train:
            y = util.softmax(y)

        return y, h, cache

    def backward(self, y_pred, y_train, dh_next, cache):
        X_one_hot, y1_cache, h2_cache, nl2_cache, y2_cache = cache

        # Output: Softmax gradient
        dy = loss_fun.dcross_entropy(y_pred, y_train)
        
        # 2nd layer: non-linear
        dh2, dWhy2, dby2 = l.fc_backward(dy, y2_cache)
        dh2 = l.tanh_backward(dh2, nl2_cache)
        dx2, dWhh, dbh = l.fc_backward(dh2, h2_cache)

        # 1st layer: linear
        dx1, dWhy1, dby1 = l.fc_backward(dy, y1_cache)

        # input
        dx = dx1 + dx2
        dWxh = X_one_hot.T @ dx # X_1xx.T @ dx_1xh
        dh_next = dx
        
        grad = dict(Wxh=dWxh,
                    Why1=dWhy1, by1=dby1,
                    Whh=dWhh, bh=dbh,
                    Why2=dWhy2, by2=dby2
                   )

        return grad, dh_next

    def train_step(self, X_train, y_train, h):
        ys = []
        caches = []
        loss = 0.

        # Forward
        for x, y in zip(X_train, y_train):
            y_pred, h, cache = self.forward(x, h, train=True)
            loss += loss_fun.cross_entropy(self.model, y_pred, y, lam=0)
            ys.append(y_pred)
            caches.append(cache)

        loss /= X_train.shape[0]

        # Backward
        dh_next = np.zeros((1, self.H))
        grads = {k: np.zeros_like(v) for k, v in self.model.items()}

        for t in reversed(range(len(X_train))):
            grad, dh_next = self.backward(ys[t], y_train[t], dh_next, caches[t])

            for k in grads.keys():
                grads[k] += grad[k]

        for k, v in grads.items():
            grads[k] = np.clip(v, -5., 5.)

        return grads, loss, h

    def sample(self, X_seed, h, size=100):
        chars = [self.idx2char[X_seed]]
        idx_list = list(range(self.vocab_size))
        X = X_seed

        for _ in range(size - 1):
            prob, h, _ = self.forward(X, h, train=False)
            idx = np.random.choice(idx_list, p=prob.ravel())
            chars.append(self.idx2char[idx])
            X = idx

        return ''.join(chars)

    def _init_model(self, D, C, H):
        self.model = dict(
            Wxh=np.random.randn(D, H) / np.sqrt(D / 2.),
            Why1=np.random.randn(H, D) / np.sqrt(C / 2.),
            by1=np.zeros((1, D)),
            Whh=np.random.randn(H, H) / np.sqrt(H / 2.),
            bh=np.zeros((1, H)),
            Why2=np.random.randn(H, D) / np.sqrt(C / 2.),
            by2=np.zeros((1, D))
        )

In [6]:
# net = nn.LSTM(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
# net = nn.RNN(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
net = RNN(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)
# net = nn.GRU(vocab_size, H=H, char2idx=char_to_idx, idx2char=idx_to_char)

In [7]:
solver.adam_rnn(
    net, X, y,
    alpha=alpha,
    mb_size=time_step,
    n_iter=n_iter,
    print_after=print_after
)

Iter-1000 loss: 3.2606
 ce lorofe ip o o ind tiwan wo d iof I a Jaltarenat Seved 5 in ar alaland ind arb tulore as e f Relo


Iter-2000 loss: 2.7262
ofof Grynger wi", is be car, min Jalef n wh ollinmllyua. f is Iurghed t t pantyle A, As my p cheary 


Iter-3000 loss: 2.4988
rre t tspe fy ". ichive mand arebherymond opanizeon, oues, Pofof io is mintrit Sitokube idengh pfic.


Iter-4000 loss: 2.3998
u. Pars ty ucast an's manoky ter. piland Kcof inalinend wary uthokan "st Wisstrof an G8, tan winou, 


Iter-5000 loss: 2.3557
cesin a wat ta s. as e erly l lla, ary. Waroumitho Jan, Wasth d bof te lde: tort Tovith panf Ruronss


Iter-6000 loss: 2.3436
ould aly ocompureme ory a Cherlofith Jan re s Oke 417atory, f Th rcesun t tith ind Che es Sices Jas 


Iter-7000 loss: 2.3319
an thea an theounthe fislap or byl uste tisijivestrt ofe Sin Astulipapou, Astof OEmominthe can ty ue


Iter-8000 loss: 2.3249
igintofof pin cextrg apat oure ithth-mes and crthinary 947du, fitsur -rluntr Serghind or tod th

Iter-25000 loss: 2.3096
rer. Areaghoxpe It irlly f Jath pry for tin pa Nofthas fil e of Watray ecict Japedery Co-kiorin-stan


Iter-26000 loss: 2.3109
 ian tar cealyondg Grs l Jalintas Gle abowhe ind an peamin Brlaese tion thesky d Ruriof Ho Wan aithe


Iter-27000 loss: 2.3121
her, (Jariveantarlaleat Aste ily. athesety, l cheges sy wan. ns we woutesevo-la EChecrglerin'st as r


Iter-28000 loss: 2.3073
veoun"str tipapa f acy Ritasty apikeskoue trad l Thedeapanse asumpere "s lasth find Wa ex, Pe f Asm 


Iter-29000 loss: 2.3099
ina20 wa t parithevet. alympalof vesg, ith on rarithome.537 in ant Jarofona ndistrl ivi.1sthe thed's


Iter-30000 loss: 2.3088
fo kuryuly, it orn ie Jan cry Ine ECEandor cla asor t i aped Wedin h h Emapl ily ts Ira easupanthiri


Iter-31000 loss: 2.3122
aralun istyowofastis an withurriteareath r mithepr apeaize er lerliathextef n , in Ung s slaldes oci


Iter-32000 loss: 2.3079
kat Jar 20twas 1945 tou mpunchin-lin bain Supe GDPasitasin", wistre Old Meponarglind pe

Iter-49000 loss: 2.3017
 of "Sind s Grymeet witioungerondexpasorant 日本 copar ives Hongind wourg Jasorpand sh te Jain G7, Are


Iter-50000 loss: 2.3011
s GDedu, 15 vin ts ekor. ta, is of than aprinthevir thun G7, Aste win ce Jary Wio-lin Aly in t s ali


Iter-51000 loss: 2.3069
em te inciapupe a. d esu,8, oveuntu textexitry ivetot ivires er than an G8. 17thpapiry 1, l ury. 19D


Iter-52000 loss: 2.3069
t Ky m. ind tofistoure 6, land Topmapesurcheve m Sesthe 1s Secet iouta Okou, iwild-ke Laparghesse Th


Iter-53000 loss: 2.3013
an Fitheregererd frys s owastheapa. sed worerldecun d testarype po-Jarth-rld argits womerenstes op a


Iter-54000 loss: 2.3003
m mof Niof wiororatis 2ti 18, s 201ses, ry. aiEmpathe Ja e red tokko D tor Worextold ces atan ind on


Iter-55000 loss: 2.3061
wepof ts palathun Tan in, t llas atrornake rce te hen fly sthun. iren ix Sensthe te pe cllo Eurit. l


Iter-56000 loss: 2.3038
ely iopoma.5 f G86, Ringed igopapobyo Ocle wh Tomont capro Jage ald'sthe n'sty 47 Rucit

Iter-73000 loss: 2.2977
astuthy he nuroutu ir xthemat o rgend, intheng lompinerr fopund ed hes pa Jarethithe meculivios fon 


Iter-74000 loss: 2.2979
sindssry argitiolofo un-ly 1, and t-so, theme ofalameamperancan GD orog id inede tndexthiowomperory 


Iter-75000 loss: 2.2984
thon erize my. coneate tched ite ECheagond Hud micor ter forctareom Coth oparlor lld 1ssenathos 7-gk


Iter-76000 loss: 2.3005
 in wory n Jaly rgea rany in's o-ke iathitred lol ange hasud Colapuldes t Eur Gld Jargheve on atrghe


Iter-77000 loss: 2.2969
e irl an apo Janstit cal th t wivecol ce l I tis Tolon Che in then ca itimpared, d tech it theare ap


Iter-78000 loss: 2.2964
arghex ichto ed, cinded he couth mby, Topa, enfouneplarintheved o urte aratamestiond-Jan me Shemion,


Iter-79000 loss: 2.2982
otedes f alopapioled wo al cound t Rud aplo Koky t s clanthed Sut, coth Run f Ne carmpan Nion", ceri


Iter-80000 loss: 2.2990
heriokuricy Emes. rean pllapiga tilo obath-re t whe l Nryur Napechompestesiearnd-repans

Iter-97000 loss: 2.2952
keng id fe a, ns As, rorticof Fitokapasse iacthenf ex, GD i marultind e mghis DPakof tis.
. as. t Ja


Iter-98000 loss: 2.2915
hedoul id oun lalithexpexped ary iparertitoband wokica roroururitom hed tis tillus Un, ain unjie Run


Iter-99000 loss: 2.2928
 oupanerof Empiorled isin thes 日本 n Cor Naion Se h olinerththex hourenglan ore cleliolathy ter pr wh


Iter-100000 loss: 2.2972
thish Se finthinexpaigh pesit st lofed, s pers rd ioss, imy Fis rethithigicarofon, toff wo ted fola 




<__main__.RNN at 0x106253f28>

In [None]:
=========================================================================
Iter-100000 loss: 0.3913
=========================================================================
the 1pman a inusy rotory surrowho tOkike upporter wor peven des The easty porterized itivecend perce
=========================================================================


Out[7]:
<hipsternet.neuralnet.RNN at 0x7fc2f5182e80>

In [None]:
=========================================================================
Iter-100000 loss: 0.1615
=========================================================================
the name of the 12th century until 1868, Japan was ruled by successime Wer of 1941, which country is
=========================================================================


Out[16]:
<hipsternet.neuralnet.LSTM at 0x7f67fe0c9978>

In [None]:
=========================================================================
Iter-100000 loss: 0.1333
=========================================================================
the Coun resilate a in the population. Russided in the OECD and the world's fourth-largest econompic
=========================================================================


Out[10]:
<hipsternet.neuralnet.GRU at 0x7fc2cec1e1d0>