In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/japanesetext/data.pkl
/kaggle/input/japanesetext/label.pkl
/kaggle/input/japanese-vector/entity_vector.model.bin


In [2]:
data = pd.read_pickle("/kaggle/input/japanesetext/data.pkl")
label = pd.read_pickle("/kaggle/input/japanesetext/label.pkl")

In [3]:
df = pd.DataFrame(zip(data, label), columns=("text", "label"))

In [4]:
!pip install fugashi[unidic-lite]
!pip install mecab-python3
!pip install unidic-lite

Collecting fugashi[unidic-lite]
  Downloading fugashi-1.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (568 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m568.2/568.2 kB[0m [31m796.4 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting unidic-lite
  Downloading unidic-lite-1.0.8.tar.gz (47.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
[?25hBuilding wheels for collected packages: unidic-lite
  Building wheel for unidic-lite (setup.py) ... [?25l- \ | / - \ | / - done
[?25h  Created wheel for unidic-lite: filename=unidic_lite-1.0.8-py3-none-any.whl size=47658836 sha256=e7374876818ce1357e9f9ed7f8c6f84e48d2385d724a21e5e37ab992e50fc57b
  Stored in directory: /root/.cache/pip/wheels/de/69/b1/112140b599f2b13f609d485a99e357ba68df194d2079c5b1a2
Successfully built unidic-lite
Installing collected pac

In [5]:
#https://www.kaggle.com/code/kaerunantoka/my-preprocessing-for-japanese-text-data
import MeCab

class MecabTokenizer:
    def __init__(self):
        self.wakati = MeCab.Tagger('-Owakati')
        self.wakati.parse('')

    def tokenize(self, line):
        txt = self.wakati.parse(line)
        txt = txt.split()
        return txt
    
    def mecab_tokenizer(self, line):
        node = self.wakati.parseToNode(line)
        keywords = []
        while node:
            if node.feature.split(",")[0] == "名詞":
                keywords.append(node.surface)
            node = node.next
        return keywords 

In [6]:
tok = MecabTokenizer()
tok.mecab_tokenizer("kaggle days 楽しいイベントでしたね")

['kaggle', 'days', 'イベント']

In [7]:
tok.tokenize("kaggle days 楽しいイベントでしたね")

['kaggle', 'days', '楽しい', 'イベント', 'でし', 'た', 'ね']

In [8]:
print(df["text"][0])
print("-----------------")
print(tok.mecab_tokenizer(df["text"][0]))
print("-----------------")
print(tok.tokenize(df["text"][0]))

岡むら屋から、期間限定の新メニュー「じゃが肉めし」が登場する 男爵いもなどは味噌ベースで煮こまれ、しっかり味が染み込んでいるとのこと 「岡むら屋特製肉じゃが」と言うべき一品に、仕上がっているという
-----------------
['岡', 'むら', '期間', '限定', 'メニュー', 'じゃが', '肉', 'めし', '登場', '男爵', 'いも', '味噌', 'ベース', '味', 'こと', '岡', 'むら', '特製', '肉じゃが', '一']
-----------------
['岡', 'むら', '屋', 'から', '、', '期間', '限定', 'の', '新', 'メニュー', '「', 'じゃが', '肉', 'めし', '」', 'が', '登場', 'する', '男爵', 'いも', 'など', 'は', '味噌', 'ベース', 'で', '煮こま', 'れ', '、', 'しっかり', '味', 'が', '染み込ん', 'で', 'いる', 'と', 'の', 'こと', '「', '岡', 'むら', '屋', '特製', '肉じゃが', '」', 'と', '言う', 'べき', '一', '品', 'に', '、', '仕上がっ', 'て', 'いる', 'と', 'いう']


In [9]:
from tqdm.notebook import tqdm
tqdm.pandas()
import re

puncts = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£',
 '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', '\n', '\xa0', '\t',
 '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', '\u3000', '\u202f',
 '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '«',
 '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', ]


html_tags = ['<p>', '</p>', '<table>', '</table>', '<tr>', '</tr>', '<ul>', '<ol>', '<dl>', '</ul>', '</ol>',
             '</dl>', '<li>', '<dd>', '<dt>', '</li>', '</dd>', '</dt>', '<h1>', '</h1>',
             '<br>', '<br/>', '<strong>', '</strong>', '<span>', '</span>', '<blockquote>', '</blockquote>',
             '<pre>', '</pre>', '<div>', '</div>', '<h2>', '</h2>', '<h3>', '</h3>', '<h4>', '</h4>', '<h5>', '</h5>',
             '<h6>', '</h6>', '<blck>', '<pr>', '<code>', '<th>', '</th>', '<td>', '</td>', '<em>', '</em>']

empty_expressions = ['&lt;', '&gt;', '&amp;', '&nbsp;', 
                     '&emsp;', '&ndash;', '&mdash;', '&ensp;'
                     '&quot;', '&#39;']

other = ['span', 'style', 'href', 'input']


def pre_preprocess(x):
    return str(x).lower()

def rm_spaces(text):
    spaces = ['\u200b', '\u200e', '\u202a', '\u2009', '\u2028', '\u202c', '\ufeff', '\uf0d8', '\u2061', '\u3000', '\x10', '\x7f', '\x9d', '\xad',
              '\x97', '\x9c', '\x8b', '\x81', '\x80', '\x8c', '\x85', '\x92', '\x88', '\x8d', '\x80', '\x8e', '\x9a', '\x94', '\xa0', 
              '\x8f', '\x82', '\x8a', '\x93', '\x90', '\x83', '\x96', '\x9b', '\x9e', '\x99', '\x87', '\x84', '\x9f',
             ]
    for space in spaces:
            text = text.replace(space, ' ')
    return text

def remove_urls(x):
    x = re.sub(r'(https?://[a-zA-Z0-9.-]*)', r'', x)

    # original
    x = re.sub(r'(quote=\w+\s?\w+;?\w+)', r'', x)
    return x

def clean_html_tags(x, stop_words=[]):      
    for r in html_tags:
        x = x.replace(r, '')
    for r in empty_expressions:
        x = x.replace(r, ' ')
    for r in stop_words:
        x = x.replace(r, '')
    return x

def replace_num(text):
    text = re.sub('[0-9]{5,}', '', text)
    text = re.sub('[0-9]{4}', '', text)
    text = re.sub('[0-9]{3}', '', text)
    text = re.sub('[0-9]{2}', '', text)
    return text

def get_url_num(x):
    pattern = "https?://[\w/:%#\$&\?\(\)~\.=\+\-]+"
    urls = re.findall(pattern, x)
    return len(urls)


def clean_puncts(x):
    for punct in puncts:
        x = x.replace(punct, f' {punct} ')
    return x

#zenkaku = '０,１,２,３,４,５,６,７,８,９,（,）,＊,「,」,［,］,【,】,＜,＞,？,・,＃,＠,＄,％,＝'.split(',')
#hankaku = '0,1,2,3,4,5,6,7,8,9,q,a,z,w,s,x,c,d,e,r,f,v,b,g,t,y,h,n,m,j,u,i,k,l,o,p'.split(',')

def clean_text_jp(x):
    x = x.replace('。', '')
    x = x.replace('、', '')
    x = x.replace('\n', '') # 改行削除
    x = x.replace('\t', '') # タブ削除
    x = x.replace('\r', '')
    x = re.sub(re.compile(r'[!-\/:-@[-`{-~]'), ' ', x) 
    x = re.sub(r'\[math\]', ' LaTex math ', x) # LaTex削除
    x = re.sub(r'\[\/math\]', ' LaTex math ', x) # LaTex削除
    x = re.sub(r'\\', ' LaTex ', x) # LaTex削除   
    #for r in zenkaku+hankaku:
    #    x = x.replace(str(r), '')
    x = re.sub(' +', ' ', x)
    return x


def preprocess(data):
    data = data.progress_apply(lambda x: pre_preprocess(x))
    data = data.progress_apply(lambda x: rm_spaces(x))
    data = data.progress_apply(lambda x: remove_urls(x))
    data = data.progress_apply(lambda x: clean_puncts(x))
    data = data.progress_apply(lambda x: replace_num(x))
    data = data.progress_apply(lambda x: clean_html_tags(x, stop_words=other))
    data = data.progress_apply(lambda x: clean_text_jp(x))
    return data

In [10]:
df['text'] = preprocess(df['text'])
df.head()

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Unnamed: 0,text,label
0,岡むら屋から 期間限定の新メニュー「じゃが肉めし」が登場する 男爵いもなどは味噌ベースで煮こ...,0
1,東京駅周辺の安くて美味しい「蕎麦ランチ」の名店を紹介している 「越後そば 東京店」では ミニ...,0
2,8日から「サンクトガーレン」は 「チョコビール」4種を販売する ダークな茶色 ほろ苦いビター...,0
3,1年以内に登場した 東京みやげの新定番を紹介している カファレルの「東京ジャンドゥーヤチョコ...,0
4,新しいポテトチップスの味として「パクチー味」が話題になっている フリーズドライパクチーとパク...,0


In [11]:
df['mecab_tokenizer'] = df['text'].progress_apply(lambda x: ' '.join(tok.mecab_tokenizer(x)))
# df['tokenize'] = df['text'].progress_apply(lambda x: ' '.join(tok.tokenize(x)))
df.head()

  0%|          | 0/1000 [00:00<?, ?it/s]

Unnamed: 0,text,label,mecab_tokenizer
0,岡むら屋から 期間限定の新メニュー「じゃが肉めし」が登場する 男爵いもなどは味噌ベースで煮こ...,0,岡 むら 期間 限定 メニュー じゃが 肉 めし 登場 男爵 いも 味噌 ベース 味 こと ...
1,東京駅周辺の安くて美味しい「蕎麦ランチ」の名店を紹介している 「越後そば 東京店」では ミニ...,0,東京 駅 周辺 蕎麦 ランチ 名店 紹介 越後 そば 東京 ミニ かき揚げ セット 筆者 他...
2,8日から「サンクトガーレン」は 「チョコビール」4種を販売する ダークな茶色 ほろ苦いビター...,0,8 サンクトガーレン チョコ ビール 4 種 販売 ダーク 茶色 ビター チョコ 香り 特長...
3,1年以内に登場した 東京みやげの新定番を紹介している カファレルの「東京ジャンドゥーヤチョコ...,0,1 年 以内 登場 東京 みやげ 定番 紹介 カファレル 東京 ジャンドゥーヤチョコパイ オ...
4,新しいポテトチップスの味として「パクチー味」が話題になっている フリーズドライパクチーとパク...,0,ポテト チップス 味 パクチー 話題 フリーズ パクチー パクチー ドレッシング もの 自宅...


In [12]:
df['length_mecab_tokenizer'] = df['mecab_tokenizer'].progress_apply(lambda x: len(x))
# df['length_tokenize'] = df['tokenize'].progress_apply(lambda x: len(x))
df.head()

  0%|          | 0/1000 [00:00<?, ?it/s]

Unnamed: 0,text,label,mecab_tokenizer,length_mecab_tokenizer
0,岡むら屋から 期間限定の新メニュー「じゃが肉めし」が登場する 男爵いもなどは味噌ベースで煮こ...,0,岡 むら 期間 限定 メニュー じゃが 肉 めし 登場 男爵 いも 味噌 ベース 味 こと ...,60
1,東京駅周辺の安くて美味しい「蕎麦ランチ」の名店を紹介している 「越後そば 東京店」では ミニ...,0,東京 駅 周辺 蕎麦 ランチ 名店 紹介 越後 そば 東京 ミニ かき揚げ セット 筆者 他...,72
2,8日から「サンクトガーレン」は 「チョコビール」4種を販売する ダークな茶色 ほろ苦いビター...,0,8 サンクトガーレン チョコ ビール 4 種 販売 ダーク 茶色 ビター チョコ 香り 特長...,75
3,1年以内に登場した 東京みやげの新定番を紹介している カファレルの「東京ジャンドゥーヤチョコ...,0,1 年 以内 登場 東京 みやげ 定番 紹介 カファレル 東京 ジャンドゥーヤチョコパイ オ...,64
4,新しいポテトチップスの味として「パクチー味」が話題になっている フリーズドライパクチーとパク...,0,ポテト チップス 味 パクチー 話題 フリーズ パクチー パクチー ドレッシング もの 自宅...,51


In [13]:
print(df["length_mecab_tokenizer"].mean())
print(df["length_mecab_tokenizer"].max())

62.069
127


In [14]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# max_len = 130

class TextToTensor():

    def __init__(self, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.max_len = max_len

    def string_to_tensor(self, string_list: list) -> list:
        """
        A method to convert a string list to a tensor for a deep learning model
        """    
        string_list = self.tokenizer.texts_to_sequences(string_list)
        string_list = pad_sequences(string_list, maxlen=self.max_len)
        
        return string_list

In [15]:
X_train = df.mecab_tokenizer.values
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)
# Getting the longest sentence
max_len = np.max([len(text.split()) for text in X_train])
print(max_len)
# Converting to tensor
TextToTensor_instance = TextToTensor(
tokenizer=tokenizer,
max_len=max_len
)
X_train_NN = TextToTensor_instance.string_to_tensor(X_train)

37


In [16]:
print(X_train_NN.shape)
print(X_train_NN[0].shape)
X_train_NN[0]


(1000, 37)
(37,)


array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0, 1128, 1612,   94,  100,  240,
       2654,  316, 2655,   70, 2656, 2657,  844,  441,  368,    1, 1128,
       1612, 1613, 2658,   43], dtype=int32)

In [17]:
word_count = len(tokenizer.index_word)
print(word_count)

5935


In [18]:
# from keras.models import Sequential
# from keras.layers import Embedding

# model = Sequential()
# model.add(Embedding(
#   input_dim= 5936, 
#   output_dim=10, 
#   input_length=max_len))

# model.compile('rmsprop', 'mse')
# output_array = model.predict(X_train_NN)[0]

In [19]:
# output_array.shape

In [20]:
# from sklearn.preprocessing import OneHotEncoder
# enc=OneHotEncoder()
# label = enc.fit_transform(df[["label"]]).toarray()

In [21]:
train= X_train_NN
label = df.label.values

In [22]:
print(train.shape)
print(label.shape)

(1000, 37)
(1000,)


In [23]:
from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(train, label, test_size=0.25)

In [24]:
print("Train shape : ",train_X.shape)
print("Test shape : ",test_X.shape)
print("Train shape : ",train_y.shape)
print("Test shape : ",test_y.shape)

Train shape :  (750, 37)
Test shape :  (250, 37)
Train shape :  (750,)
Test shape :  (250,)


In [25]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
batch_size = 64


x_train = torch.tensor(train_X, dtype=torch.long)
y_train = torch.tensor(train_y, dtype=torch.long)
x_cv = torch.tensor(test_X, dtype=torch.long)
y_cv = torch.tensor(test_y, dtype=torch.long)

# Create Torch datasets
train = TensorDataset(x_train, y_train)
valid = TensorDataset(x_cv, y_cv)

# Create Data Loaders
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid, batch_size=batch_size, shuffle=False)

In [26]:
for X, Y in train_loader:
    print(X.shape, Y.shape)

torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([64, 37]) torch.Size([64])
torch.Size([46, 37]) torch.Size([46])


In [27]:
from torch import nn
from torch.nn import functional as F

embed_len = 50
hidden_dim = 50
n_layers=1
word_count = len(tokenizer.index_word)

#LSTM Classifications
class LSTMClassifier(nn.Module):
    def __init__(self):
        super(LSTMClassifier, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings = 5936, embedding_dim=10)
        self.lstm = nn.LSTM(input_size=10, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 26)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        hidden, carry = torch.randn(1, len(X_batch), hidden_dim), torch.randn(1, len(X_batch), hidden_dim)
        output, (hidden, carry) = self.lstm(embeddings, (hidden, carry))
        return self.linear(output[:,-1])

##RNN Classifications
class RNNClassifier(nn.Module):
    def __init__(self):
        super(RNNClassifier, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=5936, embedding_dim=10)
        self.rnn = nn.RNN(input_size=10, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 26)

    def forward(self, X_batch):
        embeddings = self.embedding_layer(X_batch)
        output, hidden = self.rnn(embeddings, torch.randn(n_layers, len(X_batch), hidden_dim))
        return self.linear(output[:,-1])

In [28]:
rnn_classifier = RNNClassifier()

rnn_classifier

RNNClassifier(
  (embedding_layer): Embedding(5936, 10)
  (rnn): RNN(10, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=26, bias=True)
)

In [29]:
lstm_classifier = LSTMClassifier()
lstm_classifier

LSTMClassifier(
  (embedding_layer): Embedding(5936, 10)
  (lstm): LSTM(10, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=26, bias=True)
)

In [30]:
for layer in lstm_classifier.children():
    print("Layer : {}".format(layer))
    print("Parameters : ")
    for param in layer.parameters():
        print(param.shape)
    print()

Layer : Embedding(5936, 10)
Parameters : 
torch.Size([5936, 10])

Layer : LSTM(10, 50, batch_first=True)
Parameters : 
torch.Size([200, 10])
torch.Size([200, 50])
torch.Size([200])
torch.Size([200])

Layer : Linear(in_features=50, out_features=26, bias=True)
Parameters : 
torch.Size([26, 50])
torch.Size([26])



In [31]:
for layer in rnn_classifier.children():
    print("Layer : {}".format(layer))
    print("Parameters : ")
    for param in layer.parameters():
        print(param.shape)
    print()

Layer : Embedding(5936, 10)
Parameters : 
torch.Size([5936, 10])

Layer : RNN(10, 50, batch_first=True)
Parameters : 
torch.Size([50, 10])
torch.Size([50, 50])
torch.Size([50])
torch.Size([50])

Layer : Linear(in_features=50, out_features=26, bias=True)
Parameters : 
torch.Size([26, 50])
torch.Size([26])



In [32]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import gc

def CalcValLossAndAccuracy(model, loss_fn, val_loader):
    with torch.no_grad():
        Y_shuffled, Y_preds, losses = [],[],[]
        for X, Y in val_loader:
            preds = model(X)
            loss = loss_fn(preds, Y)
            losses.append(loss.item())

            Y_shuffled.append(Y)
            Y_preds.append(preds.argmax(dim=-1))

        Y_shuffled = torch.cat(Y_shuffled)
        Y_preds = torch.cat(Y_preds)

        print("Valid Loss : {:.3f}".format(torch.tensor(losses).mean()))
        print("Valid Acc  : {:.3f}".format(accuracy_score(Y_shuffled.detach().numpy(), Y_preds.detach().numpy())))


def TrainModel(model, loss_fn, optimizer, train_loader, val_loader, epochs=5):
    for i in range(1, epochs+1):
        losses = []
        for X, Y in tqdm(train_loader):
            Y_preds = model(X)

            loss = loss_fn(Y_preds, Y)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print("Train Loss : {:.3f}".format(torch.tensor(losses).mean()))
        CalcValLossAndAccuracy(model, loss_fn, val_loader)

In [33]:
from torch.optim import Adam

epochs = 50
learning_rate = 1e-3

loss_fn = nn.CrossEntropyLoss()
rnn_classifier = RNNClassifier()
optimizer = Adam(rnn_classifier.parameters(), lr=learning_rate)

TrainModel(rnn_classifier, loss_fn, optimizer, train_loader, valid_loader, epochs)

100%|██████████| 12/12 [00:00<00:00, 80.66it/s]


Train Loss : 3.219
Valid Loss : 3.131
Valid Acc  : 0.128


100%|██████████| 12/12 [00:00<00:00, 156.21it/s]


Train Loss : 2.974
Valid Loss : 2.803
Valid Acc  : 0.184


100%|██████████| 12/12 [00:00<00:00, 155.87it/s]


Train Loss : 2.603
Valid Loss : 2.601
Valid Acc  : 0.180


100%|██████████| 12/12 [00:00<00:00, 154.55it/s]


Train Loss : 2.472
Valid Loss : 2.580
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 160.60it/s]


Train Loss : 2.462
Valid Loss : 2.572
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 158.54it/s]


Train Loss : 2.436
Valid Loss : 2.564
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 159.78it/s]


Train Loss : 2.425
Valid Loss : 2.567
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 156.79it/s]


Train Loss : 2.421
Valid Loss : 2.577
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 160.42it/s]


Train Loss : 2.417
Valid Loss : 2.575
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 152.72it/s]


Train Loss : 2.407
Valid Loss : 2.579
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 153.37it/s]


Train Loss : 2.403
Valid Loss : 2.578
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 153.99it/s]


Train Loss : 2.395
Valid Loss : 2.585
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 152.86it/s]


Train Loss : 2.378
Valid Loss : 2.585
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 150.58it/s]


Train Loss : 2.374
Valid Loss : 2.592
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 149.09it/s]


Train Loss : 2.365
Valid Loss : 2.595
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 153.45it/s]


Train Loss : 2.344
Valid Loss : 2.606
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 157.12it/s]


Train Loss : 2.326
Valid Loss : 2.603
Valid Acc  : 0.168


100%|██████████| 12/12 [00:00<00:00, 158.08it/s]


Train Loss : 2.311
Valid Loss : 2.613
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 144.98it/s]


Train Loss : 2.289
Valid Loss : 2.619
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 157.96it/s]


Train Loss : 2.272
Valid Loss : 2.633
Valid Acc  : 0.168


100%|██████████| 12/12 [00:00<00:00, 156.85it/s]


Train Loss : 2.241
Valid Loss : 2.638
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 151.29it/s]


Train Loss : 2.222
Valid Loss : 2.654
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 152.81it/s]


Train Loss : 2.187
Valid Loss : 2.664
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 153.39it/s]


Train Loss : 2.153
Valid Loss : 2.678
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 152.59it/s]


Train Loss : 2.124
Valid Loss : 2.702
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 147.63it/s]


Train Loss : 2.087
Valid Loss : 2.710
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 148.60it/s]


Train Loss : 2.042
Valid Loss : 2.736
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 155.24it/s]


Train Loss : 2.004
Valid Loss : 2.758
Valid Acc  : 0.172


100%|██████████| 12/12 [00:00<00:00, 150.03it/s]


Train Loss : 1.954
Valid Loss : 2.766
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 148.22it/s]


Train Loss : 1.916
Valid Loss : 2.808
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 148.84it/s]


Train Loss : 1.853
Valid Loss : 2.822
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 150.63it/s]


Train Loss : 1.810
Valid Loss : 2.847
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 147.22it/s]


Train Loss : 1.757
Valid Loss : 2.879
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 156.62it/s]


Train Loss : 1.705
Valid Loss : 2.911
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 150.59it/s]


Train Loss : 1.651
Valid Loss : 2.934
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 152.38it/s]


Train Loss : 1.594
Valid Loss : 2.977
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 154.98it/s]


Train Loss : 1.530
Valid Loss : 3.019
Valid Acc  : 0.144


100%|██████████| 12/12 [00:00<00:00, 152.89it/s]


Train Loss : 1.470
Valid Loss : 3.064
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 152.59it/s]


Train Loss : 1.414
Valid Loss : 3.102
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 157.22it/s]


Train Loss : 1.355
Valid Loss : 3.122
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 161.54it/s]


Train Loss : 1.301
Valid Loss : 3.187
Valid Acc  : 0.144


100%|██████████| 12/12 [00:00<00:00, 155.56it/s]


Train Loss : 1.252
Valid Loss : 3.196
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 153.93it/s]


Train Loss : 1.190
Valid Loss : 3.245
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 151.97it/s]


Train Loss : 1.139
Valid Loss : 3.314
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 153.03it/s]


Train Loss : 1.088
Valid Loss : 3.355
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 150.69it/s]


Train Loss : 1.041
Valid Loss : 3.378
Valid Acc  : 0.144


100%|██████████| 12/12 [00:00<00:00, 151.40it/s]


Train Loss : 0.987
Valid Loss : 3.440
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 147.36it/s]


Train Loss : 0.947
Valid Loss : 3.478
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 159.54it/s]


Train Loss : 0.907
Valid Loss : 3.514
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 158.08it/s]

Train Loss : 0.856
Valid Loss : 3.593
Valid Acc  : 0.140





In [34]:
from torch.optim import Adam

epochs = 50
learning_rate = 1e-3

loss_fn = nn.CrossEntropyLoss()
rnn_classifier = RNNClassifier()
optimizer = Adam(lstm_classifier.parameters(), lr=learning_rate)

TrainModel(lstm_classifier, loss_fn, optimizer, train_loader, valid_loader, epochs)

100%|██████████| 12/12 [00:00<00:00, 57.93it/s]


Train Loss : 3.205
Valid Loss : 3.171
Valid Acc  : 0.180


100%|██████████| 12/12 [00:00<00:00, 62.48it/s]


Train Loss : 3.113
Valid Loss : 3.055
Valid Acc  : 0.188


100%|██████████| 12/12 [00:00<00:00, 62.25it/s]


Train Loss : 2.845
Valid Loss : 2.618
Valid Acc  : 0.180


100%|██████████| 12/12 [00:00<00:00, 60.98it/s]


Train Loss : 2.524
Valid Loss : 2.579
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 63.30it/s]


Train Loss : 2.464
Valid Loss : 2.562
Valid Acc  : 0.148


100%|██████████| 12/12 [00:00<00:00, 60.63it/s]


Train Loss : 2.453
Valid Loss : 2.564
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 60.08it/s]


Train Loss : 2.443
Valid Loss : 2.567
Valid Acc  : 0.164


100%|██████████| 12/12 [00:00<00:00, 51.73it/s]


Train Loss : 2.438
Valid Loss : 2.574
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 56.09it/s]


Train Loss : 2.434
Valid Loss : 2.575
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 59.64it/s]


Train Loss : 2.432
Valid Loss : 2.579
Valid Acc  : 0.184


100%|██████████| 12/12 [00:00<00:00, 57.87it/s]


Train Loss : 2.423
Valid Loss : 2.591
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 60.49it/s]


Train Loss : 2.408
Valid Loss : 2.586
Valid Acc  : 0.152


100%|██████████| 12/12 [00:00<00:00, 62.32it/s]


Train Loss : 2.407
Valid Loss : 2.593
Valid Acc  : 0.140


100%|██████████| 12/12 [00:00<00:00, 62.82it/s]


Train Loss : 2.388
Valid Loss : 2.600
Valid Acc  : 0.136


100%|██████████| 12/12 [00:00<00:00, 63.10it/s]


Train Loss : 2.371
Valid Loss : 2.597
Valid Acc  : 0.160


100%|██████████| 12/12 [00:00<00:00, 60.44it/s]


Train Loss : 2.339
Valid Loss : 2.595
Valid Acc  : 0.156


100%|██████████| 12/12 [00:00<00:00, 63.17it/s]


Train Loss : 2.311
Valid Loss : 2.593
Valid Acc  : 0.168


100%|██████████| 12/12 [00:00<00:00, 60.43it/s]


Train Loss : 2.280
Valid Loss : 2.584
Valid Acc  : 0.184


100%|██████████| 12/12 [00:00<00:00, 60.60it/s]


Train Loss : 2.230
Valid Loss : 2.582
Valid Acc  : 0.196


100%|██████████| 12/12 [00:00<00:00, 61.57it/s]


Train Loss : 2.201
Valid Loss : 2.582
Valid Acc  : 0.204


100%|██████████| 12/12 [00:00<00:00, 59.63it/s]


Train Loss : 2.149
Valid Loss : 2.582
Valid Acc  : 0.192


100%|██████████| 12/12 [00:00<00:00, 51.05it/s]


Train Loss : 2.098
Valid Loss : 2.617
Valid Acc  : 0.224


100%|██████████| 12/12 [00:00<00:00, 40.47it/s]


Train Loss : 2.070
Valid Loss : 2.591
Valid Acc  : 0.216


100%|██████████| 12/12 [00:00<00:00, 41.89it/s]


Train Loss : 2.020
Valid Loss : 2.591
Valid Acc  : 0.216


100%|██████████| 12/12 [00:00<00:00, 52.62it/s]


Train Loss : 1.955
Valid Loss : 2.624
Valid Acc  : 0.244


100%|██████████| 12/12 [00:00<00:00, 57.45it/s]


Train Loss : 1.927
Valid Loss : 2.600
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 62.24it/s]


Train Loss : 1.887
Valid Loss : 2.685
Valid Acc  : 0.220


100%|██████████| 12/12 [00:00<00:00, 62.88it/s]


Train Loss : 1.844
Valid Loss : 2.611
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 63.46it/s]


Train Loss : 1.797
Valid Loss : 2.647
Valid Acc  : 0.228


100%|██████████| 12/12 [00:00<00:00, 61.64it/s]


Train Loss : 1.744
Valid Loss : 2.674
Valid Acc  : 0.236


100%|██████████| 12/12 [00:00<00:00, 62.50it/s]


Train Loss : 1.701
Valid Loss : 2.655
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 61.03it/s]


Train Loss : 1.652
Valid Loss : 2.682
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 62.70it/s]


Train Loss : 1.615
Valid Loss : 2.711
Valid Acc  : 0.228


100%|██████████| 12/12 [00:00<00:00, 61.82it/s]


Train Loss : 1.576
Valid Loss : 2.730
Valid Acc  : 0.228


100%|██████████| 12/12 [00:00<00:00, 62.14it/s]


Train Loss : 1.546
Valid Loss : 2.780
Valid Acc  : 0.240


100%|██████████| 12/12 [00:00<00:00, 61.89it/s]


Train Loss : 1.494
Valid Loss : 2.740
Valid Acc  : 0.236


100%|██████████| 12/12 [00:00<00:00, 60.69it/s]


Train Loss : 1.454
Valid Loss : 2.771
Valid Acc  : 0.236


100%|██████████| 12/12 [00:00<00:00, 62.13it/s]


Train Loss : 1.393
Valid Loss : 2.818
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 62.84it/s]


Train Loss : 1.361
Valid Loss : 2.856
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 64.36it/s]


Train Loss : 1.318
Valid Loss : 2.853
Valid Acc  : 0.216


100%|██████████| 12/12 [00:00<00:00, 62.84it/s]


Train Loss : 1.272
Valid Loss : 2.874
Valid Acc  : 0.212


100%|██████████| 12/12 [00:00<00:00, 60.73it/s]


Train Loss : 1.236
Valid Loss : 2.897
Valid Acc  : 0.216


100%|██████████| 12/12 [00:00<00:00, 60.30it/s]


Train Loss : 1.193
Valid Loss : 2.934
Valid Acc  : 0.232


100%|██████████| 12/12 [00:00<00:00, 59.94it/s]


Train Loss : 1.148
Valid Loss : 2.961
Valid Acc  : 0.224


100%|██████████| 12/12 [00:00<00:00, 62.26it/s]


Train Loss : 1.110
Valid Loss : 2.939
Valid Acc  : 0.200


100%|██████████| 12/12 [00:00<00:00, 61.63it/s]


Train Loss : 1.075
Valid Loss : 2.974
Valid Acc  : 0.224


100%|██████████| 12/12 [00:00<00:00, 62.63it/s]


Train Loss : 1.046
Valid Loss : 3.031
Valid Acc  : 0.224


100%|██████████| 12/12 [00:00<00:00, 65.27it/s]


Train Loss : 0.996
Valid Loss : 3.006
Valid Acc  : 0.200


100%|██████████| 12/12 [00:00<00:00, 62.61it/s]


Train Loss : 0.969
Valid Loss : 3.066
Valid Acc  : 0.224


100%|██████████| 12/12 [00:00<00:00, 60.58it/s]


Train Loss : 0.912
Valid Loss : 3.050
Valid Acc  : 0.216
