In [1]:
import numpy as np
import torch
from torch import nn, optim
from dataset import get_twit_company_dataloaders, split_sentence
from model import LSTMTwitClassifier
import torch.nn.functional as F

# text, label = next(iter(dataloader_train))

In [3]:
# Experiment setup: hyperparameters, data loaders, model, optional Weights &
# Biases tracking, loss and optimizer. Every name defined here is read by the
# later cells, so the names are kept stable.

# Toggle experiment tracking; wandb is only imported when this is True.
use_wandb = False

# --- Hyperparameters -------------------------------------------------------
lr = 0.0005
embedding_size = 100
hidden_size = 100
epochs_cnt = 50
embeddings = "word2vec"
lstm_layers = 1
dropout = 0.5
# Number of output classes; matches the four labels (apple, google, microsoft,
# twitter) reported by the train-evaluation cell.
num_classes = 4
# Fixed seed so that a Restart & Run All reproduces the same initial weights
# and dropout masks.
seed = 42

# Seed the global torch and NumPy generators before anything stochastic runs.
# NOTE(review): whether the train/test split is also made reproducible depends
# on which RNG get_twit_company_dataloaders uses - confirm in dataset.py.
torch.manual_seed(seed)
np.random.seed(seed)

dataset_train, dataloader_train, dataset_test, dataloader_test = get_twit_company_dataloaders(embedding_dim=embedding_size, embedding=embeddings)

model = LSTMTwitClassifier(num_classes, embedding_dim=embedding_size, hidden_dim=hidden_size, dropout=dropout, lstm_layers=lstm_layers)

if use_wandb:
    # Imported lazily so the notebook runs without wandb installed.
    import wandb

    wandb.init(project='twit_classification', entity='ars860')

    # Record the run configuration alongside the logged metrics.
    config = wandb.config
    config.loss = "BCE"
    config.optimizer = "Adam"
    config.learning_rate = lr
    config.hidden_size = hidden_size
    config.embedding_size = embedding_size
    config.embeddings = embeddings
    config.epochs = epochs_cnt
    config.dropout = dropout
    config.lstm_layers = lstm_layers
    config.stem = "snowballstemmer"
    config.seed = seed

    wandb.watch(model)

# BCELoss expects probabilities; the training loop applies a softmax to the
# model output before calling the criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

def loss_on_test(net=None, loader=None, log_to_wandb=None):
    """Evaluate a classifier on a data loader and report mean loss and accuracy.

    Each item yielded by the loader is unpacked as ``(txt, company)``. The
    model output is passed through a softmax over dim 0 and compared with the
    flattened target using binary cross-entropy. A sample counts as correct
    when the argmax of the prediction equals the argmax of the target.
    NOTE(review): accuracy counts one prediction per loader item, so this
    assumes the loader yields a single sample per item - confirm.

    Args:
        net: Model to evaluate. Defaults to the notebook-level ``model``.
        loader: Iterable of ``(txt, company)`` pairs. Defaults to the
            notebook-level ``dataloader_test``.
        log_to_wandb: Whether to log ``test_loss`` / ``test_accuracy`` to
            wandb. Defaults to the notebook-level ``use_wandb`` flag.

    Returns:
        Tuple ``(mean_loss, accuracy)`` as Python floats. Both are NaN when
        the loader yields no items (nothing is logged in that case).
    """
    # Fall back to the notebook globals so the original zero-argument call
    # keeps working.
    net = model if net is None else net
    loader = dataloader_test if loader is None else loader
    log_to_wandb = use_wandb if log_to_wandb is None else log_to_wandb

    correct = 0
    sample_losses = []

    # Remember the current mode so evaluation does not silently flip an
    # eval-mode model back into training mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for txt, company in loader:
                prediction = F.softmax(net(txt), dim=0)

                # .item() stores a plain float instead of a 0-d tensor.
                sample_losses.append(
                    F.binary_cross_entropy(prediction, company.view(-1)).item())
                if torch.argmax(prediction) == torch.argmax(company):
                    correct += 1
    finally:
        # Restore the previous mode even if the forward pass raised.
        net.train(was_training)

    evaluated = len(sample_losses)
    if evaluated == 0:
        # Nothing to average: avoid a division by zero and a NaN-mean warning.
        return float("nan"), float("nan")

    mean_loss = float(np.mean(sample_losses))
    accuracy = correct / evaluated

    if log_to_wandb:
        wandb.log({"test_loss": mean_loss, "test_accuracy": accuracy})

    return mean_loss, accuracy

# Ring buffer of the 100 most recent per-step losses, used for the periodic
# progress report. Every slot has been written by the time it is first read,
# because reports only happen once 100 steps of the current epoch have run.
losses = np.empty(100)
model.train()
for epoch in range(epochs_cnt):
    # One loss value per training step of this epoch.
    epoch_loss = np.zeros(len(dataloader_train))

    for i, (txt, company) in enumerate(dataloader_train):
        model.zero_grad()

        # Forward pass: softmax over dim 0 turns the model output into the
        # probabilities that the BCE criterion expects.
        prediction = F.softmax(model(txt), dim=0)
        loss = criterion(prediction, company.view(-1))

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Keep only the scalar value for bookkeeping.
        loss = loss.detach().item()
        losses[i % 100] = epoch_loss[i] = loss

        # Report only on every 100th step.
        if (i + 1) % 100 != 0:
            continue

        print(
            f"Epoch {epoch + 1}/{epochs_cnt}, iter: {i + 1}/{len(dataloader_train)}, mean loss: {np.mean(losses)}")
        if use_wandb:
            wandb.log({"loss": np.mean(losses)})

    # End-of-epoch tracking (mean epoch loss and test metrics) only happens
    # when wandb logging is enabled.
    if use_wandb:
        wandb.log({"epoch_loss": np.mean(epoch_loss)})
        loss_on_test()

# [model.get_word_embedding(word) for word in "hello_world".split(' ')]

http://t.co/48emAEID 
http://t.co/Izh7KaiU 
http://t.co/e5ClGzsI 
http://t.co/18xg3ivo! 
Поиск от 
サムスン電子のスマートフォン新機種「ギャラクシー・ネクサス」、グーグルの基本ソフト（ＯＳ）「アンドロイド」最新版を搭載。「クラウド」活用、音声認識やカメラの機能も向上させた戦略モデル 
Новите 
اعرف الكثير عن نظام ايسكريم ساندويتش http://t.co/Fzjd2Zx1 
看見 
نظام جديد .. و جهاز جديد شكراً جزيلاً 
ايسكريم ساندويش، عسل، زنجبيل .. مشكلة من كثر المسميات احسهم مسوين مقادير مب انظمة !! 😝  
الجهاز الجديد عجيب   
Я немного потрясен :) 
今日発表だった＾ﾛ＾　 
يبدو ان طفرة الاجهزة الالكترونية القادمة ستكون بقيادة موتورولا ،، لاسيم بعد استحواذ قوقل عليها.   
顔認識ロック解除失敗してる・・・デモで失敗しちゃっていいのか？ 
Με συγχισες 
デフォでデータ通信制御？　
http://t.co/gAPEyL5N 
http://t.co/J3p3KYHf 
http://t.co/h1IH7FN6 مايكروسوفت تقوم بتطوير تقنية تمكنك من استخدام يدك كهاتف  
http://t.co/ONI0JX8B 
http://t.co/JVidt6U4 
На сайте 
☼ 
Настоящий твиттерянин как только попадает в толпу стремиться тут же как можно быстрее попасть в 
Доброе утро 
【
なにやらフォロー制限に引っ掛かったようです…もっとフォロアーを増やさなくちゃ♪　
ツイッターを利用して感謝の気持ちとともに約２０００円が振り込まれ続ける方法→ｺｺ→ http://t.co/TyjUG



KeyboardInterrupt: 

In [7]:
# Measure accuracy on the training split and count how often each class index
# is predicted. The model is switched to evaluation mode first.
model.eval()

print("Testing on train")

correct = 0
predictions_cnt = [0] * 4  # one counter per class index

with torch.no_grad():
    for i, (txt, company) in enumerate(dataloader_train):
        prediction = F.softmax(model(txt), dim=0)
        predicted_class = torch.argmax(prediction)

        # A hit is when the predicted index equals the index of the target's
        # largest entry.
        correct += int(predicted_class == torch.argmax(company))
        predictions_cnt[predicted_class] += 1

        if i % 100 == 0:
            print(f"Iter: {i}/{len(dataloader_train)}")

print(f"Accuracy {correct / len(dataloader_train)}")

if use_wandb:
    # Store the final numbers in the run summary, then close the run.
    run_summary = wandb.run.summary
    run_summary.train_accuracy = correct / len(dataloader_train)
    run_summary.classified_as = dict(
        zip(("apple", "google", "microsoft", "twitter"), predictions_cnt))
    wandb.finish()

Testing on train
Iter: 0/3382
Iter: 100/3382
Iter: 200/3382
Iter: 300/3382
Iter: 400/3382
Iter: 500/3382
Iter: 600/3382
Iter: 700/3382
Iter: 800/3382
Iter: 900/3382
Iter: 1000/3382
Iter: 1100/3382
Iter: 1200/3382
Iter: 1300/3382
Iter: 1400/3382
Iter: 1500/3382
Iter: 1600/3382
Iter: 1700/3382
Iter: 1800/3382
Iter: 1900/3382
Iter: 2000/3382
Iter: 2100/3382
Iter: 2200/3382
Iter: 2300/3382
Iter: 2400/3382
Iter: 2500/3382
Iter: 2600/3382
Iter: 2700/3382
Iter: 2800/3382
Iter: 2900/3382
Iter: 3000/3382
Iter: 3100/3382
Iter: 3200/3382
Iter: 3300/3382
Accuracy 0.9911295091661738


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,0.00455
_runtime,1582.0
_timestamp,1633816481.0
_step,1749.0
epoch_loss,0.01621
test_loss,0.40269
test_accuracy,0.81065
train_accuracy,0.99113


0,1
loss,▁▁▁▁▅█▁▁▁▂▁▂▁▁▂▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch_loss,▇█▇▇█▇▇▇▆▇▅▅▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss,████▆▆▆▄▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▁▁▁▁▁▁▁▃▅▅▆▆▇▇▇▇▇▇▇▇███████▇▇█▇▇█▇█████


In [8]:
# Measure accuracy on the held-out test split and count how often each class
# index is predicted.
print("Testing on test")

# Switch to evaluation mode explicitly instead of relying on an earlier cell
# having done so. Run on its own after training, this cell would otherwise
# evaluate with the model still in training mode.
# NOTE(review): presumably this matters because the model applies dropout
# (configured in the setup cell) - confirm in model.py.
model.eval()

correct = 0
predictions_cnt = [0, 0, 0, 0]  # one counter per class index

with torch.no_grad():
    for i, (txt, company) in enumerate(dataloader_test):
        # Softmax over dim 0 turns the model output into class probabilities.
        prediction = model(txt)
        prediction = F.softmax(prediction, dim=0)

        # A hit is when the predicted index equals the index of the target's
        # largest entry.
        if torch.argmax(prediction) == torch.argmax(company):
            correct += 1

        predictions_cnt[torch.argmax(prediction)] += 1

        if i % 100 == 0:
            print(f"Iter: {i}/{len(dataloader_test)}")

# NOTE(review): dividing by the number of loader items assumes each item is a
# single sample - confirm against the dataloader's batch size.
print(f"Accuracy {correct / len(dataloader_test)}")

Testing on test
Iter: 0/338
Iter: 100/338
Iter: 200/338
Iter: 300/338
Accuracy 0.8106508875739645
