In [1]:
# Load (or reload) IPython's autoreload extension. Mode 2 re-imports modules
# before each cell is executed, so edits to the project's .py files are picked
# up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from main import  get_args_parser

import random

import numpy as np
import torch
from torch.utils.data import DataLoader

from datasets import build_fdb_data, collate_fn
from models import build_models
from engine import Engine

In [3]:
# Emulate a command-line invocation from inside the notebook.
# Options that take a value are listed as (option, value) pairs and flattened
# into the token list argparse expects; the token order is kept unchanged.
option_values = [
    ('--dataset_size', '1.0'),
    ('--batch_size', '4'),
    ('--device', 'cpu'),
    ('--class_depth', '3'),
    ('--bbox_depth', '5'),
    ('--hidden_dim', '2048'),
]
cli_args = [token for pair in option_values for token in pair]

# `--eval` is passed as a bare flag; the checkpoint path is later read back
# through `args.frozen_weights`.
cli_args += ['--eval', '--frozen_weights', './outputs/0223/checkpoint0003.pth']

args_parser = get_args_parser()
args = args_parser.parse_args(cli_args)

In [None]:
# Resolve the torch device requested through the parsed arguments.
device = torch.device(args.device)

# Fix the seed for reproducibility: torch, numpy and the stdlib RNG all get
# the same value, in that order.
seed = args.seed
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(seed)

print()
print("Loading Dataset...")

# build_fdb_data also hands back the postprocessor, the class count and the
# frequencies that are forwarded to build_models below.
dataset_train, dataset_val, postprocessor, num_classes, freqs = build_fdb_data(args)
print('Using frequencies:', freqs)

print("Dataset loaded")
print()
print("Loading Models...")

tokenizer, model, criterion = build_models(num_classes, freqs, args)
model.to(device)

model.set_transformer_trainable(False)

# Restore weights from the checkpoint when a path was supplied; tensors are
# mapped to CPU while loading.
frozen_weights_path = args.frozen_weights
if frozen_weights_path is not None:
    checkpoint = torch.load(frozen_weights_path, map_location="cpu")
    model.load_state_dict(checkpoint["model"])

print("Models Loaded")

# Settings shared by both loaders; only shuffling (and the explicit
# drop_last on validation) differ.
loader_common = dict(
    batch_size=args.batch_size,
    collate_fn=collate_fn,
    num_workers=args.num_workers,
)
data_loader_train = DataLoader(dataset_train, shuffle=True, **loader_common)
data_loader_val = DataLoader(
    dataset_val,
    shuffle=False,
    drop_last=False,
    **loader_common,
)

In [None]:
# Run one evaluation pass over the validation loader.
engine = Engine()

# Clear the postprocessor's stored results before evaluating.
postprocessor.reset_results()

eval_kwargs = dict(
    tokenizer=tokenizer,
    model=model,
    criterion=criterion,
    postprocessor=postprocessor,
    data_loader=data_loader_val,
    epoch=0,
    device=device,
)
report = engine.evaluate(**eval_kwargs)

In [6]:
# Display the table returned by the postprocessor: one row per discourse class
# with precision / recall / f1, plus a macro_avg row.
# NOTE(review): presumably scores the predictions gathered during
# engine.evaluate() — confirm against FBPPostProcess.
postprocessor.evaluate()

Unnamed: 0,precision,recall,f1
Lead,0.450617,0.761738,0.566257
Position,0.03379,0.259375,0.059791
Evidence,0.372528,0.606438,0.461538
Claim,0.085247,0.176233,0.11491
Concluding Statement,0.662551,0.775903,0.714761
Counterclaim,0.007495,0.275448,0.014593
Rebuttal,0.013967,0.170731,0.025822
macro_avg,0.232314,0.432267,0.279667


# Other

In [6]:
import pandas as pd
from datasets import load_texts, OrdinalEncoder, FBPPostProcess, PIPELINE

In [7]:
# Load the documents and their tag table directly, bypassing the dataset builder.
# NOTE(review): the literal 1.0 is presumably the dataset fraction — confirm
# against load_texts.
documents, tags = load_texts(args.input_path, PIPELINE, 1.0)

# Distinct discourse types, fed to the encoder as a single column of shape (n, 1).
label_unique = np.array(tags["discourse_type"].unique())
encoder = OrdinalEncoder()
encoder.fit(label_unique[:, np.newaxis])

# Rebuild the postprocessor from the freshly loaded tags; this rebinds the
# `num_classes` and `postprocessor` names.
num_classes = label_unique.size
postprocessor = FBPPostProcess(encoder, tags, num_classes)

Loading Dataset: 100%|██████████| 15594/15594 [00:02<00:00, 7494.58it/s]


In [12]:
# Read the saved results table for run 0223; the first CSV column is used as
# the DataFrame index.
results = pd.read_csv('./outputs/0223/results.csv', index_col=0)

In [16]:
# Score the results table loaded from disk and display the resulting report
# (this rebinds `report`).
report = postprocessor.evaluate(results)
report

Unnamed: 0,precision,recall,f1
Lead,0.423533,0.710597,0.530735
Position,0.110337,0.221824,0.147371
Evidence,0.396702,0.663553,0.496547
Claim,0.117321,0.290002,0.167058
Concluding Statement,0.664636,0.774663,0.715444
Counterclaim,0.017419,0.144876,0.031099
Rebuttal,0.011115,0.125444,0.02042
macro_avg,0.248723,0.418708,0.301239


In [18]:
# Find the document whose individual report has the highest macro-averaged f1.
# Every time a new best is found its id and score are printed.
maxx = 0        # best macro f1 seen so far
idx_m = None    # id of the document that achieved it
repo = None     # report of that document
gb = results.groupby(by='id')

for idx in results['id'].unique():
    rep = postprocessor.evaluate(gb.get_group(idx))
    f1 = rep['f1']['macro_avg']
    # Skip anything that does not strictly beat the current best.
    if not f1 > maxx:
        continue
    maxx, idx_m, repo = f1, idx, rep
    print(idx_m, maxx)

repo

87D53BFDACAE 0.3806457927764305
A7AB95A87C65 0.3806986175446987
DB4FE19B8FDA 0.39254493498075466
A6FCBC1C476D 0.40557568344353795
5976F16FE9D2 0.45420175487972664
31EEF9A4A29B 0.5136888847375385
A6504C463F7E 0.5179381448727023
A1C00871518B 0.5336057134310005
4014F3766E03 0.565344692070668


Unnamed: 0,precision,recall,f1
Lead,0.999001,0.999001,0.999001
Position,0.49975,0.999001,0.666223
Evidence,0.39992,0.9995,0.571265
Claim,0.142837,0.49975,0.222173
Concluding Statement,0.999001,0.999001,0.999001
Counterclaim,0.333222,0.999001,0.49975
Rebuttal,0.0,0.0,0.0
macro_avg,0.481962,0.785036,0.565345


In [40]:
# Manually pick document 31EEF9A4A29B, overriding whatever `idx_m` the
# best-f1 search left behind, and display that document's report.
idx_m = '31EEF9A4A29B'
postprocessor.evaluate(gb.get_group(idx_m))

Unnamed: 0,precision,recall,f1
Lead,0.999001,0.999001,0.999001
Position,0.49975,0.999001,0.666223
Evidence,0.59988,0.999667,0.749813
Claim,0.124984,0.333222,0.181785
Concluding Statement,0.999001,0.999001,0.999001
Counterclaim,0.0,0.0,0.0
Rebuttal,0.0,0.0,0.0
macro_avg,0.460374,0.618556,0.513689


In [41]:
import util.visualization as viz

# Print the selected document's text split into the segments listed in `tags`.
# NOTE(review): `tags` presumably holds the reference annotations — confirm.
viz.print_segments(idx_m, documents[idx_m], tags)


[32msome schools offer distance learning as an option for students to attend classes from home by way of online or video [Lead]

[31mi think that students will benefit more from distance learning than from going to school. not everyone agrees that going school is for them. distance learning is gives a better chance to learn all your [Position]
[0mare [0ma [0mcouple [0mreasons [0mwhy [0mi [0msupport [0mthe [0midea [0mof [0mdistance [0mlearning.first, [0mreason 
[35mwhy i think that distance learning is more beneficial to you is because you can manage your time better with class work and classes. you [Claim]

[34mthe opportunity to work on your own schedule. separate the classes throughout the day instead of all bunched up at once. more time for you to focus on either classes you kind of struggle with or classes most important. all work does not have to have a due date or time limit. you can take your time to understand the material at hand and do all your work when cap

In [43]:
# Print the same document again, this time with segments taken from `results`.
# NOTE(review): `results` presumably holds the model's predicted segments — confirm.
viz.print_segments(idx_m, documents[idx_m], results)


[32msome schools offer distance learning as an option for students to attend classes from home by way of online or video conferencing. i think that students will benefit more from distance learning than from going to school. not everyone agrees [Lead]

[31mattend classes from home by way of online or video conferencing. i think that students will benefit more from distance learning than from going to school. not everyone agrees that going school is for them. distance [Position]

[35mvideo conferencing. i think that students will benefit more from distance learning than from going to school. not everyone agrees that going [Claim]

[35mis for them. distance learning is gives a better chance to learn all your education.here are a couple reasons why i support the idea of distance learning.first, [Claim]

[35mare a couple reasons why i support the idea of distance learning.first, reason why i think that distance learning is more beneficial to you is because you [Claim]

[34mi support