# WNC Library Functionality Testing

In [1]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [2]:
# Input sentences for the demo cells below; the whole list is passed to
# `sn.transfer` and `sc.score`, and the first two entries seed `text_pairs`.
examples = [
    # Same statement with progressively weaker praise:
    # "the greatest" -> "a great" -> no evaluative adjective.
    "Sir Alex Ferguson is the greatest football manager of all time.",
    "Sir Alex Ferguson is a great football manager.",
    "Sir Alex Ferguson is a football manager.",
    # Lower-cased sentences with space-separated punctuation; presumably taken
    # from the WNC corpus -- TODO confirm source.
    # NOTE(review): "quarteback" looks like a typo carried over from the source
    # data -- leave as-is unless confirmed otherwise.
    "the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident and a biased and eerily thick-headed woman .",
    "it also marked the last season in quarteback 's brett favre illustrious career as a packer .",
]


# Variants of the "greatest football manager" claim with different hedging
# phrases, scored with `sc.score`. The first entry is identical to examples[0].
# NOTE(review): the last entry has no closing period, unlike the others --
# confirm whether that is intentional.
tests = [
    "Sir Alex Ferguson is the greatest football manager of all time.",
    "Sir Alex Ferguson is considered by some to be the greatest football manager of all time.",
    "Sir Alex Ferguson is one of the greatest football managers of all time.",
    "Sir Alex Ferguson is widely regarded as one of the greatest football managers of all time",
]

## `SubjectivityNeutralizer` Testing

In [3]:
from src.inference import SubjectivityNeutralizer

# Checkpoint loaded by SubjectivityNeutralizer. It gets a dedicated name so it
# cannot be confused with the classifier checkpoint paths defined in later cells.
# NOTE(review): this is an absolute, environment-specific path, while the other
# local checkpoints in this notebook use "../models/..." -- confirm whether this
# one should be relative/configurable too.
NEUTRALIZER_MODEL_PATH = "/home/cdsw/models/bart-tst-full"
sn = SubjectivityNeutralizer(model_identifier=NEUTRALIZER_MODEL_PATH)

In [4]:
# Run the neutralizer over every example sentence; the returned list (one
# rewritten sentence per input) is shown as the cell output.
sn.transfer(examples)

['Sir Alex Ferguson is one of the greatest football managers of all time.',
 'Sir Alex Ferguson is a football manager.',
 'Sir Alex Ferguson is a football manager.',
 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.',
 "it also marked the last season in quarteback 's career."]

## `StyleIntensityClassifier` Testing

In [5]:
from src.inference import StyleIntensityClassifier

# Classifier checkpoint. Uses the same constant name and value as the
# CLS_MODEL_PATH passed to ContentPreservationScorer later in this notebook,
# instead of rebinding a generic MODEL_PATH to a second, unrelated model.
CLS_MODEL_PATH = (
    "../models/TRIAL-J-shuffle-lr_3en06-epoch_15-wd_.1-bs_32/checkpoint-67466"
)
sc = StyleIntensityClassifier(model_identifier=CLS_MODEL_PATH)

In [6]:
# Score the hedged variants. Each result carries a label, its score, and a
# two-entry distribution; in the output below the score equals the
# distribution entry of the reported label.
sc.score(tests)

[{'label': 'LABEL_0',
  'score': 0.993498682975769,
  'distribution': [0.993498682975769, 0.006501343566924334]},
 {'label': 'LABEL_1',
  'score': 0.748929500579834,
  'distribution': [0.25107046961784363, 0.748929500579834]},
 {'label': 'LABEL_0',
  'score': 0.8673079609870911,
  'distribution': [0.8673079609870911, 0.13269200921058655]},
 {'label': 'LABEL_1',
  'score': 0.589175283908844,
  'distribution': [0.4108246862888336, 0.589175283908844]}]

In [7]:
# Score the original example sentences with the same classifier, for
# comparison with the `tests` scores above.
sc.score(examples)

[{'label': 'LABEL_0',
  'score': 0.993498682975769,
  'distribution': [0.993498682975769, 0.006501343566924334]},
 {'label': 'LABEL_0',
  'score': 0.9942171573638916,
  'distribution': [0.9942171573638916, 0.005782798398286104]},
 {'label': 'LABEL_1',
  'score': 0.995918333530426,
  'distribution': [0.004081716760993004, 0.995918333530426]},
 {'label': 'LABEL_0',
  'score': 0.9957911968231201,
  'distribution': [0.9957911968231201, 0.004208787810057402]},
 {'label': 'LABEL_0',
  'score': 0.9760012626647949,
  'distribution': [0.9760012626647949, 0.02399877831339836]}]

### Calculate Style Transfer Intensity

In [8]:
# Build aligned (input, output) pairs from the first two example sentences.
input_text = examples[:2]
# Feed the very same list to the neutralizer (rather than slicing `examples`
# a second time) so inputs and outputs cannot drift out of sync if the slice
# is changed later.
output_text = sn.transfer(input_text)

# The keys match the keyword arguments that the scoring calls further down
# receive via `**text_pairs`.
text_pairs = {"input_text": input_text, "output_text": output_text}
text_pairs

{'input_text': ['Sir Alex Ferguson is the greatest football manager of all time.',
  'Sir Alex Ferguson is a great football manager.'],
 'output_text': ['Sir Alex Ferguson is one of the greatest football managers of all time.',
  'Sir Alex Ferguson is a football manager.']}

In [9]:
# `text_pairs` is unpacked into the `input_text` / `output_text` keyword
# arguments; the output below holds one intensity value per sentence pair.
sc.calculate_transfer_intensity(**text_pairs)

[0.1262, 0.9901]

## `ContentPreservationScorer` Testing

**TODO:** Add a detailed walkthrough of how the main scoring method works, stepping through its intermediate steps with an example. Do the same for the other classes, and collect these walkthroughs in a final usage notebook.

In [10]:
from src.inference import ContentPreservationScorer

# Sentence-embedding model identifier; presumably resolved as a hub model name
# rather than a local path -- TODO confirm.
SBERT_MODEL_PATH = "sentence-transformers/all-MiniLM-L6-v2"
# Same checkpoint path string that is passed to StyleIntensityClassifier in the
# earlier cell.
CLS_MODEL_PATH = (
    "../models/TRIAL-J-shuffle-lr_3en06-epoch_15-wd_.1-bs_32/checkpoint-67466"
)
# The scorer takes both an SBERT identifier and a classifier identifier.
cps = ContentPreservationScorer(
    sbert_model_identifier=SBERT_MODEL_PATH, cls_model_identifier=CLS_MODEL_PATH
)

In [11]:
# Content preservation with mask_type="pad": in the output below, some tokens
# of the (lower-cased) input and output texts are replaced by "[PAD]".
# return_all=True returns the masked texts alongside the scores.
# NOTE(review): threshold=0.3 presumably controls which tokens get masked --
# confirm against ContentPreservationScorer.
cps.calculate_content_preservation_score(
    **text_pairs, threshold=0.3, mask_type="pad", return_all=True
)

{'scores': [0.9571159482002258, 0.9961498975753784],
 'masked_input_text': ['sir alex ferguson [PAD] the greatest [PAD] manager of all time.',
  'sir alex ferguson is a [PAD] football manager.'],
 'masked_output_text': ['sir alex ferguson is [PAD] of the greatest football managers of all time.',
  'sir alex ferguson is [PAD] football manager.']}

In [13]:
# Same call with mask_type="none": in the output below, the "masked" texts are
# the original sentences unchanged, giving an unmasked baseline to compare
# with the mask_type="pad" scores.
cps.calculate_content_preservation_score(
    **text_pairs, threshold=0.3, mask_type="none", return_all=True
)

{'scores': [0.9852172136306763, 0.9445622563362122],
 'masked_input_text': ['Sir Alex Ferguson is the greatest football manager of all time.',
  'Sir Alex Ferguson is a great football manager.'],
 'masked_output_text': ['Sir Alex Ferguson is one of the greatest football managers of all time.',
  'Sir Alex Ferguson is a football manager.']}