In [1]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

import torch
import torchtext
from torch import nn
from torchtext.data import Field, LabelField
from torchtext.data import BucketIterator
from torchtext.datasets import IMDB
from transformers import BertTokenizer
from transformers import BertModel
from skorch import NeuralNetClassifier
from skorch.callbacks import Freezer
from skorch.callbacks import ProgressBar
import datasets
import nlp
from datasets import Dataset
import transformers
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [2]:
from util import *

In [6]:
# Translated names of every tier1 entry that has a translation, in sorted order.
candidate_labels = sorted(
    tier_translations[code] for code in tier1 if code in tier_translations
)

In [7]:
# Display the sorted label names that are passed to the zero-shot classifier below.
candidate_labels

['Analysis and Modeling',
 'Anatomical Target',
 'Imaging',
 'Manufacturing',
 'Personalized Product',
 'Specification of Use',
 'Surgical Method']

In [8]:
# Both splits are stored as gzip-compressed JSON Lines files (one record per line).
training_set = pd.read_json(
    "training_set.json.gz",
    orient="records",
    lines=True,
)
testing_set = pd.read_json(
    "testing_set.json.gz",
    orient="records",
    lines=True,
)

In [9]:
# Apply array_labels (from util) row-wise over the tier1 columns and store the
# result in a new `labels` column; judging by the displayed output it yields a
# list of label names per row.
training_set["labels"] = training_set[tier1].apply(array_labels, axis="columns")
testing_set["labels"] = testing_set[tier1].apply(array_labels, axis="columns")

In [10]:
# Inspect the freshly built per-row label lists.
training_set["labels"]

0      [Analysis and Modeling, Anatomical Target, Ima...
1                                 [Personalized Product]
2      [Analysis and Modeling, Anatomical Target, Man...
3      [Analysis and Modeling, Anatomical Target, Ima...
4              [Anatomical Target, Personalized Product]
                             ...                        
967    [Analysis and Modeling, Anatomical Target, Ima...
968       [Imaging, Manufacturing, Personalized Product]
969       [Imaging, Manufacturing, Personalized Product]
970    [Anatomical Target, Imaging, Personalized Prod...
971    [Anatomical Target, Imaging, Manufacturing, Pe...
Name: labels, Length: 972, dtype: object

In [11]:
# Build a zero-shot classification pipeline on top of the BART-large MNLI checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

Some weights of the model checkpoint at facebook/bart-large-mnli were not used when initializing BartModel: ['model.encoder.version', 'model.decoder.version']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/bart-large-mnli were not used when initializing BartForSequenceClassification: ['model.encoder.version', 'model.decoder.version']
- This IS expected if you are initializing BartForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification m

In [12]:
# Ground-truth labels of the example row (index 2) used in the cells below.
training_set["labels"][2]

['Analysis and Modeling',
 'Anatomical Target',
 'Manufacturing',
 'Personalized Product',
 'Specification of Use']

In [13]:
# Claims text of the same example row (index 2).
training_set["claims"][2]

'What is claimed is: \n     \n       1. A femoral implant alignment guide for implanting a femoral component in a particular patient, comprising:\n a body configured to be placed on a distal end of a femur and be aligned on an axis from between the particular patient&#39;s femoral condyles through the particular patient&#39;s hip center, the body including:\n an elongated resection slot with a major axis substantially perpendicular to the axis through the particular patient&#39;s hip center when the body is placed on the distal end of the femur and aligned on the axis through the particular patient&#39;s hip center; \n a medial portion configured to contact a medial condyle of the femur when the body is placed on the distal end of the femur and aligned on the axis through the particular patient&#39;s hip center, the medial portion comprising a medial tab configured to engage the medial condyle; and \n a lateral portion configured to contact a medial condyle of the femur when the body i

In [14]:
# Score the example row's claims text against every candidate label.
# NOTE(review): rows in this dataset carry several labels at once (see
# training_set.labels), but this call uses the pipeline's default scoring mode.
# Per the transformers docs the default normalises scores across candidates so
# they sum to 1; independent per-label scores need multi_label=True (spelled
# multi_class=True in older transformers releases) — confirm against the
# installed version before relying on these scores for multi-label evaluation.
classifier(training_set.claims[2], candidate_labels)

{'sequence': 'What is claimed is: \n     \n       1. A femoral implant alignment guide for implanting a femoral component in a particular patient, comprising:\n a body configured to be placed on a distal end of a femur and be aligned on an axis from between the particular patient&#39;s femoral condyles through the particular patient&#39;s hip center, the body including:\n an elongated resection slot with a major axis substantially perpendicular to the axis through the particular patient&#39;s hip center when the body is placed on the distal end of the femur and aligned on the axis through the particular patient&#39;s hip center; \n a medial portion configured to contact a medial condyle of the femur when the body is placed on the distal end of the femur and aligned on the axis through the particular patient&#39;s hip center, the medial portion comprising a medial tab configured to engage the medial condyle; and \n a lateral portion configured to contact a medial condyle of the femur wh