In [1]:
from transformers import pipeline
import random
# Zero-shot classification pipeline backed by the BART-large MNLI checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [2]:
import pandas as pd
import os

# Directory that holds the processed data. Set the LLM_BIB_DATA_DIR
# environment variable to point somewhere else; when it is unset the original
# hard-coded location is used, so existing runs behave exactly as before.
PATH_data_export = os.environ.get(
    "LLM_BIB_DATA_DIR",
    "/Users/huiziyu/Library/CloudStorage/GoogleDrive-huiziy@g.ucla.edu/My Drive/Project - LLM Bib in Biomedical & Health/data/processed",
)
data = pd.read_csv(os.path.join(PATH_data_export, "relevant_data_gpt.csv"))

# Texts to classify: the "Title_abstract" column of the loaded table.
# (The former mid-cell `data.head()` was dropped: only a cell's last
# expression is displayed, so it had no effect.)
docs = data["Title_abstract"]
# Candidate topics for zero-shot classification, one topic per line.
# The "Theme Track" and "NLP Application" entries have been removed.
zeroshot_topic_list = [
    topic.strip()
    for topic in """
Commonsense Reasoning
Computational Social Science and Cultural Analytics
Dialogue and Interactive Systems
Ethics in NLP
Human-Centered NLP
Information Extraction
Information Retrieval and Text Mining
Interpretability, Interactivity and Analysis of Models for NLP
Language Modeling and Analysis of Language Models
Cognitive Modeling and Psycholinguistics
Multilinguality and Linguistic Diversity
Natural Language Generation
Question Answering
Resources and Evaluation
Semantics: Lexical, Sentence level, Document Level, Textual Inference, etc.
Sentiment Analysis, Stylistic Analysis, and Argument Mining
Image, Vision, Video and Multimodality
Summarization
""".strip().splitlines()
]


In [3]:
# Smoke test: score a single document against every candidate topic.
candidate_labels = zeroshot_topic_list
sequence_to_classify = docs[1]
classifier(sequence_to_classify, candidate_labels=candidate_labels)

{'sequence': 'How Does ChatGPT Perform on the United States Medical Licensing Examination? The Implications of Large Language Models for Medical Education and Knowledge Assessment. Background Chat Generative Pre-trained Transformer (ChatGPT) is a 175-billion-parameter natural language processing model that can generate conversation-style responses to user input. Objective This study aimed to evaluate the performance of ChatGPT on questions within the scope of the United States Medical Licensing Examination Step 1 and Step 2 exams, as well as to analyze responses for user interpretability. Methods We used 2 sets of multiple-choice questions to evaluate ChatGPT’s performance, each with questions pertaining to Step 1 and Step 2. The first set was derived from AMBOSS, a commonly used question bank for medical students, which also provides statistics on question difficulty and the performance on an exam relative to the user base. The second set was the National Board of Medical Examiners (N

In [15]:
# Tune batch_size so that one batch fits in memory
batch_size = 4
# Number of papers to classify
n = 50
# Fixed seed so the same papers are drawn on every run
seed = 42

# Draw a reproducible random sample of papers. A private Random instance
# leaves the global random state untouched, and capping the sample size at the
# population size avoids the ValueError random.sample raises when n is larger
# than the number of available documents.
all_docs = docs.tolist()
sequences = random.Random(seed).sample(all_docs, min(n, len(all_docs)))

# Define the candidate labels
candidate_labels = zeroshot_topic_list

# Set the hypothesis template
hypothesis_template = "The topic of this paper is {}."

# Create an empty list to collect the prediction results
single_topic_prediction = []

# Classify the sampled papers one batch at a time
for i in range(0, len(sequences), batch_size):
    batch_predictions = classifier(
        sequences[i:i+batch_size],
        candidate_labels,
        hypothesis_template=hypothesis_template,
    )
    # NOTE(review): some transformers releases appear to return a bare dict
    # (not a one-element list) for a single-item batch -- confirm for the
    # installed version. `list += dict` would append the dict's keys, so
    # normalise to a list first.
    if isinstance(batch_predictions, dict):
        batch_predictions = [batch_predictions]
    single_topic_prediction += batch_predictions

KeyboardInterrupt: 

In [5]:
import pandas as pd

# Flatten the zero-shot predictions into a table: one row per text, holding
# the text followed by its three highest-scoring (topic, score) pairs.
# `single_topic_prediction` is the list of result dicts (keys 'sequence',
# 'labels', 'scores') collected from the classifier.
columns = ['sequence', 'topic_1', 'score_1', 'topic_2', 'score_2', 'topic_3', 'score_3']

# Rows are collected under their own name so the DataFrame loaded earlier as
# `data` is not overwritten by a plain list.
prediction_rows = []

for prediction in single_topic_prediction:
    # Pair labels with their scores and order them from best to worst
    label_score_pairs = sorted(
        zip(prediction['labels'], prediction['scores']),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep the top 3 pairs; pad with (None, None) if fewer than three labels
    # were scored so the row always matches the seven columns
    top_3 = label_score_pairs[:3]
    top_3 += [(None, None)] * (3 - len(top_3))

    # Original text, then topic/score for ranks 1, 2 and 3
    row = (prediction['sequence'],) + tuple(value for pair in top_3 for value in pair)
    prediction_rows.append(row)

# Build the DataFrame once from the collected rows
df = pd.DataFrame(prediction_rows, columns=columns)

# df now contains your formatted data with the original texts

## TODO: set the score threshold to 0.1.
## A paper can then receive multiple labels: every topic whose score exceeds 0.1.
## Add an example that demonstrates the multi-label output.




In [6]:
# Display the table of top-3 topics and scores per text.
df

Unnamed: 0,sequence,topic_1,score_1,topic_2,score_2,topic_3,score_3
0,Generative Pre‐Trained Transformer 4 in health...,Ethics in NLP,0.085755,Summarization,0.077525,Resources and Evaluation,0.074122
1,The inevitable transformation of medicine and ...,Question Answering,0.081202,Discourse and Pragmatics,0.080593,Summarization,0.077747
2,Qilin-Med-VL: Towards Chinese Large Vision-Lan...,Question Answering,0.138336,"Image, Vision, Video and Multimodality",0.113636,Information Extraction,0.094945
3,The application of Chat Generative Pre-trained...,Resources and Evaluation,0.123443,Information Extraction,0.101501,Dialogue and Interactive Systems,0.098464
4,Conversational Chatbot Builder – Smarter Virtu...,Dialogue and Interactive Systems,0.117775,Resources and Evaluation,0.116473,Information Extraction,0.109784
5,Generation of Radiology Findings in Chest X-Ra...,Information Extraction,0.129765,Resources and Evaluation,0.090207,Natural Language Generation,0.087764
6,Implementing Natural Language Generation throu...,Natural Language Generation,0.912011,Resources and Evaluation,0.010654,Question Answering,0.010149
7,How GPT-3 responds to different publics on cli...,Dialogue and Interactive Systems,0.149789,Discourse and Pragmatics,0.122365,Commonsense Reasoning,0.120076
8,Harnessing the Open Access Version of ChatGPT ...,Resources and Evaluation,0.134291,Language Modeling and Analysis of Language Models,0.092064,Discourse and Pragmatics,0.087228
9,ChatGPT-Enabled daVinci Surgical Robot Prototy...,Dialogue and Interactive Systems,0.094912,Question Answering,0.094684,Resources and Evaluation,0.084354


In [13]:
# Show the full text stored in the row labelled 9 of the results table.
df.loc[9, "sequence"]

'ChatGPT-Enabled daVinci Surgical Robot Prototype: Advancements and Limitations. The daVinci Surgical Robot has revolutionized minimally invasive surgery by enabling greater accuracy and less invasive procedures. However, the system lacks advanced features and autonomy necessary for it to function as a true partner. To enhance its usability, we introduce the implementation of a ChatGPT-based natural language robot interface. Overall, our integration of a ChatGPT-enabled daVinci Surgical Robot has potential to expand the utility of the surgical platform by supplying a more accessible interface. Our system can listen to the operator speak and, through the ChatGPT-enabled interface, translate the sentence and context to execute specific commands to alter the robot’s behavior or to activate certain features. For instance, the surgeon could say (even in Spanish) “please track my left tool” and the system will translate the sentence into a specific track command. This specific error-checked 

In [68]:
# Show the full text of the sampled paper at position 4 of `sequences`.
sequences[4]

"Editors' statement on the responsible use of generative artificial intelligence technologies in scholarly journal publishing. The new generative artificial intelligence (AI) tools, and especially the large language models (LLMs) of which ChatGPT is the most prominent example, have the potential to transform many aspects of scholarly publishing. How the transformations will play out remains to be seen, both because the different parties involved in the production and publication of scholarly work are still learning about these tools and because the tools themselves are still in development, but the tools have a vast range of potential uses. Authors are likely to use generative AI to conduct research, frame their thoughts, produce data, search for ways of articulating their thoughts, develop drafts, generate text, revise their writing, and create visuals. Peer reviewers might use AI to help them produce their reviews. Editors might use AI in the initial editorial screening of manuscript