In [2]:
# Install the Prediction Guard client
! pip install -U predictionguard
! pip install transformers --upgrade

# Python Imports
import os
import json
import re

import pandas as pd
from getpass import getpass

# Prompt for the access token without echoing it, then publish the token and
# the API endpoint through environment variables before the client is imported.
pg_access_token = getpass('Enter your Prediction Guard access token: ')
os.environ.update(
  PREDICTIONGUARD_TOKEN=pg_access_token,
  PREDICTIONGUARD_URL="https://intel.predictionguard.com",
)

import predictionguard as pg

# Transcripts: a JSON object that is later indexed by the stringified
# transcript id. The encoding is given explicitly so the file is decoded as
# UTF-8 regardless of the platform's locale default (e.g. cp1252 on Windows).
with open('transcripts.json', encoding='utf-8') as f:
  transcripts = json.load(f)

# Test data: the rows are later read through their 'Id', 'Transcript' and
# 'Question' columns.
test = pd.read_csv('test.csv')

# Define a function to remove random signs from the transcript
def remove_random_signs(transcript):
  """Return `transcript` with every character removed that is not an ASCII
  letter, a digit 0-9, or whitespace.

  Whitespace (spaces, tabs, newlines) is kept as is; punctuation, symbols
  and accented letters are dropped.
  """
  unwanted = re.compile(r"[^a-zA-Z0-9\s]")
  return unwanted.sub("", transcript)

# Define a function to classify the transcript type
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The tokenizer and the classification model are loaded from the same
# checkpoint, so the name is spelled out once.
# NOTE(review): the two label names used in classify_transcript are assumed to
# match this checkpoint's output classes -- confirm against the model card.
checkpoint = "mrm8488/t5-base-finetuned-medical-dialogues"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

def classify_transcript(transcript):
  """Classify a transcript with the module-level `tokenizer` and `model`.

  Args:
    transcript: The transcript text to classify.

  Returns:
    "Doctor-Patient Conversation" or "Doctor Dictation", chosen by the
    highest-scoring logit of the first (only) sequence in the batch.

  Raises:
    IndexError: If the model predicts a class index outside the two labels.
  """
  labels = ["Doctor-Patient Conversation", "Doctor Dictation"]
  # Truncate so that long transcripts cannot exceed the maximum input length
  # configured on the tokenizer.
  inputs = tokenizer(transcript, return_tensors="pt", truncation=True)
  outputs = model(**inputs)
  # Convert the predicted class from a tensor element to a plain int before
  # using it as a list index.
  predicted_class = int(outputs.logits.argmax(-1)[0])
  return labels[predicted_class]

# Define a function to create a prompt for each question
def create_prompt(transcript, question, transcript_type):
  """Build the instruction-style prompt for one transcript.

  Args:
    transcript: Text placed under the "### Input:" section.
    question: Text placed under a "### Question:" section. It is only used
      when `transcript_type` is neither of the two known types; the two
      extraction prompts do not include it.
    transcript_type: "Doctor-Patient Conversation", "Doctor Dictation", or
      any other value to select the generic question-answering prompt.

  Returns:
    The prompt string, ending with an empty "### Response:" section.
  """
  header = "### Instruction:\n"
  input_section = f"\n\n### Input:\n{transcript}\n\n"
  footer = "### Response:\n"

  if transcript_type == "Doctor-Patient Conversation":
    instruction = "Extract the patient's condition from the below input."
    return header + instruction + input_section + footer

  if transcript_type == "Doctor Dictation":
    instruction = "Extract the diagnosis and treatment from the below input."
    return header + instruction + input_section + footer

  # Fallback: generic question answering, the only variant that embeds the question.
  instruction = "Answer the question based on the below input."
  question_section = f"### Question:\n{question}\n\n"
  return header + instruction + input_section + question_section + footer

# Define a function to use the LLM model to generate an answer
def generate_answer(transcript, question):
  """Generate an answer for one transcript/question pair.

  The transcript is cleaned with `remove_random_signs`, classified with
  `classify_transcript`, turned into a prompt by `create_prompt`, and the
  prompt is sent to the "Falcon-40B-Instruct" completion model.

  Args:
    transcript: Raw transcript text.
    question: The question to pass to `create_prompt`.

  Returns:
    The generated completion text.
  """
  cleaned = remove_random_signs(transcript)
  transcript_type = classify_transcript(cleaned)
  prompt = create_prompt(cleaned, question, transcript_type)

  result = pg.Completion.create(
    model="Falcon-40B-Instruct",
    prompt=prompt
  )

  # Completion responses are documented as nesting the generated text under
  # result['choices'][0]['text']; reading result['text'] directly raises
  # KeyError on that shape. A flat top-level 'text' is still honored if present.
  if 'text' in result:
    return result['text']
  return result['choices'][0]['text']

# Collect one [Id, answer] pair per row of the test data.
answers = []

for _, row in test.iterrows():
  # Each row names the transcript it refers to and the question to ask.
  transcript_id, question = row['Transcript'], row['Question']
  # Transcripts are looked up by the string form of the id.
  transcript = transcripts[str(transcript_id)]
  answer = generate_answer(transcript, question)
  answers.append([row['Id'], answer])

# Assemble the collected [Id, answer] pairs into a two-column frame and write
# it out without the index column.
submission_columns = ["Id", "Text"]
df = pd.DataFrame(answers, columns=submission_columns)
df.to_csv("sample_submission_3.csv", index=False)


Collecting transformers
  Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/9a/06/e4ec2a321e57c03b7e9345d709d554a52c33760e5015fdff0919d9459af0/transformers-4.35.0-py3-none-any.whl.metadata
  Downloading transformers-4.35.0-py3-none-any.whl.metadata (123 kB)
     ---------------------------------------- 0.0/123.1 kB ? eta -:--:--
     -------------------------------------  122.9/123.1 kB 3.6 MB/s eta 0:00:01
     -------------------------------------- 123.1/123.1 kB 2.4 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Obtaining dependency information for huggingface-hub<1.0,>=0.16.4 from https://files.pythonhosted.org/packages/ef/b5/b6107bd65fa4c96fdf00e4733e2fe5729bb9e5e09997f63074bb43d3ab28/huggingface_hub-0.18.0-py3-none-any.whl.metadata
  Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Obtaining dependency information for tokenizers<

ImportError: cannot import name 'AutoModelForSequenceClassification' from 'transformers' (C:\Users\avish\anaconda3\Lib\site-packages\transformers\__init__.py)