In [17]:

import csv
import os

import torch
import transformers
from datasets import load_dataset, Dataset,DatasetDict
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig


In [None]:
# Tokenizer and model are both loaded from the same fine-tuned checkpoint.
checkpoint_dir = "/tsukimi/llm/ft/checkpoint-1844"

# 4-bit NF4 quantisation with bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir, use_fast=False)

model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir,
    quantization_config=quant_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

In [57]:

# Text-generation pipeline wrapping the model and tokenizer loaded above.
# return_full_text=False is passed so that (per the transformers docs) only the
# generated continuation is returned, not the prompt — TODO confirm for this version.
pipeline = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
)

In [70]:
# Read the evaluation CSV; the rows are taken from the 'train' split of the
# object returned by load_dataset.
test_data_path = '/workdir/test_set.csv'
dataset = load_dataset("csv", data_files=test_data_path)['train']

# Wrap every question in the "Question: ... Answer:" prompt template and
# expose the prompts as a single-column ("text") Dataset.
all_test_data = [
    f"Question:\n{row['question']} \n\nAnswer:\n"
    for row in dataset
]
test_dataset = Dataset.from_dict({"text": all_test_data})


In [71]:
# Display the prompt dataset summary (features and row count).
test_dataset

Dataset({
    features: ['text'],
    num_rows: 2479
})

In [81]:
# Generate one sequence per prompt for the whole prompt column.
# NOTE(review): max_length presumably bounds prompt + generated tokens together;
# if the intent is to cap only the answer, max_new_tokens may be the better
# knob — confirm against the transformers generation docs before changing.
output = pipeline(
    test_dataset['text'],
    max_length=512,
    num_return_sequences=1,
    truncation=True,
)


In [86]:

# Keep the first returned sequence of every prompt. `all_generated_text`
# stays a plain list because a later cell assigns it to a DataFrame column.
all_generated_text = [sequences[0]['generated_text'] for sequences in output]

# Persist the answers as `<row index>,"<answer>"` rows.
# csv.writer is used instead of hand-built f-strings so that double quotes
# inside an answer are escaped (doubled) rather than corrupting the row.
# QUOTE_NONNUMERIC leaves the integer index bare and quotes the text, so rows
# without embedded quotes keep the previous layout. The `with` block closes
# the file even if a write fails.
with open("/workdir/ft.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC, lineterminator="\n")
    writer.writerows(enumerate(all_generated_text))

In [82]:
# Display the raw pipeline result: a list with one inner list of
# {'generated_text': ...} dicts per prompt.
# NOTE(review): this renders every generated answer; a slice such as
# output[:3] would keep the notebook output small.
output

[[{'generated_text': "Polycystic ovary syndrome (PCOS) is a condition that affects a woman's hormone levels. Women with PCOS develop cysts on their ovaries. These cysts are small fluidfilled sacs that form on the outside of the ovary. They are different from the normal follicles that form inside the ovary each month during the cycle. The normal follicles release an egg each month as part of the woman's menstrual cycle. The cysts associated with PCOS do not release eggs. Instead, they continue to grow larger. The cysts can interfere with normal egg release and may cause the woman to stop having menstrual periods. Women with PCOS may have irregular or no menstrual periods. Other signs and symptoms of PCOS include: Balding or excess facial hair Weight gain or obesity High blood sugar High blood cholesterol High blood pressure Infertility PCOS can be diagnosed in women who are 15 to 44 years old and have at least two of the following three symptoms:  irregular menstrual periods  high level

In [84]:
# Sanity check: number of pipeline results (compare with the prompt dataset's num_rows).
len(output)

2479

In [88]:
import pandas as pd
# Load the test set as a DataFrame so model answers can be added as columns.
# NOTE(review): the prompts above were built from '/workdir/test_set.csv', a
# different path — presumably the same data; confirm the row order matches.
df = pd.read_csv('/workdir/MedQA/data/test_set.csv')

In [114]:
# Store the generated answers in a 'qlora-1844' column, one per row of df.
df['qlora-1844']=all_generated_text

In [112]:
def _strip_enclosing_quotes(text):
    """Return `text` without one pair of enclosing double quotes, if present.

    Values that are not strings, or that are not wrapped in a leading and a
    trailing double quote, are returned unchanged.
    """
    if isinstance(text, str) and len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        return text[1:-1]
    return text


# Remove the surrounding quotes from each stored answer.
# The previous version dropped the first and last character unconditionally,
# which cuts real characters when the column holds raw generated text (no
# surrounding quotes) and trims further on every re-run of the cell.
df['qlora-1844'] = df['qlora-1844'].map(_strip_enclosing_quotes)

In [115]:
# Display the test set together with the added answer column.
df

Unnamed: 0,question,answer,AnswerID,url,qlora-1844
0,What is (are) Polycystic ovary syndrome ? (Als...,Polycystic ovary syndrome is a condition in wh...,ADAM_0003147_Sec1.txt,https://www.nlm.nih.gov/medlineplus/ency/artic...,Polycystic ovary syndrome (PCOS) is a conditio...
1,What causes Polycystic ovary syndrome ? (Also ...,PCOS is linked to changes in hormone levels th...,ADAM_0003147_Sec2.txt,https://www.nlm.nih.gov/medlineplus/ency/artic...,The cause of polycystic ovary syndrome (PCOS) ...
2,What causes Noonan syndrome ?,Noonan syndrome is linked to defects in severa...,ADAM_0002818_Sec2.txt,https://www.nlm.nih.gov/medlineplus/ency/artic...,What causes Noonan syndrome? Noonan syndrome i...
3,What are the complications of Noonan syndrome ?,- Buildup of fluid in tissues of body (lymphed...,ADAM_0002818_Sec7.txt,https://www.nlm.nih.gov/medlineplus/ency/artic...,People with Noonan syndrome have an increased ...
4,How to prevent Noonan syndrome ?,Couples with a family history of Noonan syndro...,ADAM_0002818_Sec9.txt,https://www.nlm.nih.gov/medlineplus/ency/artic...,How is Noonan syndrome inherited? Noonan syndr...
...,...,...,...,...,...
2474,What should I do if I forget a dose of Glimepi...,"Before you start to take glimepiride, ask you ...",MPlusDrugs_0000553_Sec5.txt,https://www.nlm.nih.gov/medlineplus/druginfo/m...,"If you forget to take glimepiride, take it as ..."
2475,What are the side effects or risks of Glimepir...,This medication may cause changes in your bloo...,MPlusDrugs_0000553_Sec6.txt,https://www.nlm.nih.gov/medlineplus/druginfo/m...,Some side effects of glimepiride may occur tha...
2476,What to do in case of emergency or overdose of...,"In case of overdose, call your local poison co...",MPlusDrugs_0000553_Sec8.txt,https://www.nlm.nih.gov/medlineplus/druginfo/m...,"If you experience an overdose, call your healt..."
2477,What other information should I know about Gli...,Keep all appointments with your doctor and the...,MPlusDrugs_0000553_Sec9.txt,https://www.nlm.nih.gov/medlineplus/druginfo/m...,Some glimepiride tablets may have an imprinted...


In [118]:
# Save the frame without the index column.
# NOTE(review): this writes to the same path that df was read from, so the
# original test set file is overwritten — confirm that is intended.
df.to_csv('/workdir/MedQA/data/test_set.csv',index=False)

In [121]:
# Rebuild one string per record from /workdir/llama3.csv.
# The file appears to hold one double-quoted answer per record, where an
# answer may span several physical lines (see the displayed result below).
# The `with` block ensures the file handle is closed.
with open("/workdir/llama3.csv", "r") as f:
    lines = f.readlines()

# A physical line ending with `"` + newline closes the record being assembled;
# a line that arrives while nothing is being assembled and that does not open
# a new quoted record is treated as a continuation of the previous record.
buffer = ""      # physical lines of the record currently being assembled
all_lines = []   # completed records, still including quotes and newlines
for line in lines:
    if buffer == "" and (line.startswith("\"\n") or not line.startswith("\"")):
        # Continuation of the record that was closed last.
        if not all_lines:
            # Previously this surfaced as an opaque IndexError on all_lines[-1].
            raise ValueError(
                "llama3.csv starts with a continuation line; no record to attach it to"
            )
        all_lines[-1] += line
    elif line.endswith("\"\n"):
        # Closing line: finish the buffered record.
        all_lines.append(buffer + line)
        buffer = ""
    else:
        # Opening or middle line of a multi-line record.
        buffer += line

# Keep a final record whose closing line has no trailing newline (for example
# a file without a newline at EOF); it used to be dropped silently.
if buffer:
    all_lines.append(buffer)
    buffer = ""
all_lines

['"Polycystic ovary syndrome (PCOS) is a hormonal disorder common among women of reproductive age. Women with PCOS may have infrequent or prolonged menstrual periods or excess male hormone (androgen) levels. The ovaries may develop numerous small collections of fluid (follicles) and fail to regularly release eggs. \nWomen with PCOS can have a difficult time getting pregnant. They are also at increased risk of diabetes and metabolic syndrome, a cluster of conditions that includes high blood pressure, high blood sugar, and high cholesterol levels. \nPCOS is a common health problem that affects 1 in 10 women of reproductive age. It can affect women in their teens and early 20s through their 30s and early 40s."\n',
 '"Polycystic ovary syndrome (PCOS) is a common health problem related to the female reproductive system. It is caused by an imbalance of hormones. This imbalance can cause a woman\'s body to make extra male hormones. These hormones are called androgens. Women with PCOS have hig

In [110]:
# Sanity check: number of parsed records (compare with the number of rows in df).
len(all_lines)

2479

In [117]:
# Store the parsed records in a "llama3" column of df.
# NOTE(review): in file order this runs before the cell that removes the
# newlines and enclosing quotes from all_lines, so the column appears to
# receive the raw records — confirm the intended execution order.
df["llama3"] = all_lines

In [119]:
# Collapse every record onto a single line, then drop its first and last
# character (the enclosing quotes for records of the form "..." + newline).
# Slice assignment rewrites the existing list object in place.
all_lines[:] = [record.replace("\n","")[1:-1] for record in all_lines]
