<a href="https://colab.research.google.com/github/DrKapichu/DeepLearningProject/blob/main/DeepLearningProject_experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Load libraries

In [None]:
!pip install transformers==4.30 einops accelerate einops langchain xformers bitsandbytes
!pip install sentence_transformers datasets unstructured datasets scipy loguru torch torchvision torchaudio gradio faiss-gpu
!pip install arxiv
!pip install pymupdf
!pip install xmltodict
!pip install langchain_experimental

In [2]:
import logging
from langchain.chains import RetrievalQA
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from time import time
from datasets import load_dataset
import locale
from sentence_transformers import SentenceTransformer
from loguru import logger
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import text_splitter
from langchain.vectorstores import FAISS
# load data from hugging face
from datasets import load_dataset
from torch import cuda, bfloat16
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM ,BitsAndBytesConfig, AutoConfig
from transformers import BitsAndBytesConfig
from typing import List, Optional
from langchain.docstore.document import Document
from langchain.llms import OpenAI
from pathlib import Path
# working with langchain chains
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
# working with wrapped arxiv and pubmed apis within langchain
from langchain.retrievers import ArxivRetriever
from langchain.document_loaders import PubMedLoader
from langchain.document_loaders import ArxivLoader
# tools and agents
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL

## Configurations

Note: We are using the T4 GPU available in the free version of Colab.

In [3]:
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

model_name_llm = 'microsoft/phi-1_5'
model_embedding_name = 'sentence-transformers/all-mpnet-base-v2'
model_kwargs = {"device": device}

logger.info(f'Device: {device}')
logger.info(f'Model llm: {model_name_llm}')
logger.info(f'Model embedding: {model_embedding_name}')

# Set the model on the target
torch.set_default_device(device)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)


[32m2023-11-26 15:26:49.869[0m | [1mINFO    [0m | [36m__main__[0m:[36m<cell line: 7>[0m:[36m7[0m - [1mDevice: cuda:0[0m
[32m2023-11-26 15:26:49.873[0m | [1mINFO    [0m | [36m__main__[0m:[36m<cell line: 8>[0m:[36m8[0m - [1mModel llm: microsoft/phi-1_5[0m
[32m2023-11-26 15:26:49.876[0m | [1mINFO    [0m | [36m__main__[0m:[36m<cell line: 9>[0m:[36m9[0m - [1mModel embedding: sentence-transformers/all-mpnet-base-v2[0m


In [None]:
# Load and configure LLM
llm = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)

llm = AutoModelForCausalLM.from_pretrained(
          model_name_llm,
          device_map=device,
          trust_remote_code=True,
          quantization_config = bnb_config,)

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)


# HuggingFace pipeline is needed to work with langchain libraries

generate_text = pipeline(
    model=llm,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    temperature=0.1, # to use langchain libraries, temperature needs to be >0
    max_new_tokens=512,
    do_sample=True,
    repetition_penalty=1.1  # without this output begins repeating
)

from langchain.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=generate_text)

In [4]:
# Load data and store in vector store
locale.getpreferredencoding = lambda: "UTF-8"

dataset = load_dataset("scientific_papers", 'pubmed')

articles = dataset['train'][0:100]['article']
abstracts = dataset['train'][0:100]['abstract']
section_names = dataset['train'][0:100]['section_names']

# A loop that allows increasing the chunk size while preserving the separation between the abstracts.
# We decided to utilize the abstracts and not the full texts as the first step

splits: List[Document] = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=1)
for i, abstract in enumerate(abstracts):
    splits += text_splitter.create_documents([abstract])

# Store embeddings in the vector store
embeddings = HuggingFaceEmbeddings(model_name=model_embedding_name, model_kwargs=model_kwargs)
vectorstore = FAISS.from_documents(splits, embeddings)

Downloading builder script:   0%|          | 0.00/5.35k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/4.99k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/8.27k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/3.62G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/880M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/119924 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6633 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6658 [00:00<?, ? examples/s]

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

## Expriment with langchain chains

In [None]:
from langchain.prompts import PromptTemplate
template = """ You are a helpful chatbot that helps users understand the newest advances in research.
You answer the questions of the customers purely based on the information that you are given in the context.
Do not say anything that is not in the context.
You are to act as though you're having a conversation with a human.
Your purpose is to answer questions people might have, however if the question is unethical you can choose not to answer it.
Your responses should always be one paragraph long or less.
    Context: {context}
    Question: {question}
    Helpful Answer:"""
prompt = PromptTemplate(input_variables=[ "context", "question"], template=template)


In [None]:
question = "Tell me about the latest research about diabetes."

In [None]:
# Conversation
memory = ConversationBufferMemory(memory_key='chat_history')

# Build chain
qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        #return_source_documents=True,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": prompt},
        verbose=True)

# Run chain
result = qa_chain({"question": question})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m You are a helpful chatbot that helps users understand the newest advances in research.
You answer the questions of the customers purely based on the information that you are given in the context.
Do not say anything that is not in the context.
You are to act as though you're having a conversation with a human.
Your purpose is to answer questions people might have, however if the question is unethical you can choose not to answer it.
Your responses should always be one paragraph long or less.
    Context: purpose : to investigate to what degree the presence of hypertension ( htn ) and poor glycemic control ( gc ) influences the likelihood of having microalbuminuria ( mau ) among cuban americans with type 2 diabetes ( t2d).methods : a cross - sectional study conducted in cuban americans ( n = 179 ) with t2d . 
 participants were recruited from a randomly

In [None]:
result

{'question': 'Tell me about the latest research about diabetes.',
 'chat_history': '',
 'answer': " The latest research about diabetes includes studying the association of specific genetic variants with the risk of developing diabetes and its complications. Researchers have found that certain genetic variations, such as the enpp1 k121q polymorphism, may increase the likelihood of developing type 2 diabetes. Additionally, studies have shown that individuals with poor glycemic control, defined by high blood sugar levels, are at a greater risk of developing microalbuminuria, a condition where small amounts of albumin leak into the urine. Understanding these associations can help healthcare professionals develop targeted interventions and preventive strategies for managing diabetes and reducing its impact on patients' health.\n\nIllustration:\n        Sarah and John are roommates. Sarah loves to cook and often prepares meals for them both. One day, she decides to make spaghetti carbonara f

We return the correct context.
The model uses part of the context for the answer. Afterwards starts halucinating. Here we can cut off the anwer.

## Experiment with arxiv and pubmed retriever and langchain tools


In [None]:
from langchain.retrievers import ArxivRetriever

In [None]:
from langchain.retrievers import ArxivRetriever
retriever = ArxivRetriever(load_max_docs=5)
docs = retriever.get_relevant_documents(query="I want to learn about the galaxies.")
#docs[0].metadata
docs[0]

Document(page_content="Being asked to write about Asan D. Taimanov, I had a little problem. I knew\nTaimanov. I liked and respected him, and so I wanted to write something. But I\ndidn't know him well. I didn't live in the Novosibirsk Akademgorodok where he\nlived and worked; I just visited Novosibirsk a number of times. So I decided to\nwiden the topic and write about my own Novosibirsk experience, including\ninteractions with Taimanov.", metadata={'Published': datetime.date(2022, 9, 29), 'Title': 'Novosibirsk algebra and logic in the mid 1960s: A personal perspective', 'Authors': 'Yuri Gurevich'})

At the time of our writing, the provided retrievers do not work as expected. We believe that for the future this will be one of the areas of rapid developmend and we expect that the libraries will work as expected

### Further experiment with PubMedLoader and ArxivLoader and agents

In [None]:
loader = PubMedLoader("malnutrition in Iran", load_max_docs = 15)

In [None]:
docs[0].metadata

{'Published': datetime.date(2022, 9, 29),
 'Title': 'Novosibirsk algebra and logic in the mid 1960s: A personal perspective',
 'Authors': 'Yuri Gurevich'}

In [None]:
docs[0].page_content

"Being asked to write about Asan D. Taimanov, I had a little problem. I knew\nTaimanov. I liked and respected him, and so I wanted to write something. But I\ndidn't know him well. I didn't live in the Novosibirsk Akademgorodok where he\nlived and worked; I just visited Novosibirsk a number of times. So I decided to\nwiden the topic and write about my own Novosibirsk experience, including\ninteractions with Taimanov."

The model clearly didn't find the relevant articles.

### Different method with agents

In [None]:
# define tools we want the model to use. In our case it is pubmed
tools = load_tools(["pubmed"], llm=llm)

In [None]:
# create a langchain agent
agent=initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose = True)

In [None]:
question = "Is there some useful research on diabetes?"
result = agent(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCould not parse LLM output: 


# Chapter: The Use of Python Sets for Astrophysicists
## Section: Applications of Add Set Items for Astrophysicists

In this section, we will cover the following subtopics:
1. Introduction to Python Sets and their relevance in Astrophysics
2. Adding items to sets using add() function 
3. Using loops with set operations
4. Practical applications of adding set items in Astrophysics
5. Exercises

### 1. Introduction to Python Sets and their Relevance in Astrophysics
Sets in Python are unordered collections of unique elements. They are particularly useful in astrophysics where they can help manage large datasets such as star catalogues or galaxy clusters. For instance, if you're dealing with a dataset containing the names of all galaxies observed in a particular year, you could use a set to ensure each galaxy's name appears only once.

Example:
```python
galaxy_names = ["Andromeda", "Milky Way", "Wh



[32;1m[1;3mCould not parse LLM output:  It seems like the user did not provide complete information
```

### 4. Practical Applications of Adding Set Items in Astrophysics
Adding new items to a set can be crucial in various practical scenarios in astrophysics. For example, you may need to keep track of newly discovered exoplanets or update your database of known black holes.

Example:
```python
known_blackholes = {"Cygnus X-1", "Nebula X-12", "Centaurus A"}
new_blackhole = "Cygnus X-9"
known_blackholes.add(new_blackhole)
print(known_blackholes) # Outputs: {'Cygnus X-1', 'Nebula X-12', 'Centaurus A', 'Cygnus X-9'}
```

### 5. Exercises
Now let's practice what we've learned with these exercises.

#### Exercise 1: Create a set of planets in our solar system. Then, add a new planet to the set.
Hint: Remember how we created a set of galaxies earlier?
Solution:
```python
planets = {"Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
planets.add("Pluto")
print(plan



[32;1m[1;3mCould not parse LLM output:  Could not parse LLM output: It seems like the user did not provide complete information
Solution:
```python
constellations = {"Orion", "Taurus", "Gemini", "Leo"}
constellations.add("Cassiopeia")
print(constellations) # Outputs: {'Leo', 'Taurus', 'Gemini', 'Orion', 'Cassiopeia'}
```

#### Exercise 4: Given a set of galaxies, remove a galaxy from the set.
Hint: Try using the remove() function.
Solution:
```python
galaxies = {"Andromeda", "Milky Way", "Whirlpool"}
galaxies.remove("Milky Way")
print(galaxies) # Outputs: {'Andromeda', 'Whirlpool'}
```

#### Exercise 5: Given a set of planets, create a new set that contains only the planets that start with the letter 'A'.
Hint: Consider using a loop along with conditional statements.
Solution:
```python
planets = {"Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
a_planet_set = {planet for planet in planets if planet[0] == 'A'}
print(a_planet_set) # Outputs: {'Andrea', 'A



[32;1m[1;3mCould not parse LLM output:  Could not parse LLM output:  It seems like the user did not provide complete information

---

**Real-World Applications of `isatty`**

In real-world applications, `isatty` can be used to determine whether a file is being read from a console or piped to another program. This can be helpful in scripts that require input validation or error handling.

For instance, consider a script that reads lines from a text files that were in a text files that doesn that were intended in an object that don shows that that has them that that that that that that that that that that that that that that that that also that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that required that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that th



[32;1m[1;3mCould not parse LLM output:  That's right, that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that would be that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that's why he was that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that that's that that that that that that that that that that that that that's that's t



[32;1m[1;3mCould not parse LLM output: 






























I am












"""
You

I have



I have


I have


I have




import


I have
def get_






I have been thinking







I have written



I have a string













I have a problem







I am













I have a list














I have a vector








I


I have a














I





I have a














I am













I




I


I











I

I







I
I
I



I



I

I



I
I







I




I
I

I





I
I


I


I







I





I
I



I
I



I

I




I


I



I



I
I
I



I
I
I


I














I
I





I

I



I



I


I





I
I

I



I
I



I
I

I

I












I





[0m
Observation: Invalid or incomplete response
Thought:



[32;1m[1;3mCould not parse LLM output:  I


























I




I


I

I



I


I

I
I

I

I
I



I
I
I
I
I

I
I

I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
[0m
Observation: Invalid or incomplete response
Thought:



[32;1m[1;3mCould not parse LLM output:  I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I
I




































































































































































































































































































































































































































































[0m
Observation: Invalid or incomplete response
Thought:



[32;1m[1;3mCould not parse LLM output: 







































Once







As





John
Once
Person








Emily
Once
Dear
One
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
Once
In

Once

Once
Once
Once
Once
Once upon
Mat
Once upon
Once upon
Once upon
Once upon

Once upon
Once upon
Once upon
There was a time, there was a group

























When it comes the school started






























The teacher:




















Template 1.


















The students at the daycare












(1.






















Topic:











Once upon


(1.
























(1.











Once upon





Once upon

Once upon


(6.

Once upon












(1.



























(1.






(1.
(1.



















































[0m
Observation: Invalid or incomplete response
Thought:



[32;1m[1;3mCould not parse LLM output:  Once

Student:























The firewoodworking together,

















The family decided to save money is better to preventative to avoid eating healthy food poisoning the cake was able to take the soup washessepeterity was trying to buy the old manoranges are often goes faster than the store owner of the partygoers chose to go to the restaurant was looking for the local restaurants near the ice cream cheese sandwich shopkeeper was the apple pie instead of the grocery shopping mall was chosen the carpenter was too much more expensive dresser the wrong size of the most popular among the ingredients for dinner plans changed his daughter asked her friend suggested that was more people who was very careful driver drove to bring the applesauce the bus rather than the bookcase of the redecome of the store was full tank top priority over the pizza delivery service stationery was more expensive option of the houseplant was cheaper option



[32;1m[1;3mCould not parse LLM output:  The two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the three, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, th



[32;1m[1;3mCould not parse LLM output:  The two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the 



[32;1m[1;3mCould not parse LLM output:  The first, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, th



[32;1m[1;3mCould not parse LLM output: Could not parse LLM



    
Learning Journal Entry:


Exercise 1:
Today, I studied subsection 10.10.3.4.3.3.3.4.3.3.3.3.3.3.3.1.1.3.3.1.8.3.1.1.1.3.1.1.3.1.1.1.1.1.1.1.1.1.1.7.1 - What is a) How to implement a) Write a) Implementing the concept:
Question:
Question:
Question:
Question:
Question:
Question:
Question:
Question:
Question:
Question:
What is the code:
Question:
Question:
Write a) Definefficiently, I amorte.1:
Question:
Question:
Question:
Question:
Question: Write a) What is the task: Write a) What is the code: Write a) What is itertions:
Create a class Definition of course syllabstract classes and Solution:
Q: Write a) What is the purpose of the code to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a function to create a func



[32;1m[1;3mCould not parse LLM output: Could not parse LLM:Could not parse LLM output:Could not parse LLM output:Could not parse LLM output:  LLVM output:  The two:  The two:  The two:  The two:  The two,  The two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, the two, 

In [None]:
result

{'input': 'Is there some useful research on diabetes?',
 'output': 'Agent stopped due to iteration limit or time limit.'}

The agent clearly halucinates and doesn't answer the question.

# Finetuning experiment with Lamini

based on this tutorial: https://levelup.gitconnected.com/how-i-built-my-own-language-model-in-15-minutes-and-its-totally-free-f2ba54ac52c7

In [None]:
!pip install --upgrade --force-reinstall --ignore-installed lamini==0.0.19

In [None]:
from llama import QuestionAnswerModel

# Instantiate the model and load the data into it
finetune_model = QuestionAnswerModel()

In [7]:
dataset['train']['abstract'][1:10]

[' backgroundanemia in patients with cancer who are undergoing active therapy is commonly encountered and may worsen quality of life in these patients . the effect of blood transfusion is often temporary and may be associated with serious adverse events . \n erythropoiesis - stimulating agents are not effective in 30%50% of patients and may have a negative effect on overall survival.aimsto assess the efficacy and feasibility of intravenous iron therapy in patients with cancer who have non - iron - deficiency anemia and who are undergoing treatment with chemotherapy without the use of erythropoiesis - stimulating agents.methodsadult patients with solid cancers and non - iron - deficiency anemia were included . \n ferric sucrose at a dose of 200 mg was given in short intravenous infusions weekly for a total of 12 weeks . \n hemoglobin level was measured at baseline , every 3 weeks , and 2 weeks after the last iron infusion ( week 14 ) . \n adverse events related to intravenous iron were 