# llama_cpp

> Basic Experiments with Ollama using LangChain models, chains, and agents.    

In [None]:
import sqlalchemy as db
from sqlalchemy.engine import create_engine
import json

from langchain.llms.ollama import Ollama
import os
import pandas as pd
from pprint import pprint
import textwrap

# Image Tech Text and Extractions

In [None]:
engine = create_engine(
    'databricks://token:'+os.environ['DB_TOKEN']+ 
    '@czi-shared-infra-czi-sci-general-prod-databricks.cloud.databricks.com'+
    '?http_path=/sql/1.0/warehouses/1c4df94f2f1a6305')

sql = '''
SELECT * FROM gburns.nature_methods.papers WHERE LEN(ABSTRACT) > 0
'''
with engine.connect() as con:
    stmt = db.text(sql)
    rs = con.execute(stmt)
    df = pd.DataFrame(rs.fetchall(), columns=rs.keys())
df

#metadata = db.MetaData()
#metadata.reflect(bind=engine)
#tbls = metadata.tables
#print(json.dumps([str(t) for t in metadata.tables], indent=4))

#logs = Table("corpus", MetaData(bind=engine), autoload=True)
sql = '''
SELECT * FROM gburns.nature_methods.papers WHERE LEN(ABSTRACT) > 0

'''
with engine.connect() as con:
    stmt = db.text(sql)
    rs = con.execute(stmt)
    df = pd.DataFrame(rs.fetchall(), columns=rs.keys())
df
#print(select([func.count("*")], from_obj=logs).scalar())

In [None]:
sql = '''
SELECT p.* 
FROM gburns.imaging_tech.corpus_papers as cp
    JOIN gburns.imaging_tech.papers as p on (p.ID_PAPER = cp.ID_PAPER)
WHERE LEN(p.ABSTRACT)>0 AND p.YEAR>=2022
'''
with engine.connect() as con:
    stmt = db.text(sql)
    rs = con.execute(stmt)
    df_2022 = pd.DataFrame(rs.fetchall(), columns=rs.keys())

df_2022


Unnamed: 0,ID_PAPER,DOI,YEAR,ABSTRACT,TITLE,MESH_DESCRIPTORS
0,34811556,10.1038/s41592-021-01316-y,2022,Tissues and organs are composed of distinct ce...,Spatial mapping of protein composition and tis...,Antibodies | Diagnostic Imaging | Cell Communi...
1,34824477,10.1038/s41592-021-01308-y,2022,Highly multiplexed tissue imaging makes detail...,"MCMICRO: a scalable, modular image-processing ...",Humans | Neoplasms | Diagnostic Imaging | Imag...
2,34887550,10.1038/s41592-021-01335-9,2022,Mako is a software tool that converts microbio...,Fast and flexible analysis of linked microbiom...,Animals | Computational Biology | Computer Gra...
3,34887551,10.1038/s41592-021-01334-w,2022,Recent whole-brain mapping projects are collec...,Cross-modal coherent registration of whole mou...,"Brain | Animals | Mice, Inbred C57BL | Imaging..."
4,34916672,10.1038/s41592-021-01341-x,2022,Self-labeling protein tags such as HaloTag are...,Engineered HaloTag variants for fluorescence l...,Cell Line | Humans | Rhodamines | Hydrolases |...
...,...,...,...,...,...,...
6026,PPR587527,10.1101/2022.12.19.519427,2022,Cohort studies increasingly collect biosamples...,Omada: Robust clustering of transcriptomes thr...,
6027,PPR587989,10.1101/2022.12.21.520923,2022,"The two clathrin isoforms, CHC17 and CHC22, ge...",CHC22 clathrin membrane recruitment uses SNX5 ...,
6028,PPR588013,10.1101/2022.12.21.521417,2022,With the recent growth in spectral flow cytome...,Back to the Future- Unleashing your cytometer’...,
6029,PPR588026,10.1101/2022.12.21.521384,2022,The shape of cells is the outcome of the balan...,Friction patterns guide actin network contraction,


# Starting analysis with Langchain

In [None]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate(template=template, input_variables=["question"])

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/Users/gburns/Documents/Coding/ChatGPT_etc/LLMs/llama-2-70b-chat.Q5_K_M.gguf",
    n_ctx=4096,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True, # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 19 key-value pairs and 723 tensors from /Users/gburns/Documents/Coding/ChatGPT_etc/LLMs/llama-2-70b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  8192, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 28672,  8192,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  8192,  1024,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_outp

In [None]:
from __future__ import annotations

from abc import abstractmethod
from typing import List
import re

from langchain.schema import BaseOutputParser, OutputParserException

import json
from json import JSONDecodeError
from typing import Any, List

class JsonEnclosedByTextOutputParser(BaseOutputParser[Any]):
    """Parse the output of an LLM call to a JSON object."""

    def parse(self, text: str) -> Any:
        text = text.strip()
        m = re.search('.*([\[\{](.|\n)*[\}\]]).*', text, flags=re.M)
        if m:
            text1 = m.group(1)
            try:
                return json.loads(text1)
            except JSONDecodeError as e:
                return None
                #raise OutputParserException(f"Invalid json output: {text1} derived from {text}") from e
        else: 
            raise OutputParserException(f"Could not find json-formatted data in: {text}")

    @property
    def _type(self) -> str:
        return "json_enclosed_by_text_output_parser"
    

In [None]:
from langchain import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser

output_parser = JsonEnclosedByTextOutputParser()

In [None]:
t = """Please identify any imaging techniques or imaging technologies described in the abstract as a list of short noun phrases. 

The article text is delimited with triple backticks. 
Format your response as a JSON list. 
If the information isn't present, use "none" as the value.
Make your response as short as possible while still being accurate.
Only include the method name. 
Do not include any other response other than the JSON list.

Article text: '''{article_text}'''
"""
p = PromptTemplate(
    template=t,
    input_variables=["article_text"],
)

In [None]:
t2 = """
You are an expert biological scientist trained in imaging and microscopy. 
Your knowledge is deep and wide. You are terse and precise in your language.

Your job is to read the title and abstract of a scientific paper in detail, understand what you read, and perform the task described below based on what you read. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, break down the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
(A) background information about the work, 
(B) the goals of the work being described, 
(C) the methods used to achieve the goals, 
(D) the results of the work, and 
(E) the conclusions of the work. 

Third, use only sentences you identify as goals, methods, and results to identify the main contribution of the paper. 
Record this contribution as a complete noun phrase shorter than 12 words or so as main_contribution in the output.

Fourth, decide if that contribution of the paper is developing an imaging technique or method. Be careful about this decision and only answer 'true' 
if you are confident that the contribution of the paper states that a new imaging technique or method was developed. Possible examples of imaging techniques
could be new types of microscope, new imaging techniques, hardware, optics, or sample preparation for imaging. 

If the main contribution is the development of a new imaging method, record a true value in a variable called is_imaging_method in the output. 

If the main contribution is not the development of a new imaging method (such as the development of a genetic test, or a finding about a biological system), record a false value in a variable called is_imaging_method in the output. 

Format your response as a JSON object with "main_contribution" and "is_imaging_method" as the keys. 

Do not include any other response other than the JSON object. Remember, stop generating text after you finish the JSON object. Do not provide additional explanations of your answer.

Article
title: '''{title}'''
abstract: '''{abstract}'''
"""
p2 = PromptTemplate(
    template=t2,
    input_variables=["title", "abstract"],
)

In [None]:
t3 = """
Your job is to read the title and abstract of a scientific paper and perform the task described below. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, split the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
(A) background information about the work, 
(B) the goals of the work being described, 
(C) the methods used to achieve the goals, 
(D) the results of the work, and 
(E) the conclusions of the work. 

Third, use only sentences you identify as goals, methods, or results and write a description of what the authors did as the 
main contribution of the paper by saying 'The authors ...' and then describing what they did in a complete sentence shorter than 12 words. 

Fourth, decide if that contribution is developing an imaging technique or method. Be careful about this decision and only answer 'true' 
if you are confident that the main goal of the published work was to create or test this new imaging technique or method. Possible examples of imaging techniques
could be new types of microscope, new imaging techniques, hardware, optics, or sample preparation for imaging. If the main contribution is the development 
of a new imaging method, record a true value in a variable called is_imaging_method in the output. 

Finally, if the paper provides a name for the technique being developed record it in a variable called 'tech_name' in the output.

Format your response as a JSON object with "main_contribution", "is_imaging_method", and "tech_name" as the keys.  

Stop generating text after you finish the JSON object. Do not provide additional explanations of your answer.

Article
title: '''{title}'''
abstract: '''{abstract}'''
"""
p3 = PromptTemplate(
    template=t3,
    input_variables=["title", "abstract"],
)

In [None]:
t3 = """
You are an expert biological scientist trained in imaging and microscopy. 
Your knowledge is deep and wide. You are terse and precise in your language.

Your job is to read the title and abstract of a scientific paper delimited with triple backticks below. 

First, break down the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
* background information about the work, 
* the goals of the work being described, 
* the methods used to achieve the goals, 
* the results of the work, and 
* the conclusions of the work. 

Third, use only sentences you identify as goals, methods, or results and write a description of what the authors did as the main contribution of the paper by 
saying 'The authors ...' and then describing what they did in a complete sentence. 

Fourth, decide if the paper os the contribution of the paper is concerned with developing one of three possible types of biomedical imaging technology: probes, acquisition hardware, or data analysis. 
Be careful about this decision and only include the paper in one of the subtypes if you are confident that the contribution of the paper focuses 
on one of the specified types of biomedical imaging methods. 

If the main contribution is the development of molecular probes, stains, antibodies, or any other way of generating a visual signal from a biomedical sample, record a value of 'probe' in the 'technology_type' field of the output.

If the main contribution is the development of methods to acquire biomedical images from biomedical samples in new ways (such as methods in microscopy, optics, spectroscopy, etc.), record a value of 'acquisition hardware' in the 'technology_type' field of the output.

If the main contribution is the development of data analysis methods for imaging data, record a value of 'data analysis' in the 'technology_type' field of the output.

If the main contribution is the development of technology but is not any of these types, record a value of 'other' in the 'technology_type' field of the output.

If the main contribution is not the development of imaging technology (such as an experiment describing other phenomenon), record a value of 'none' in the 'technology_type' field of the output.

Format your response as a JSON object with "main_contribution" and "technology_type" as the keys. Do not include any other response other than the JSON object. 

Remember, stop generating text after you finish the JSON object. Do not provide additional explanations of your answer.

Article
title: '''{title}'''
abstract: '''{abstract}'''
"""
p3 = PromptTemplate(
    template=t3,
    input_variables=["title", "abstract"],
)

In [None]:
from ctransformers import AutoModelForCausalLM

# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-70B-chat-GGML", 
                                           model_file="llama-2-70b-chat.q4_K_M.gguf", 
                                           model_type="llama", 
                                           gpu_layers=50)

print(llm("AI is going to"))

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)90663ed1/config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Fetching 0 files: 0it [00:00, ?it/s]

ValueError: Model file 'llama-2-70b-chat.q4_K_M.gguf' not found in '/Users/gburns/.cache/huggingface/hub/models--TheBloke--Llama-2-70B-chat-GGML/snapshots/2eb277ec3007dcd8e5978ab07dbb235c90663ed1'

In [None]:
from langchain.llms import CTransformers

llm = CTransformers(model="TheBloke/Llama-2-70B-chat-GGML")

print(llm('AI is going to'))

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)chat.ggmlv3.q2_K.bin:   0%|          | 0.00/28.6G [00:00<?, ?B/s]

In [None]:
# check
r = df[df.ID_PAPER=="28945706"]
a = r.ABSTRACT.values[0]
t = r.TITLE.values[0]
prompt = p2.format(title=t, abstract=a)
print(prompt)
output = llm(p2.format(title=t, abstract=a))
#m = output_parser.parse(output) 
print(output)


You are an expert biological scientist trained in imaging and microscopy. 
Your knowledge is deep and wide. You are terse and precise in your language.

Your job is to read the title and abstract of a scientific paper in detail, understand what you read, and perform the task described below based on what you read. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, break down the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
(A) background information about the work, 
(B) the goals of the work being described, 
(C) the methods used to achieve the goals, 
(D) the results of the work, and 
(E) the conclusions of the work. 

Third, use only sentences you identify as goals, methods, and results to identify the main contribution of the paper. 
Record this contribution as a complete noun phrase shorter than 12 words or so as main_contribution in the output.

Fourth, decide if that contrib


llama_print_timings:        load time = 11699.64 ms
llama_print_timings:      sample time =    14.70 ms /     6 runs   (    2.45 ms per token,   408.19 tokens per second)
llama_print_timings: prompt eval time = 17669.69 ms /   777 tokens (   22.74 ms per token,    43.97 tokens per second)
llama_print_timings:        eval time =   838.51 ms /     5 runs   (  167.70 ms per token,     5.96 tokens per second)
llama_print_timings:       total time = 18558.42 ms


In [None]:
pprint(a)

('Förster resonance energy transfer (FRET)-based tension sensor modules (TSMs) '
 'are available for investigating how distinct proteins bear mechanical forces '
 'in cells. Yet, forces in the single piconewton (pN) regime remain difficult '
 'to resolve, and tools for multiplexed tension sensing are lacking. Here, we '
 'report the generation and calibration of a genetically encoded, FRET-based '
 'biosensor called FL-TSM, which is characterized by a near-digital force '
 'response and increased sensitivity at 3-5 pN. In addition, we present a '
 'method allowing the simultaneous evaluation of coexpressed tension sensor '
 'constructs using two-color fluorescence lifetime microscopy. Finally, we '
 'introduce a procedure to calculate the fraction of mechanically engaged '
 'molecules within cells. Application of these techniques to new talin '
 'biosensors reveals an intramolecular tension gradient across talin-1 that is '
 'established upon integrin-mediated cell adhesion. The tensio

In [None]:
import time

def execute_llm(llm, p, i, t, a):
    print(textwrap.fill(i, 100))
    print(textwrap.fill(t, 100))
    print(textwrap.fill(a, 100))
    output = llm(p.format(title=t, abstract=a))
    return output

l = []
for i, r in df[701:].iterrows():
    if r.ABSTRACT is None: 
        continue
    print('\n-----------------------------------------------------------------\n')
    print(i)
    # measure time to execute
    t = time.time()
    output = execute_llm(llm, p2, r.ID_PAPER, r.TITLE, r.ABSTRACT)
    m = output_parser.parse(output)
    if m is None: 
        m = {
            'main_contribution': None,
            'is_imaging_method': False,
        } 
    m['llm_output'] = output
    m['ID_PAPER'] = r.ID_PAPER
    dt = time.time() - t    
    m['time'] = dt
    l.append(m)

df1 = pd.DataFrame(l)
df1


-----------------------------------------------------------------

701
19898486
Optical interrogation of neural circuits in Caenorhabditis elegans.
The nematode Caenorhabditis elegans has a compact nervous system with only 302 neurons. Whereas most
of the synaptic connections between these neurons have been identified by electron microscopy serial
reconstructions, functional connections have been inferred between only a few neurons through
combinations of electrophysiology, cell ablation, in vivo calcium imaging and genetic analysis. To
map functional connections between neurons, we combined in vivo optical stimulation with
simultaneous calcium imaging. We analyzed the connections from the ASH sensory neurons and RIM
interneurons to the command interneurons AVA and AVD. Stimulation of ASH or RIM neurons using
channelrhodopsin-2 (ChR2) resulted in activation of AVA neurons, evoking an avoidance behavior. Our
results demonstrate that we can excite specific neurons expressing ChR2 while 

Unnamed: 0,main_contribution,is_imaging_method,llm_output,ID_PAPER,time
0,demonstrated the ability to excite specific ne...,True,"{\n""main_contribution"": ""demonstrated the abi...",19898486,147.822898
1,Automated high-throughput mapping of promoter-...,True,"{\n""main_contribution"": ""Automated high-throu...",19898487,77.961682
2,miR-SP system for transgenic microRNA inhibiti...,False,"{\n""main_contribution"": ""miR-SP system for tr...",19915559,82.453050
3,Floxin technology for targeted modification of...,False,"{\n""main_contribution"": ""Floxin technology fo...",19966808,75.900989
4,A tool for live-cell imaging that facilitates ...,True,"{\n""main_contribution"": ""A tool for live-cell...",19966809,86.998896
...,...,...,...,...,...
420,Virtual microfluidics for digital quantificati...,False,"{\n""main_contribution"": ""Virtual microfluidic...",27479330,52.461597
421,uDISCO clearing to overcome limitations in vol...,True,"{\n""main_contribution"": ""uDISCO clearing to o...",27548807,74.625292
422,Augmenting CRISPR applications in Drosophila w...,False,"{\n""main_contribution"": ""Augmenting CRISPR ap...",27595403,72.643309
423,genome engineering in postnatal mice using AAV...,False,"{\n""main_contribution"": ""genome engineering i...",27595405,77.260483


In [None]:
df1.to_csv('/Users/gburns/Documents/2023H2/image_tech_landscaping/alhazen_analysis/p2_08-25-23/df_200-700_70b.tsv', sep='\t')

In [None]:
df.to_csv('/Users/gburns/Documents/2023H2/image_tech_landscaping/alhazen_analysis/df.tsv', sep='\t')

In [None]:
# format df.ID_PAPER as int64
df.ID_PAPER = df.ID_PAPER.astype('int64') 
df1.ID_PAPER = df1.ID_PAPER.astype('int64') 
df2 = df.set_index('ID_PAPER').join(df1.set_index('ID_PAPER'))
df2.to_csv('/Users/gburns/Documents/2023H2/image_tech_landscaping/alhazen_analysis/p2_08-25-23/df_merged_200-500_70b.tsv', sep='\t')

In [None]:


article_text = df.iloc[5]['TITLE'] + ' ' + df.iloc[5]['ABSTRACT']
print(textwrap.fill(df.iloc[5]['TITLE'], 100))
print(textwrap.fill(df.iloc[5]['ABSTRACT'], 100))

p2.format(article_text=article_text)

output = llm(p2.format(article_text=article_text), )
pprint(output, depth=2)
#pprint(output_parser.parse(output), depth=2)

Fluorescent indicators for simultaneous reporting of all four cell cycle phases.
A robust method for simultaneous visualization of all four cell cycle phases in living cells is
highly desirable. We developed an intensiometric reporter of the transition from S to G2 phase and
engineered a far-red fluorescent protein, mMaroon1, to visualize chromatin condensation in mitosis.
We combined these new reporters with the previously described Fucci system to create Fucci4, a set
of four orthogonal fluorescent indicators that together resolve all cell cycle phases.
 [ `'an intensiometric reporter of the transition from S to G2 phase', 'a far-red fluorescent protein, mMaroon1', 'the previously described Fucci system''](" [ `'an intensiometric reporter of the transition from S to G2 phase', 'a "
 "far-red fluorescent protein, mMaroon1', 'the previously described Fucci "
 "system'']")


In [None]:
llm('What is immuno-oncology?')

 Immuno-oncology (IO) is a field of cancer research and treatment that focuses on using the body's own immune system to fight cancer. The goal of IO is to enhance the immune system's ability to recognize, attack, and eliminate cancer cells.

Cancer cells have ways to evade the immune system and avoid being targeted by immune cells, such as T cells. IO strategies aim to overcome these evasion mechanisms and restore the immune system's ability to recognize and attack cancer cells.

There are several approaches to IO, including:

1. Checkpoint inhibitors: These drugs work by blocking proteins on immune cells that normally prevent them from attacking cancer cells. Examples include pembrolizumab (Keytruda) and nivolumab (Opdivo).
2. Immune stimulators: These drugs enhance the activity of immune cells, such as T cells, to recognize and attack cancer cells. Examples include talimogene laherparepvec (Imlygic) and CimaVax-EGF.
3. Cancer vaccines: These vaccines aim to stimulate an immune respon

" Immuno-oncology (IO) is a field of cancer research and treatment that focuses on using the body's own immune system to fight cancer. The goal of IO is to enhance the immune system's ability to recognize, attack, and eliminate cancer cells.\n\nCancer cells have ways to evade the immune system and avoid being targeted by immune cells, such as T cells. IO strategies aim to overcome these evasion mechanisms and restore the immune system's ability to recognize and attack cancer cells.\n\nThere are several approaches to IO, including:\n\n1. Checkpoint inhibitors: These drugs work by blocking proteins on immune cells that normally prevent them from attacking cancer cells. Examples include pembrolizumab (Keytruda) and nivolumab (Opdivo).\n2. Immune stimulators: These drugs enhance the activity of immune cells, such as T cells, to recognize and attack cancer cells. Examples include talimogene laherparepvec (Imlygic) and CimaVax-EGF.\n3. Cancer vaccines: These vaccines aim to stimulate an immu

In [None]:
import time

l = []
t = []
for i, row in df.iterrows():
    if row.ABSTRACT == '':
        continue
    start_time = time.time()
    print(row.ID_PAPER)
    print(textwrap.fill(row.TITLE, 100))
    print(textwrap.fill(row.ABSTRACT, 100))
    p = prompt.format(article_text=(row.TITLE+'\n'+row.ABSTRACT))
    output_text = llm(p)
    output = output_parser.parse(output_text)
    pprint(output, depth=2)
    delta_t = time.time() - start_time
    t.append(delta_t)
    print(f'Elapsed time: {delta_t:.2f} seconds')
    print('-------------------')
    for ner in output:
        l.append((row.ID_PAPER, ner))
l

27749836
Random-access scanning microscopy for 3D imaging in awake behaving animals.
Understanding how neural circuits process information requires rapid measurements of activity from
identified neurons distributed in 3D space. Here we describe an acousto-optic lens two-photon
microscope that performs high-speed focusing and line scanning within a volume spanning hundreds of
micrometers. We demonstrate its random-access functionality by selectively imaging cerebellar
interneurons sparsely distributed in 3D space and by simultaneously recording from the soma,
proximal and distal dendrites of neocortical pyramidal cells in awake behaving mice.
['acousto-optic lens microscopy',
 'two-photon microscopy',
 'random-access scanning microscopy',
 'line scanning']
Elapsed time: 8.05 seconds
-------------------
27749837
ATAC-see reveals the accessible genome by transposase-mediated imaging and sequencing.
Spatial organization of the genome plays a central role in gene expression, DNA replication

OutputParserException: Invalid json output: [
"DNA-PAINT",
"traditional DNA-PAINT",
"concatenated, periodic DNA sequence motifs"
"multiplexed imaging"
] derived from Sure! Here is the list of imaging techniques or technologies described in the abstract:

[
"DNA-PAINT",
"traditional DNA-PAINT",
"concatenated, periodic DNA sequence motifs"
"multiplexed imaging"
]

Is there anything else I can help you with?

In [None]:
from tqdm import tqdm

lset=set()
for id in l:
    lset.add(id[0])
print(lset)
print(len(lset))

{'30061677', '28604724', '31932777', '33941937', '28783155', '28319112', '28967889', '28869755', '31548704', '31591578', '28394337', '31591580', '29298292', '31501548', '33398191', '30349041', '32066960', '30804550', '32123392', '27749837', '28417997', '31086341', '33589838', '34312566', '28650478', '29355847', '32661427', '32015543', '30013046', '31097821', '29334378', '29320487', '29039416', '27869816', '31086342', '30082898', '27776112', '34341582', '27798610', '27798611', '33875885', '34594032', '32616928', '28481362', '30643212', '29786093', '28869758', '34059826', '30397326', '29553579', '32895537', '30108338', '33633410', '30988469', '33408404', '30377376', '32958921', '28858338', '29131163', '30504871', '31501551', '28068315', '30573831', '32424271', '34608319', '28945705', '28846087', '29638227', '28250468', '30504889', '28628129', '31040436', '32203384', '30573813', '33963344', '34845388', '28661494', '33795876', '32807955', '31611690', '30559429', '32203385', '31308547', '30

In [None]:
l2 = []
t2 = []
for i, row in df.iterrows():
    if row.ABSTRACT == '':
        continue
    if row.ID_PAPER in lset:
        continue
    start_time = time.time()
    print(row.ID_PAPER)
    print(textwrap.fill(row.TITLE, 100))
    print(textwrap.fill(row.ABSTRACT, 100))
    p = prompt.format(article_text=(row.TITLE+'\n'+row.ABSTRACT))
    try:
        output_text = llm(p)
        output = output_parser.parse(output_text)
        pprint(output, depth=2)
        delta_t = time.time() - start_time
        t.append(delta_t)
        print(f'Elapsed time: {delta_t:.2f} seconds')
        print('-------------------')
        for ner in output:
            l2.append((row.ID_PAPER, ner))
    except Exception as e:
        print(e)


34949807
Rapid, efficient and activation-neutral gene editing of polyclonal primary human resting
CD4<sup>+</sup> T cells allows complex functional analyses.
CD4<sup>+</sup> T cells are central mediators of adaptive and innate immune responses and constitute
a major reservoir for human immunodeficiency virus (HIV) in vivo. Detailed investigations of resting
human CD4<sup>+</sup> T cells have been precluded by the absence of efficient approaches for genetic
manipulation limiting our understanding of HIV replication and restricting efforts to find a cure.
Here we report a method for rapid, efficient, activation-neutral gene editing of resting, polyclonal
human CD4<sup>+</sup> T cells using optimized cell cultivation and nucleofection conditions of
Cas9-guide RNA ribonucleoprotein complexes. Up to six genes, including HIV dependency and
restriction factors, were knocked out individually or simultaneously and functionally characterized.
Moreover, we demonstrate the knock in of double-stran

In [None]:
l.extend(l2)
print(len(l))
t.extend(t2)
print(len(t))

3941
1115


In [None]:
print(len(l2))

2510


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()

In [None]:
from databricks import sql
import os

with sql.connect(server_hostname = "czi-shared-infra-czi-sci-general-prod-databricks.cloud.databricks.com",
                 http_path       = "/sql/1.0/warehouses/1c4df94f2f1a6305",
                 access_token    = os.getenv("DB_TOKEN")) as connection:

  with connection.cursor() as cursor:
    cursor.execute("SELECT * FROM gburns.imaging_tech.papers WHERE LEN(ABSTRACT) > 0 LIMIT 10")
    result = cursor.fetchall()

    for row in result:
      print(row)

DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.


Row(ID_PAPER='PPR418001', DOI='10.1101/2021.11.09.467827', YEAR='2021', ABSTRACT='<h4>ABSTRACT</h4> The close association between astrocytes and microglia causes great difficulties to distinguish their individual roles in innate immune responses in central nervous system. Current chemical-based methods to eliminate microglia in glial cell culture introduce various molecular and functional alterations to astrocytes. Here, we describe a novel two-step approach to achieve a complete elimination of microglia without affecting the biological properties of co-cultured astrocytes by temporal treatment of histone deacetylase inhibitor trichostatin A (TSA). We verify TSA as a potent inducer for microglial-specific cell death, which also causes comprehensive gene expression changes in astrocytes. However, withdrawal of TSA not only ensures no microglia repopulation, but also restores all the gene expression changes in terms of astrocyte functions, including neurotrophic factors, glutamate and po

In [None]:
engine = create_engine(
    'databricks://token:'+os.environ['DB_TOKEN']+ 
    '@czi-shared-infra-czi-sci-general-prod-databricks.cloud.databricks.com'+
    '?http_path=/sql/1.0/warehouses/1c4df94f2f1a6305')

sql = '''
SELECT * FROM gburns.imaging_tech.papers 
WHERE LEN(ABSTRACT) > 0 and YEAR=2022
'''
with engine.connect() as con:
    stmt = db.text(sql)
    rs = con.execute(stmt)
    df = pd.DataFrame(rs.fetchall(), columns=rs.keys())
df

DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.
DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set.


Unnamed: 0,ID_PAPER,DOI,YEAR,ABSTRACT,TITLE,MESH_DESCRIPTORS
0,PPR438151,10.1101/2022.01.02.474731,2022,Neuronal responses to similar stimuli change d...,Contribution of behavioural variability to rep...,
1,PPR438262,10.1101/2022.01.01.474694,2022,Red blood cells (RBCs) express the nucleic aci...,Human red blood cells express the RNA sensor T...,
2,PPR438779,10.1101/2021.12.30.474580,2022,Severe acute respiratory syndrome coronavirus ...,SARS-CoV-2 entry sites are present in all stru...,
3,PPR438788,10.1101/2022.01.01.474713,2022,Thailand was the first country outside China t...,The SARS-CoV-2 infection in Thailand: analysis...,
4,PPR438789,10.1101/2022.01.03.474773,2022,The SARS-CoV-2 Omicron variant is currently ca...,Reduced interferon antagonism but similar drug...,
...,...,...,...,...,...,...
3939,PPR587527,10.1101/2022.12.19.519427,2022,Cohort studies increasingly collect biosamples...,Omada: Robust clustering of transcriptomes thr...,
3940,PPR587989,10.1101/2022.12.21.520923,2022,"The two clathrin isoforms, CHC17 and CHC22, ge...",CHC22 clathrin membrane recruitment uses SNX5 ...,
3941,PPR588013,10.1101/2022.12.21.521417,2022,With the recent growth in spectral flow cytome...,Back to the Future- Unleashing your cytometer’...,
3942,PPR588026,10.1101/2022.12.21.521384,2022,The shape of cells is the outcome of the balan...,Friction patterns guide actin network contraction,


In [None]:
t2 = """
You are an expert biological scientist trained in imaging and microscopy. 
Your knowledge is deep and wide. 
You are terse and precise in your language.

Your job is to read the title and abstract of a scientific paper in detail, understand what you read, and perform the task described below based on what you read. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, break down the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
(A) background information about the work, 
(B) the goals of the work being described, 
(C) the methods used to achieve the goals, 
(D) the results of the work, and 
(E) the conclusions of the work. 

Third, use only sentences you identify as goals, methods, and results to identify the main contribution of the paper. 
Record this contribution as a complete noun phrase shorter than 12 words or so as main_contribution in the output.

Fourth, decide if that contribution of the paper is developing an imaging technique or method. Be careful about this decision and only answer 'true' 
if you are confident that the contribution of the paper states that a new imaging technique or method was developed. Possible examples of imaging techniques
could be new types of microscope, new imaging techniques, hardware, optics, or sample preparation for imaging. 

If the main contribution is the development of a new imaging method, record a true value in a variable called is_imaging_method in the output. 

If the main contribution is not the development of a new imaging method (such as the development of a genetic test, or a finding about a biological system), record a false value in a variable called is_imaging_method in the output. 

Format your response as a JSON object with "main_contribution" and "is_imaging_method" as the keys. 

Do not include any other response other than the JSON object. Remember, stop generating text after you finish the JSON object. Do not provide additional explanations of your answer.

Article
title: '''{title}'''
abstract: '''{abstract}'''
"""

In [None]:
t3 = """
Your job is to read the title and abstract of a scientific paper and perform the task described below. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, split the text of the title and abstract into separate sentences.

Second, classify each sentence as describing 
- background information about the work, 
- the goals of the paper, 
- the methods used by the scientists, 
- the results of the work, and 
- the conclusions of the work. 

Third, using only the goal and method sentences, decide if the work is concerned with (A) developing new technology or methods or 
(B) investigating how biological systems work.  

Be careful about this decision and only answer 'true' if you are confident that the main goal of the published work 
was to create or test a new technique or method.  

If paper describes the development of a new method, technique or approach, record a true value in a variable called is_method_paper in the output. 

If the goal of the work understanding biological phenomena, record a false value in a variable called is_method_paper in the output.

Format your response as a JSON object with "is_method_paper" as the key.  

Briefly explain your reasoning for your decision in one or two sentences.

Article
title: '''{title}'''
abstract: '''{abstract}'''
"""
p3 = PromptTemplate(
    template=t3,
    input_variables=["title", "abstract"],
)

In [None]:
df

Unnamed: 0,ID_PAPER,DOI,ABSTRACT,TITLE,MESH_DESCRIPTORS
0,27749836,10.1038/nmeth.4033,Understanding how neural circuits process info...,Random-access scanning microscopy for 3D imagi...,Cerebellar Cortex | Pyramidal Cells | Visual C...
1,27749837,10.1038/nmeth.4031,Spatial organization of the genome plays a cen...,ATAC-see reveals the accessible genome by tran...,Neutrophils | CD4-Positive T-Lymphocytes | Cel...
2,27776112,10.1038/nmeth.4034,Small-molecule fluorophores are important tool...,Bright photoactivatable fluorophores for singl...,"COS Cells | Cell Line, Tumor | Animals | Human..."
3,27798609,10.1038/nmeth.4046,We describe a red-shifted fluorescence resonan...,Simultaneous dual-color fluorescence lifetime ...,Hela Cells | Endoplasmic Reticulum | Animals |...
4,27798610,10.1038/nmeth.4045,A robust method for simultaneous visualization...,Fluorescent indicators for simultaneous report...,Hela Cells | NIH 3T3 Cells | Chromatin | Anima...
...,...,...,...,...,...
1121,27479330,10.1038/nmeth.3955,We have developed hydrogel-based virtual micro...,Virtual microfluidics for digital quantificati...,Escherichia coli | Staphylococcus aureus | Hyd...
1122,27548807,10.1038/nmeth.3964,Recent tissue-clearing approaches have become ...,Shrinkage-mediated imaging of entire organs an...,"Central Nervous System | Animals | Mice, Inbre..."
1123,27595403,10.1038/nmeth.3972,We present tRNA-based vectors for producing mu...,Augmenting CRISPR applications in Drosophila w...,"Animals | Animals, Genetically Modified | Dros..."
1124,27595405,10.1038/nmeth.3993,CRISPR-Cas9 delivery by adeno-associated virus...,A multifunctional AAV-CRISPR-Cas9 and its host...,"Animals | Mice, Inbred C57BL | Humans | Mice |..."


In [None]:
llm

LlamaCpp(cache=None, verbose=True, callbacks=<langchain.callbacks.manager.CallbackManager object>, tags=None, metadata=None, client=<llama_cpp.llama.Llama object>, model_path='/Users/gburns/Documents/LLms/llama-2-7b-chat.Q5_K_M.gguf', lora_base=None, lora_path=None, n_ctx=512, n_parts=-1, seed=-1, f16_kv=True, logits_all=False, vocab_only=False, use_mlock=False, n_threads=None, n_batch=4096, n_gpu_layers=40, suffix=None, max_tokens=256, temperature=0.8, top_p=0.95, logprobs=None, echo=False, stop=[], repeat_penalty=1.1, top_k=40, last_n_tokens_size=64, use_mmap=True, rope_freq_scale=1.0, rope_freq_base=10000.0, model_kwargs={}, streaming=True, grammar_path=None, grammar=None)

In [None]:
import time

def execute_llm(llm, p, i, t, a):
    print(textwrap.fill(i, 100))
    print(textwrap.fill(t, 100))
    print(textwrap.fill(a, 100))
    output = llm(p.format(title=t, abstract=a))
    return output

def run_df(df, llm, p):
    l = []
    for i, r in df.iterrows():
        if r.ABSTRACT is None: 
            continue
        print('\n-----------------------------------------------------------------\n')
        print(i)
        # measure time to execute
        t = time.time()
        output = execute_llm(llm, p, r.ID_PAPER, r.TITLE, r.ABSTRACT)
        m = output_parser.parse(output)
        if m is None: 
            m = {
                'main_contribution': None,
                'is_imaging_method': False,
            } 
        m['llm_output'] = output
        m['ID_PAPER'] = r.ID_PAPER
        dt = time.time() - t    
        m['time'] = dt
        l.append(m)

    df2 = pd.DataFrame(l)
    return df2
    
df_2022_2 = run_df(df_2022[0:2], llm, p3)
#df_nat_meth_0_100 = run_df(df_nat_meth[0:100], llm, p3)


-----------------------------------------------------------------

0
PPR590719
Lung lipid deposition in pneumonias of viral and non-viral aetiology
Pneumonia is an acute respiratory disease of varying aetiology, which drew much attention during the
COVID-19 pandemic. Among many thoroughly studied aspects of pneumonia, lipid metabolism has been
addressed insufficiently. Here, we report on abnormal lipid metabolism of both COVID-19- and non-
COVID-19-associated pneumonias in human lungs. Morphometric analysis revealed extracellular and
intracellular lipid depositions, most notably within vessels adjacent to inflamed regions, where
they apparently interfere with the blood flow. Lipids were visualized on Sudan III- and Oil Red
O-stained cryosections and on OsO 4 -contrasted semi-thin and ultrathin sections. Chromato-mass
spectrometry revealed that unsaturated fatty acid content was elevated at inflammation sites
compared with the intact sites of the same lung. The genes involved in lipid 


llama_print_timings:        load time = 88410.89 ms
llama_print_timings:      sample time =     8.68 ms /    12 runs   (    0.72 ms per token,  1382.01 tokens per second)
llama_print_timings: prompt eval time = 88410.80 ms /   629 tokens (  140.56 ms per token,     7.11 tokens per second)
llama_print_timings:        eval time =  1971.93 ms /    11 runs   (  179.27 ms per token,     5.58 tokens per second)
llama_print_timings:       total time = 90409.99 ms
ggml_metal_free: deallocating


OutputParserException: Could not find json-formatted data in: Please do this task. 

Thank you.

In [None]:
df_nat_meth

Unnamed: 0,ID_PAPER,DOI,YEAR,ABSTRACT,TITLE,MESH_DESCRIPTORS
0,34811556,10.1038/s41592-021-01316-y,2022,Tissues and organs are composed of distinct ce...,Spatial mapping of protein composition and tis...,Antibodies | Diagnostic Imaging | Cell Communi...
1,34824477,10.1038/s41592-021-01308-y,2022,Highly multiplexed tissue imaging makes detail...,"MCMICRO: a scalable, modular image-processing ...",Humans | Neoplasms | Diagnostic Imaging | Imag...
2,34887550,10.1038/s41592-021-01335-9,2022,Mako is a software tool that converts microbio...,Fast and flexible analysis of linked microbiom...,Animals | Computational Biology | Computer Gra...
3,34887551,10.1038/s41592-021-01334-w,2022,Recent whole-brain mapping projects are collec...,Cross-modal coherent registration of whole mou...,"Brain | Animals | Mice, Inbred C57BL | Imaging..."
4,34916672,10.1038/s41592-021-01341-x,2022,Self-labeling protein tags such as HaloTag are...,Engineered HaloTag variants for fluorescence l...,Cell Line | Humans | Rhodamines | Hydrolases |...
...,...,...,...,...,...,...
135,36396787,10.1038/s41592-022-01669-y,2022,Fluorescent in-situ hybridization (FISH)-based...,"RS-FISH: precise, interactive, fast, and scala...","Microscopy | In Situ Hybridization, Fluorescen..."
136,36424441,10.1038/s41592-022-01673-2,2022,Tissue function depends on cellular organizati...,Image-seq: spatially resolved single-cell sequ...,Humans | Leukemia | Diagnostic Imaging | Cell ...
137,36443486,10.1038/s41592-022-01675-0,2022,Achieving state-of-the-art performance with de...,A large-scale neural network training framewor...,Motor Cortex | Somatosensory Cortex | Animals ...
138,36456784,10.1038/s41592-022-01684-z,2022,We present proximity sequencing (Prox-seq) for...,"Quantification of extracellular proteins, prot...","Leukocytes, Mononuclear | CD8-Positive T-Lymph..."


In [None]:
df_nat_met_out = df1

In [None]:
df_nat_meth_0_100

Unnamed: 0,is_method_paper,llm_output,ID_PAPER,time
0,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",34811556,66.178736
1,True,"{\n""is_method_paper"": true\n}\n\nThe title in...",34824477,54.061088
2,True,"{\n""is_method_paper"": true\n}\n\nThe title su...",34887550,56.417907
3,True,"{\n""is_method_paper"": true\n}\n\nThe title su...",34887551,67.477511
4,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",34916672,60.608709
...,...,...,...,...
95,True,"{\n""is_method_paper"": true\n}\n\nThe title in...",36064772,69.074156
96,False,"{\n""is_method_paper"": false\n}\n\nThe title a...",36064773,70.971109
97,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",36064775,77.545475
98,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",36068320,65.244119


In [None]:
df_biorxiv_0_100

Unnamed: 0,is_method_paper,llm_output,ID_PAPER,time,main_contribution,is_imaging_method
0,False,"{\n""is_method_paper"": false\n}\n\nExplanation...",PPR512857,137.945491,,
1,False,"{\n""is_method_paper"": false\n}\n\nThe article...",PPR512860,98.742057,,
2,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",PPR512861,145.949785,,
3,False,"{\n""is_method_paper"": false\n}\n\nThe title a...",PPR512885,89.226944,,
4,False,"{\n""is_method_paper"": false\n}\n\nThe title a...",PPR512913,118.418904,,
...,...,...,...,...,...,...
95,True,"{\n""is_method_paper"": true\n}\n\nThe title an...",PPR517227,80.874245,,
96,False,"{\n""is_method_paper"": false\n}\n\nThe article...",PPR517231,115.259636,,
97,False,"{\n""is_method_paper"": false\n}\n\nThe title a...",PPR517235,145.999283,,
98,False,"{\n""is_method_paper"": false\n}\n\nThe title a...",PPR517237,94.295474,,


In [None]:
t3 = """[INST]
<<SYS>>
Your job is to read the title and abstract of a scientific paper and perform the task described below. 
The text of the title and the text of the abstract will be delimited with triple backticks. 
<</SYS>>

First, split the text of the title and abstract into separate sentences.

Second, classify each sentence as describing 
- background information about the work, 
- the goals of the paper, 
- the methods used by the scientists, 
- the results of the work, and 
- the conclusions of the work. 

Third, using only the goal and method sentences, decide if the work is concerned with (A) developing new technology or methods or 
(B) investigating how biological systems work.  

Be careful about this decision and only answer 'true' if you are confident that the main goal of the published work 
was to create or test a new technique or method.  

If paper describes the development of a new method, technique or approach, record a true value in a variable called is_method_paper in the output. 

If the goal of the work understanding biological phenomena, record a false value in a variable called is_method_paper in the output.

Format your response as a JSON object with "is_method_paper" as the key.  

Briefly explain your reasoning for your decision in one or two sentences.
Article
title: '''{title}'''
abstract: '''{abstract}'''
[INST]
"""
p3 = PromptTemplate(
    template=t3,
    input_variables=["title", "abstract"],
)

In [None]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

n_gpu_layers = 10  # Change this value based on your model and your GPU VRAM pool.
n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/Users/gburns/Documents/Coding/ChatGPT_etc/LLMs/llama-2-70b-chat.Q5_K_M.gguf",
    n_ctx=4096,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    f16_kv=True,
    verbose=True, # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 19 key-value pairs and 723 tensors from /Users/gburns/Documents/Coding/ChatGPT_etc/LLMs/llama-2-70b-chat.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  8192, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 28672,  8192,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  8192, 28672,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  8192,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  8192,  1024,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_outp

In [None]:
# check
r = df[df.ID_PAPER=="28945706"]
a = r.ABSTRACT.values[0]
t = r.TITLE.values[0]
prompt = p2.format(title=t, abstract=a)
print(prompt)
output = llm(p3.format(title=t, abstract=a))
#m = output_parser.parse(output)

print('\n\n\n'+output)


You are an expert biological scientist trained in imaging and microscopy. 
Your knowledge is deep and wide. You are terse and precise in your language.

Your job is to read the title and abstract of a scientific paper in detail, understand what you read, and perform the task described below based on what you read. 
The text of the title and the text of the abstract will be delimited with triple backticks. 

First, break down the text of the title and abstract into separate sentences.

Second, identify which sentences describe 
(A) background information about the work, 
(B) the goals of the work being described, 
(C) the methods used to achieve the goals, 
(D) the results of the work, and 
(E) the conclusions of the work. 

Third, use only sentences you identify as goals, methods, and results to identify the main contribution of the paper. 
Record this contribution as a complete noun phrase shorter than 12 words or so as main_contribution in the output.

Fourth, decide if that contrib


llama_print_timings:        load time = 14193.17 ms
llama_print_timings:      sample time =    89.03 ms /   123 runs   (    0.72 ms per token,  1381.60 tokens per second)
llama_print_timings: prompt eval time = 14193.08 ms /   615 tokens (   23.08 ms per token,    43.33 tokens per second)
llama_print_timings:        eval time = 20797.63 ms /   122 runs   (  170.47 ms per token,     5.87 tokens per second)
llama_print_timings:       total time = 35276.71 ms
