In [2]:
from dotenv import load_dotenv
import os
from bs4 import BeautifulSoup
from selenium import webdriver
import numpy as np
import openai
import pandas as pd
import pickle
import tiktoken
from tqdm import tqdm

# Load variables from a local .env file into the environment so the API key
# does not have to be written into the notebook.
load_dotenv()

# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv('OPENAI_API_KEY')


# Model passed to openai.Completion.create via COMPLETIONS_API_PARAMS.
COMPLETIONS_MODEL = "text-davinci-003"
# Default model used by get_embedding for both abstracts and queries.
EMBEDDING_MODEL = "text-embedding-ada-002"

## Scrape Conference data

#### Setup Selenium

In [12]:
# Render the abstract supplement in a real browser and parse the resulting HTML.
dr = webdriver.Chrome()
try:
    dr.get("https://aasldpubs.onlinelibrary.wiley.com/doi/10.1002/hep.32697")
    # NOTE(review): "html" names a parser family rather than a specific parser,
    # so the parser actually used may depend on what is installed — confirm
    # whether a specific one (e.g. "html.parser" or "lxml") should be pinned.
    bs = BeautifulSoup(dr.page_source,"html")
finally:
    # The page source has been captured (or the fetch failed); either way the
    # browser is no longer needed, so shut it down instead of leaking the
    # Chrome process for the lifetime of the kernel.
    dr.quit()

#### Find and scrape titles and abstracts

In [13]:
# Collect every <section> carrying the article-section__content class; the
# next cell treats each one as a single abstract.
abstracts = bs.find_all('section', class_='article-section__content')

In [14]:
# Walk the scraped sections once, pulling a title and an abstract body out of
# each. Both lists stay aligned: entry k of each comes from section k.
titles = []
_abs = []

for section in tqdm(abstracts):
    heading = section.find('h3', {'class': 'article-section__sub-title section2'})
    titles.append(' '.join(heading.stripped_strings))
    # The abstract text is read from the second <p> inside the section.
    body = section.find_all('p')[1]
    _abs.append(' '.join(body.stripped_strings))


100%|██████████| 2031/2031 [00:00<00:00, 6922.02it/s]


In [15]:
# One row per scraped section: its heading text and its abstract text.
data = pd.DataFrame(dict(title=titles, abstract=_abs))

In [20]:
data

Unnamed: 0,title,abstract
0,TENOFOVIR-DF THERAPY PREVENTS HEPATITIS B VERT...,Background: Maternal tenofovir disoproxil fuma...
1,GLYCOLYSIS PROMOTES EXTRACELLULAR VESICLE RELE...,Background: Liver fibrosis is characterized by...
2,THE MULTI-ORGAN DYSFUNCTION AND EVALUATION FOR...,Background: Patients with acute-on-chronic liv...
3,ASSESSMENT OF KIDNEY FUNCTION IN PATIENTS WITH...,"Background: Recently, new estimating GFR equat..."
4,NOT ALL MELD SCORES ARE CREATED EQUAL: MELD DR...,Background: For the same MELD score at listing...
...,...,...
2026,LOW PREVALENCE OF HEPATITIS B IMMUNITY AMONG P...,Background: The Fontan operation is a final st...
2027,EARLY MINIMIZATION OF IMMUNOSUPPRESSION AFTER ...,Background: Hepatoblastoma (HPB) is a malignan...
2028,SUSCEPTIBILITY OF HUMAN FETAL HEPATOCYTES (hTE...,Background: Intrauterine exposure to ethanol (...
2029,ACQUIRED IMPAIRMENTS IN ATAXIA TELANGIECTASIA ...,Background: The integrity of DNA replication m...


## Define embedding search functions

In [4]:
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """
    Embed a single piece of text with the OpenAI Embeddings API.

    text is sent as the request input and model selects the embedding model
    (EMBEDDING_MODEL unless overridden). The "embedding" field of the first
    entry in the response's "data" list is returned.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]

def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Embed the `content` field of every row of df with get_embedding.

    Return a dictionary keyed by the row's index label (as yielded by
    df.iterrows()) whose value is the embedding vector for that row.
    One embedding request is made per row.
    """
    embeddings_by_row = {}
    for row_label, row in df.iterrows():
        embeddings_by_row[row_label] = get_embedding(row.content)
    return embeddings_by_row

In [6]:
def load_embeddings(fname: str) -> dict[str, list[float]]:
    """
    Read the document embeddings and their titles from a CSV.

    fname is the path to a CSV with a "title" column plus one column per
    embedding dimension, named "0", "1", ... up to the length of the embedding
    vectors minus one. Any other column (for example "heading", or the
    "Unnamed: 0" column produced when a frame is saved with its index) is
    ignored instead of being parsed as a dimension number.

    Returns a dictionary mapping each title to its embedding vector. If a
    title occurs on more than one row, the last row wins.

    Raises ValueError if the file has no numerically named columns, and
    KeyError if "title" or any dimension column in 0..max is missing.
    """
    df = pd.read_csv(fname, header=0)

    # Only purely numeric column names are embedding dimensions.
    dim_indexes = [int(c) for c in df.columns if str(c).isdecimal()]
    if not dim_indexes:
        raise ValueError(f"{fname!r} has no embedding columns named '0', '1', ...")

    # Select the dimensions in numeric order in one slice rather than doing a
    # per-row, per-dimension lookup.
    dim_columns = [str(i) for i in range(max(dim_indexes) + 1)]
    vectors = df[dim_columns].to_numpy().tolist()
    return dict(zip(df["title"], vectors))

In [36]:
# The helpers in this notebook read the abstract text from a column named
# 'content' (compute_doc_embeddings uses r.content, construct_prompt uses
# .content), so align the scraped frame with that name.
data = data.rename(columns = {'abstract': 'content'})

In [9]:
# Load the precomputed embeddings instead of calling the API again.
# The cell that builds `embs` reads column str(i) of this frame as the vector
# for row i of `data`, so columns are documents and rows are dimensions.
# 'Unnamed: 0' is presumably the index column written when the frame was saved
# with to_csv — confirm against the file.
document_embeddings = pd.read_csv('AASLD_embeddings.csv').drop(columns='Unnamed: 0')

In [26]:
document_embeddings.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030
0,-0.030594,-0.012845,0.003543,-0.000206,-0.004966,0.00463,-0.007686,-0.035994,-0.011646,-0.01507,...,0.000742,-0.014881,-0.015115,-0.023274,-0.022889,-0.01392,-0.004562,-0.013241,-0.028122,0.005009
1,-0.012238,-0.013851,-0.006504,0.003811,-0.008996,0.004609,0.004411,-0.010513,0.003088,-0.014373,...,0.011657,0.015114,0.014385,0.02629,0.009256,0.002864,0.009185,-0.006951,0.008461,0.009196
2,-0.031655,0.007666,0.027547,0.018677,0.021802,0.023698,0.025907,0.016879,0.024858,0.031881,...,0.003363,0.007537,0.032086,0.009643,-0.001252,-0.00495,0.012045,-0.021838,-0.003897,-0.002672
3,-0.037638,-0.038661,-0.043213,-0.04647,-0.037981,-0.028972,-0.052369,-0.027567,-0.029419,-0.036784,...,-0.017677,-0.020869,-0.043223,-0.014543,-0.023585,-0.031832,-0.014271,-0.046781,-0.055005,-0.042568
4,-0.018588,-0.035942,-0.015991,0.014279,-0.023021,0.00189,-0.018491,-0.015474,-0.013503,-0.023844,...,-0.027087,-0.027762,-0.035082,-0.01797,-0.030866,-0.037646,-0.036819,-0.046619,-0.063838,-0.023531


In [28]:
#document_embeddings = compute_doc_embeddings(data)
# Key each precomputed embedding by abstract title: column str(k) of
# document_embeddings is paired with the k-th title in data.
embs = {
    title: list(document_embeddings[str(position)])
    for position, title in enumerate(data['title'])
}

In [13]:
#pd.DataFrame(document_embeddings).to_csv('AASLD_embeddings.csv')

In [29]:
# Show the first (title, vector) pair as a quick sanity check of the mapping.
example_entry = list(embs.items())[0]
entry_title, entry_vector = example_entry
print(f"{entry_title} : {entry_vector[:5]}... ({len(entry_vector)} entries)")

TENOFOVIR-DF THERAPY PREVENTS HEPATITIS B VERTICAL TRANSMISSION IN HIGHLY VIREMIC MOTHERS WITHOUT HBV IMMUNOGLOBULIN FOR INFANTS : [-0.030594365671277, -0.0122377462685108, -0.0316549688577652, -0.0376378670334816, -0.018587777391076]... (1536 entries)


In [30]:
def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Score two vectors against each other using their dot product.

    OpenAI embeddings are normalized to length 1, so for them the dot product
    is the same value as the cosine similarity.
    """
    left = np.array(x)
    right = np.array(y)
    return np.dot(left, right)

def order_document_sections_by_query_similarity(query: str, contexts: dict[(str, str), np.array]) -> list[(float, (str, str))]:
    """
    Rank the pre-calculated document embeddings against a query.

    The query is embedded with get_embedding and scored against every
    embedding in contexts with vector_similarity.

    Return a list of (similarity, key) pairs, where key is the contexts key
    of the document, sorted in descending order.
    """
    query_vector = get_embedding(query)

    scored_documents = []
    for doc_key, doc_vector in contexts.items():
        score = vector_similarity(query_vector, doc_vector)
        scored_documents.append((score, doc_key))

    # Tuples compare by score first, so a reverse sort puts the best match first.
    scored_documents.sort(reverse=True)
    return scored_documents

In [31]:
order_document_sections_by_query_similarity("What are some of the new topics being discussed regarding NAFLD and other fatty liver diseases?", embs)[:5]

[(0.8817804675403065,
  'AN ANNUAL SURVEY OF PATIENT PERSPECTIVES TO UNDERSTAND AND MONITOR THE EVOLVING SOCIAL AND MEDICAL EXPERIENCE OF NON-ALCOHOLIC FATTY LIVER DISEASE PATIENTS (NAFLD) IN THE UNITED STATES.'),
 (0.8775478601703193, 'BARRIERS IN THE TREATMENT OF OBESITY IN NAFLD'),
 (0.8727768436498491,
  'NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS'),
 (0.8723336487560348,
  'RAPID REPURPOSING OF DRUGS FOR NON-ALCOHOLIC FATTY LIVER DISEASE USING HIGH CONTENT IMAGING'),
 (0.8722550359036778,
  'VARIATIONS IN DIETARY LIPIDS AND LIPID SOLUBLE MICRONUTRIENTS ASSOCIATED WITH NAFLD: ANALYSIS OF A NATIONAL US COHORT DATABASE')]

In [60]:
order_document_sections_by_query_similarity("What are some of the new topics being discussed regarding the treatment of NAFLD and other fatty liver diseases?", embs)[:5]

[(0.8776485020083575, 'BARRIERS IN THE TREATMENT OF OBESITY IN NAFLD'),
 (0.8733085335938694,
  'AN ANNUAL SURVEY OF PATIENT PERSPECTIVES TO UNDERSTAND AND MONITOR THE EVOLVING SOCIAL AND MEDICAL EXPERIENCE OF NON-ALCOHOLIC FATTY LIVER DISEASE PATIENTS (NAFLD) IN THE UNITED STATES.'),
 (0.8726046854199494,
  'A PROFILE OF GASTROENTEROLOGISTS’ AND HEPATOLOGISTS’ PREPARING OR WAREHOUSING NAFLD AND NASH PATIENTS FOR AN EVENTUAL FDA APPROVED THERAPY'),
 (0.8719296618077453,
  'RAPID REPURPOSING OF DRUGS FOR NON-ALCOHOLIC FATTY LIVER DISEASE USING HIGH CONTENT IMAGING'),
 (0.870781724890584,
  'NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS')]

In [None]:
order_document_sections_by_query_similarity("What are some of the new topics being discussed regarding the treatment of NAFLD and other fatty liver diseases?", embs)[:5]

In [32]:
# Budget for the context part of the prompt. construct_prompt compares it
# against a running total of space-separated word counts plus separator_len.
# NOTE(review): that total mixes words (abstracts) with tokens (separator);
# confirm which unit this limit is meant to be in.
MAX_SECTION_LEN = 1000
# Prefix placed in front of every abstract added to the prompt.
SEPARATOR = "\n* "
# NOTE(review): I believe tiktoken maps text-davinci-003 to "p50k_base" rather
# than "gpt2" — confirm the encoding name; it only affects separator_len here.
ENCODING = "gpt2"  # encoding for text-davinci-003

encoding = tiktoken.get_encoding(ENCODING)
# Number of tokens SEPARATOR occupies under the encoding above.
separator_len = len(encoding.encode(SEPARATOR))

f"Context separator contains {separator_len} tokens"

'Context separator contains 3 tokens'

In [78]:
data[data['title'] == 'BARRIERS IN THE TREATMENT OF OBESITY IN NAFLD']#['content'].values[0]

Unnamed: 0,title,content
1130,BARRIERS IN THE TREATMENT OF OBESITY IN NAFLD,Background: Non-alcohol-related fatty liver di...


In [77]:
data.loc[0]

title      TENOFOVIR-DF THERAPY PREVENTS HEPATITIS B VERT...
content    Background: Maternal tenofovir disoproxil fuma...
Name: 0, dtype: object

In [41]:
def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame) -> str:
    """
    Build a summarization prompt from the abstracts most similar to a question.

    question is used only to rank the documents; it is not included in the
    returned prompt. context_embeddings maps each title to its embedding, and
    df must have a 'title' column matching those keys and a 'content' column
    holding the abstract text.

    Abstracts are added in descending similarity order until the next one
    would push the running length past MAX_SECTION_LEN. The selected titles
    are printed as diagnostics.

    Returns the instruction header, the chosen abstracts (each prefixed with
    SEPARATOR and with newlines flattened to spaces), and a trailing
    "Summary:" cue.

    Raises IndexError if a ranked title has no matching row in df.
    """
    most_relevant_document_sections = order_document_sections_by_query_similarity(question, context_embeddings)

    chosen_sections = []
    chosen_sections_len = 0
    chosen_sections_indexes = []

    for _, section_index in most_relevant_document_sections:
        # Look the abstract up once; the first row wins if titles repeat.
        content = df.loc[df['title'] == section_index, 'content'].iloc[0]

        # Add contexts until we run out of space.
        # NOTE(review): the length is a count of space-separated words, while
        # separator_len is a token count — confirm the intended unit.
        chosen_sections_len += len(content.split(' ')) + separator_len
        if chosen_sections_len > MAX_SECTION_LEN:
            break

        chosen_sections.append(SEPARATOR + content.replace("\n", " "))
        chosen_sections_indexes.append(str(section_index))

    # Useful diagnostic information
    print(f"Selected {len(chosen_sections)} document sections:")
    print("\n".join(chosen_sections_indexes))

    # The instruction previously ended with an unbalanced double quote, which
    # was sent to the model as part of the prompt; it has been removed.
    header = """Summarize each of the medical abstracts provided below into a bulleted list that explains the purpose, methods, and results.\n\nContext:\n"""

    return header + "".join(chosen_sections) + "\n\n Summary: "

In [42]:
prompt = construct_prompt(
    "NAFLD gene therapy",
    embs,
    data
)

print("===\n", prompt)

Selected 2 document sections:
IDENTIFICATION OF THERAPEUTIC TARGETS FOR NON-ALCOHOLIC FATTY LIVER DISEASE THROUGH IN VIVO GENETIC SCREENS OF A PATIENT TRANSCRIPTOME-DERIVED shRNA LIBRARY
NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS
===
 Summarize each of the medical abstracts provided below into a bulleted list that explains the purpose, methods, and results."

Context:

* Background: Non-alcoholic fatty liver disease (NAFLD) is rapidly being recognized as the most common cause of chronic liver disease worldwide with the obesity and diabetes mellitus epidemics. However, unlike other highly prevalent diseases, it remains an underrepresented disease with limited effective treatment options. Therefore, there is an urgent need to prioritize innovative therapeutic development. In this study, to address this, we combine transcriptomic profiling of NAFLD patient tissues with in vivo functional genetic screens in mouse

In [43]:
# Keyword arguments forwarded to openai.Completion.create on every request.
# A temperature of 0.0 is used because it gives the most predictable, factual answer.
COMPLETIONS_API_PARAMS = dict(
    temperature=0.0,
    max_tokens=1000,
    model=COMPLETIONS_MODEL,
)

In [44]:
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    """
    Build a prompt for query with construct_prompt and send it to the
    completions endpoint.

    query, document_embeddings and df are passed through to construct_prompt.
    When show_prompt is true the assembled prompt is printed before the
    request is made. The request uses COMPLETIONS_API_PARAMS.

    Returns the text of the first completion choice with leading and
    trailing spaces and newlines removed.
    """
    prompt = construct_prompt(query, document_embeddings, df)

    if show_prompt:
        print(prompt)

    completion = openai.Completion.create(prompt=prompt, **COMPLETIONS_API_PARAMS)
    first_choice_text = completion["choices"][0]["text"]
    return first_choice_text.strip(" \n")

In [45]:
answer_query_with_context("NAFLD gene therapy", data, embs, show_prompt=True)

Selected 2 document sections:
IDENTIFICATION OF THERAPEUTIC TARGETS FOR NON-ALCOHOLIC FATTY LIVER DISEASE THROUGH IN VIVO GENETIC SCREENS OF A PATIENT TRANSCRIPTOME-DERIVED shRNA LIBRARY
NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS
Summarize each of the medical abstracts provided below into a bulleted list that explains the purpose, methods, and results."

Context:

* Background: Non-alcoholic fatty liver disease (NAFLD) is rapidly being recognized as the most common cause of chronic liver disease worldwide with the obesity and diabetes mellitus epidemics. However, unlike other highly prevalent diseases, it remains an underrepresented disease with limited effective treatment options. Therefore, there is an urgent need to prioritize innovative therapeutic development. In this study, to address this, we combine transcriptomic profiling of NAFLD patient tissues with in vivo functional genetic screens in mouse mice

'* Study 1:\n    * Purpose: To identify potential therapeutic targets for non-alcoholic fatty liver disease (NAFLD) \n    * Methods: RNA sequencing on liver tissues from 130 patients, generation of a pooled shRNA library targeting the dysregulated genes in patients, delivery of library into hepatocytes by a transposon-based vector system and hydrodynamic tail vein injection for the in vivo genetic screen, mice fed two NAFLD-inducing diets, livers harvested for deep sequencing\n    * Results: 16 and 38 candidate genes selected from the screening in the two models, one of them selected as the best candidate, target gene silencing showed a reduction in disease progression in NAFLD mice models compared to the control shRNA, target gene silencing promoted regeneration of the liver without tumor developments\n\n* Study 2:\n    * Purpose: To compare the significant changes in proteins across the NAFLD disease with normal subjects\n    * Methods: Liver tissue flash frozen and stored at -80°C, 

In [93]:
print('- Purpose: To investigate potential barriers in the treatment of obesity in NAFLD\n- Methods: Using the 2017-2020 National Health and Nutrition Examination Survey (NHANES) database\n- Results: \n    - Prevalence of NAFLD was 35.9%\n    - 62.6% attempted dietary changes, 39.1% attempted exercise, 3.3% joined a weight loss program, 4.9% took a non-prescription supplement, 0.7% took laxatives or vomited, and 0.5% started smoking\n    - Increase in food insecurity as BMI increased\n    - Black and White NAFLD patients reported similar rates of counseling, prescription of weight loss medications, bariatric surgery, interface with healthcare, and dietary changes\n    - Black NAFLD patients were more likely to exercise, more likely to have household food insecurity, less likely to have private or any insurance coverage, and less likely to perceive themselves as overweight\n    - Rate of prescribing weight loss medications or bariatric surgery for NAFLD was overall low at <5%\n- Conclusion: Need to better understand the social determinants of health in promoting weight loss and treating NAFLD')

- Purpose: To investigate potential barriers in the treatment of obesity in NAFLD
- Methods: Using the 2017-2020 National Health and Nutrition Examination Survey (NHANES) database
- Results: 
    - Prevalence of NAFLD was 35.9%
    - 62.6% attempted dietary changes, 39.1% attempted exercise, 3.3% joined a weight loss program, 4.9% took a non-prescription supplement, 0.7% took laxatives or vomited, and 0.5% started smoking
    - Increase in food insecurity as BMI increased
    - Black and White NAFLD patients reported similar rates of counseling, prescription of weight loss medications, bariatric surgery, interface with healthcare, and dietary changes
    - Black NAFLD patients were more likely to exercise, more likely to have household food insecurity, less likely to have private or any insurance coverage, and less likely to perceive themselves as overweight
    - Rate of prescribing weight loss medications or bariatric surgery for NAFLD was overall low at <5%
- Conclusion: Need to bet

In [52]:
print('''
Selected 2 document sections:
IDENTIFICATION OF THERAPEUTIC TARGETS FOR NON-ALCOHOLIC FATTY LIVER DISEASE THROUGH IN VIVO GENETIC SCREENS OF A PATIENT TRANSCRIPTOME-DERIVED shRNA LIBRARY
NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS
Summarize each of the medical abstracts provided below into a bulleted list that explains the purpose, methods, and results."

Context:

* Background: Non-alcoholic fatty liver disease (NAFLD) is rapidly being recognized as the most common cause of chronic liver disease worldwide with the obesity and diabetes mellitus epidemics. However, unlike other highly prevalent diseases, it remains an underrepresented disease with limited effective treatment options. Therefore, there is an urgent need to prioritize innovative therapeutic development. In this study, to address this, we combine transcriptomic profiling of NAFLD patient tissues with in vivo functional genetic screens in mouse mice of NAFLD to identify potential therapeutic targets. Methods: We performed RNA sequencing on liver tissues from 130 patients covering different stages of NAFLD. Based on the transcriptomic data, we generated a pooled shRNA library targeting the dysregulated genes in patients. The library was delivered into hepatocytes by a by a transposon-based vector system and hydrodynamic tail vein injection for the in vivo genetic screen. Mice were fed two NAFLD-inducing diets, a high-fat diet with high fructose and a Choline deficient L-amino acid-defined high-fat diet, respectively. As a control, normal diet animals were served. At the end of diet treatment, livers were harvested for deep sequencing to determine the abundance of each shRNA. Highly enriched shRNAs compared to control mice were selected as target candidates. The candidates were validated for therapeutic impact by various studies, including in vivo validation assay in mice that induced that every hepatocyte expresses the shRNA of interest by using a FAH -/- model. The mice were fed the two NAFLD-inducing diets, and diverse histopathology parameters were evaluated. Results: We selected 16 and 38 candidate genes from the screening in the two models, respectively. One of them was selected as the best candidate owing to the target gene shRNA being found to be highly enriched in both models. 
In the in vivo validation assays, the target gene silencing showed a reduction in disease progression in NAFLD mice models compared to the control shRNA. Further validation assays showed that the target gene silencing promoted regeneration of the liver without tumor developments. Conclusion: The results show that our approach, combining the NAFLD patient liver transcriptome research with in vivo functional genetic screen in the mouse models, could pinpoint potential targets for NAFLD therapeutics. The target gene identified by our approach demonstrated alleviated disease progression and promoted regeneration in vivo when inhibited. Our study may contribute to new insights into the therapeutic development of NAFLD.
* Background: A few approved treatment options are available for non-alcoholic fatty liver disease (NAFLD), the commonest chronic liver disease in the USA. The search for a newer pharmacological agent is based on the current understanding of pathophysiology of the NAFLD, derived mostly from animal models. Human liver tissue analysis of subjects with this disease is limited. This study compares the significant changes in proteins across the NAFLD disease with normal subjects. Methods: A portion of liver tissue was flash frozen and stored at -80°C at the time of a liver biopsy procedure. We included 71 consecutive subjects diagnosed with NAFLD (21 without non-alcoholic steatohepatitis [NASH], 37 with NASH without advanced fibrosis, and 13 with advanced fibrosis) and nine with normal histology for the study. Following lipid fraction extraction, the liver tissue was sonicated and digested with trypsin at 37°C. Mass spectrometric analysis was performed for proteins. We used the following parameters to increase the specificity and decrease the number of analytes discovered: p-value < 0.01, log fold 2 change, ≤25% missing values, sensitivity, and specificity of 60%. Results: Different protein groups were compared was made between subjects with normal histology (without NAFLD) and those with NAFLD. Compared to normal, there were 70 different proteins found to be up or down-regulated in those with NAFLD (p<0.01). Among these proteins, three proteins up-regulated (based on fold increase) are aldo-keto reductase family 1 member (2.72), inhibin beta E chain (2.2) [shown in the figure], and perilipin-2 (2.1). The down-regulated proteins are as follows: cytochrome P450 2C19 (-2.4), UDP glucuronosyltransferase 1-3 (-2.0) and glutathione-S-transferase Mu (-1.0). 
The 70 significant proteins found in this analysis belonged to the following five pathways in descending order of significance (p-value 2.9x10^-10 to 1.0x10^-5), with the number of proteins (n) is as follows: neutrophil degranulation (7), JAK-STAT expression (4), post translation protein phosphorylation (3), and regulation of insulin-like growth factor [IGF] (3). Conclusion: The study identified proteins besides those belonging to the IGF pathway traditionally related to NAFLD pathogenesis. These included pathways associated with the inflammatory process seen among NASH subjects. These proteins could be a potential target for intervention to reduce the NAFLD occurrence and progression. We also detected specific up-regulated and down-regulated proteins as additional targets for treatment consideration. Further elucidation of the identified proteins with comprehensive plasma proteomics will guide in finding the elusive non-invasive markers for NAFLD and NASH.

 Summary: 
'* Study 1:\n    * Purpose: To identify potential therapeutic targets for non-alcoholic fatty liver disease (NAFLD) \n    * Methods: RNA sequencing on liver tissues from 130 patients, generation of a pooled shRNA library targeting the dysregulated genes in patients, delivery of library into hepatocytes by a transposon-based vector system and hydrodynamic tail vein injection for the in vivo genetic screen, mice fed two NAFLD-inducing diets, livers harvested for deep sequencing\n    * Results: 16 and 38 candidate genes selected from the screening in the two models, one of them selected as the best candidate, target gene silencing showed a reduction in disease progression in NAFLD mice models compared to the control shRNA, target gene silencing promoted regeneration of the liver without tumor developments\n\n* Study 2:\n    * Purpose: To compare the significant changes in proteins across the NAFLD disease with normal subjects\n    * Methods: Liver tissue flash frozen and stored at -80°C, lipid fraction extraction, liver tissue sonicated and digested with trypsin at 37°C, mass spectrometric analysis performed for proteins\n    * Results: 70 different proteins found to be up or down-regulated in those with NAFLD (p<0.01), three proteins up-regulated (based on fold increase): aldo-keto reductase family 1 member, inhibin beta E chain, and perilipin-2, three proteins down-regulated: cytochrome P450 2C19, UDP glucuronosyltransferase 1-3, and glutathione-S-transferase Mu, 70 significant proteins belonged to five pathways in descending order of significance'
''')


Selected 2 document sections:
IDENTIFICATION OF THERAPEUTIC TARGETS FOR NON-ALCOHOLIC FATTY LIVER DISEASE THROUGH IN VIVO GENETIC SCREENS OF A PATIENT TRANSCRIPTOME-DERIVED shRNA LIBRARY
NEW INSIGHT INTO THE PATHOPHYSIOLOGICAL PATHWAY OF NON-ALCOHOLIC FATTY LIVER DISEASE: A LIVER TISSUE PROTEOMICS ANALYSIS
Summarize each of the medical abstracts provided below into a bulleted list that explains the purpose, methods, and results."

Context:

* Background: Non-alcoholic fatty liver disease (NAFLD) is rapidly being recognized as the most common cause of chronic liver disease worldwide with the obesity and diabetes mellitus epidemics. However, unlike other highly prevalent diseases, it remains an underrepresented disease with limited effective treatment options. Therefore, there is an urgent need to prioritize innovative therapeutic development. In this study, to address this, we combine transcriptomic profiling of NAFLD patient tissues with in vivo functional genetic screens in mouse mic

In [51]:
'* Study 1:\n    * Purpose: To identify potential therapeutic targets for non-alcoholic fatty liver disease (NAFLD) \n    * Methods: RNA sequencing on liver tissues from 130 patients, generation of a pooled shRNA library targeting the dysregulated genes in patients, delivery of library into hepatocytes by a transposon-based vector system and hydrodynamic tail vein injection for the in vivo genetic screen, mice fed two NAFLD-inducing diets, livers harvested for deep sequencing\n    * Results: 16 and 38 candidate genes selected from the screening in the two models, one of them selected as the best candidate, target gene silencing showed a reduction in disease progression in NAFLD mice models compared to the control shRNA, target gene silencing promoted regeneration of the liver without tumor developments\n\n* Study 2:\n    * Purpose: To compare the significant changes in proteins across the NAFLD disease with normal subjects\n    * Methods: Liver tissue flash frozen and stored at -80°C, lipid fraction extraction, liver tissue sonicated and digested with trypsin at 37°C, mass spectrometric analysis performed for proteins\n    * Results: 70 different proteins found to be up or down-regulated in those with NAFLD (p<0.01), three proteins up-regulated (based on fold increase): aldo-keto reductase family 1 member, inhibin beta E chain, and perilipin-2, three proteins down-regulated: cytochrome P450 2C19, UDP glucuronosyltransferase 1-3, and glutathione-S-transferase Mu, 70 significant proteins belonged to five pathways in descending order of significance'

'* Study 1:\n    * Purpose: To identify potential therapeutic targets for non-alcoholic fatty liver disease (NAFLD) \n    * Methods: RNA sequencing on liver tissues from 130 patients, generation of a pooled shRNA library targeting the dysregulated genes in patients, delivery of library into hepatocytes by a transposon-based vector system and hydrodynamic tail vein injection for the in vivo genetic screen, mice fed two NAFLD-inducing diets, livers harvested for deep sequencing\n    * Results: 16 and 38 candidate genes selected from the screening in the two models, one of them selected as the best candidate, target gene silencing showed a reduction in disease progression in NAFLD mice models compared to the control shRNA, target gene silencing promoted regeneration of the liver without tumor developments\n\n* Study 2:\n    * Purpose: To compare the significant changes in proteins across the NAFLD disease with normal subjects\n    * Methods: Liver tissue flash frozen and stored at -80°C, 