In [13]:
from datetime import datetime, timedelta, timezone
import os
import io
import hydra
from omegaconf import DictConfig, OmegaConf
from hydra import initialize, compose

import dr_util.file_utils as fu
import bytom.author_profiles as ap

from IPython.display import Markdown, display

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Compose the Hydra config directly inside the notebook (no @hydra.main entry
# point). The config directory is given as a relative path.
with initialize(config_path="../configs/", version_base=None):
    cfg = compose(config_name="paper_data.yaml")

In [3]:
# Convert the config to a plain container with interpolations resolved, then
# print it as YAML so the effective paths/patterns can be inspected.
cfg_resolved = OmegaConf.to_container(cfg, resolve=True)
print(f"Configuration: \n\n{OmegaConf.to_yaml(cfg_resolved)}")

Configuration: 

data_dir: /Users/daniellerothermel/drotherm/data/
raw_pdf_dir: /Users/daniellerothermel/drotherm/data/raw_pdfs/
parsed_pdf_dir: /Users/daniellerothermel/drotherm/data/parsed_pdfs/
metadata_dir: /Users/daniellerothermel/drotherm/data/parsed_pdfs/
author_data_dir: /Users/daniellerothermel/drotherm/data/author_data/
author_summaries_dir: /Users/daniellerothermel/drotherm/data/author_data/summaries/
author_info_file: /Users/daniellerothermel/drotherm/data/author_data/manual_profiles.json
prof_pattern: (?P<professor_name>[\w_]+)
file_type_pattern: (?P<file_type>\w+)
version_pattern: v(?P<version>\d+)
author_summary_file_pattern: (?P<professor_name>[\w_]+)\.(?P<file_type>\w+)\.v(?P<version>\d+)



In [4]:
# Load the author info file referenced by the config; the next cell iterates
# its keys as author names.
AUTHOR_INFO = fu.load_file(cfg.author_info_file)

In [5]:
# List every author key present in AUTHOR_INFO, one bullet per line.
print("Authors:")
for author_name in AUTHOR_INFO:
    print(f" - {author_name}")

Authors:
 - He He
 - Eunsol Choi
 - Mengye Ren
 - Rajesh Ranganath
 - Tal Linzen
 - Kyunghyun Cho
 - Lerrel Pinto
 - Pavel Izmailov


In [11]:
# Fetch the papers for author 'He He' via the author_profiles helper.
# NOTE(review): the return type is not visible here; it is indexed like a
# sequence in the next cell.
he_papers = ap.get_author_papers('He He')

In [26]:
# Render the last entry of he_papers as Markdown to eyeball the formatting.
display(Markdown(ap.format_response_abstract_to_markdown(he_papers[-1])))

### **Title:** GOODAT: Towards Test-time Graph Out-of-Distribution Detection

**Publish Date:** 2024-01-10

**First Author:** Luzhi Wang

**Last Author:** Tat-Seng Chua

**Middle Authors:** Dongxiao He, He Zhang, Yixin Liu, Wenjie Wang, Shirui Pan, Di Jin

**Abstract:** Graph neural networks (GNNs) have found widespread application in modeling graph data across diverse domains. While GNNs excel in scenarios where the testing data shares the distribution of their training counterparts (in distribution, ID), they often exhibit incorrect predictions when confronted with samples from an unfamiliar distribution (out-of-distribution, OOD). To identify and reject OOD samples with GNNs, recent studies have explored graph OOD detection, often focusing on training a specific model or modifying the data on top of a well-trained GNN. Despite their effectiveness, these methods come with heavy training resources and costs, as they need to optimize the GNN-based models on training data. Moreover, their reliance on modifying the original GNNs and accessing training data further restricts their universality. To this end, this paper introduces a method to detect Graph Out-of-Distribution At Test-time (namely GOODAT), a data-centric, unsupervised, and plug-and-play solution that operates independently of training data and modifications of GNN architecture. With a lightweight graph masker, GOODAT can learn informative subgraphs from test samples, enabling the capture of distinct graph patterns between OOD and ID samples. To optimize the graph masker, we meticulously design three unsupervised objective functions based on the graph information bottleneck principle, motivating the masker to capture compact yet informative subgraphs for OOD detection. Comprehensive evaluations confirm that our GOODAT method outperforms state-of-the-art benchmarks across a variety of real-world datasets. The code is available at Github: https://github.com/Ee1s/GOODAT

---------------




In [49]:
# Write Author Page
# NOTE(review): the positional '1' is presumably the summary version (compare
# the `v(?P<version>\d+)` pattern in the config) — confirm against
# ap.write_author_page's signature.
ap.write_author_page(
    cfg, "He He", '1',
    max_papers=100,
    max_years=1,
)

In [30]:
# Build the author page once, then both render it and persist it.
# Previously `author_page` was never assigned in this notebook, so the dump
# below only worked with leftover kernel state (NameError on a fresh kernel).
author_page = ap.make_author_page(
    "He He", responses=he_papers, author_info=AUTHOR_INFO["He He"], max_papers=50
)
display(Markdown(author_page))
fu.dump_file(author_page, f"{cfg.author_summaries_dir}he_he.markdown.v1.txt")

# Research Summary for **He He**

## He He Bio


Assistant Professor of Computer Science and Data Science

Bio: He He is an Assistant Professor in Computer Science and Data Science. She is broadly interested in natural language process and machine learning. Her recent research focuses on understanding large language models, improving their trustworthiness, and human-AI interaction. Prior to joining NYU, she obtained her PhD from University of Maryland, did a post-doc at Stanford, and spent one year at AWS working in dialogue platforms.

Research Areas:

- Machine learning
- Deep learning
- Natural language processing

I want to build intelligent systems that can communicate with humans effectively and enable individuals to achieve their goals. Today’s systems are often opaque, brittle, and difficult to control, which limits their usefulness in human-centered applications. To make them our trustworthy collaborators, my research aims to (i) understand the computational foundation of generalization in novel scenarios, and (ii) build interactive systems that align with user’s goals.

I am an Assistant Professor of Computer Science and Center for Data Science at New York University. I am affiliated with the CILVR Lab, the Machine Learning for Language Group, and the Alignment Research Group.

Here are some directions I’m excited about nowadays:

- Robustness: Machine learning models are trained on a fixed and often biased dataset, but face a constantly-changing world. How can we build predictors that align with human rationales, avoid spurious correlations, and generalize to out-of-distribution data? How can models adapt quickly given new information?
- Truthfulness: We are increasingly relying on machine learning models (e.g., large language models) for critical tasks. How can we make sure that the model outputs conform to facts? Does the model know what it doesn’t know? Can it output a “proof” for its answer? How do we evaluate factuality efficiently for questions beyond the ability of an average person?
- Human-AI collaboration: We want AI agents to deal with our daily minutiae, support our decision-making, and teach us complex concepts. How should the agent infer user intention and preferences, allow for fine-grained control, and take (natural language) feedback? How will this collaboration shape the future workforce?
    

## Recent Papers

### **Title:** Language Models Learn to Mislead Humans via RLHF

**Publish Date:** 2024-09-19

**First Author:** Jiaxin Wen

**Last Author:** Shi Feng

**Middle Authors:** Ruiqi Zhong, Akbir Khan, Ethan Perez, Jacob Steinhardt, Minlie Huang, Samuel R. Bowman, He He

**Abstract:** Language models (LMs) can produce errors that are hard to detect for humans, especially when the task is complex. RLHF, the most popular post-training method, may exacerbate this problem: to achieve higher rewards, LMs might get better at convincing humans that they are right even when they are wrong. We study this phenomenon under a standard RLHF pipeline, calling it "U-SOPHISTRY" since it is Unintended by model developers. Specifically, we ask time-constrained (e.g., 3-10 minutes) human subjects to evaluate the correctness of model outputs and calculate humans' accuracy against gold labels. On a question-answering task (QuALITY) and programming task (APPS), RLHF makes LMs better at convincing our subjects but not at completing the task correctly. RLHF also makes the model harder to evaluate: our subjects' false positive rate increases by 24.1% on QuALITY and 18.3% on APPS. Finally, we show that probing, a state-of-the-art approach for detecting Intended Sophistry (e.g. backdoored LMs), does not generalize to U-SOPHISTRY. Our results highlight an important failure mode of RLHF and call for more research in assisting humans to align them.

---------------


### **Title:** Spontaneous Reward Hacking in Iterative Self-Refinement

**Publish Date:** 2024-07-05

**First Author:** Jane Pan

**Last Author:** Shi Feng

**Middle Authors:** He He, Samuel R. Bowman

**Abstract:** Language models are capable of iteratively improving their outputs based on natural language feedback, thus enabling in-context optimization of user preference. In place of human users, a second language model can be used as an evaluator, providing feedback along with numerical ratings which the generator attempts to optimize. However, because the evaluator is an imperfect proxy of user preference, this optimization can lead to reward hacking, where the evaluator's ratings improve while the generation quality remains stagnant or even decreases as judged by actual user preference. The concern of reward hacking is heightened in iterative self-refinement where the generator and the evaluator use the same underlying language model, in which case the optimization pressure can drive them to exploit shared vulnerabilities. Using an essay editing task, we show that iterative self-refinement leads to deviation between the language model evaluator and human judgment, demonstrating that reward hacking can occur spontaneously in-context with the use of iterative self-refinement. In addition, we study conditions under which reward hacking occurs and observe two factors that affect reward hacking severity: model size and context sharing between the generator and the evaluator.

---------------


### **Title:** LLMs Are Prone to Fallacies in Causal Inference

**Publish Date:** 2024-06-18

**First Author:** Nitish Joshi

**Last Author:** He He

**Middle Authors:** Abulhair Saparov, Yixin Wang

**Abstract:** Recent work shows that causal facts can be effectively extracted from LLMs through prompting, facilitating the creation of causal graphs for causal inference tasks. However, it is unclear if this success is limited to explicitly-mentioned causal facts in the pretraining data which the model can memorize. Thus, this work investigates: Can LLMs infer causal relations from other relational data in text? To disentangle the role of memorized causal facts vs inferred causal relations, we finetune LLMs on synthetic data containing temporal, spatial and counterfactual relations, and measure whether the LLM can then infer causal relations. We find that: (a) LLMs are susceptible to inferring causal relations from the order of two entity mentions in text (e.g. X mentioned before Y implies X causes Y); (b) if the order is randomized, LLMs still suffer from the post hoc fallacy, i.e. X occurs before Y (temporal relation) implies X causes Y. We also find that while LLMs can correctly deduce the absence of causal relations from temporal and spatial relations, they have difficulty inferring causal relations from counterfactuals, questioning their understanding of causality.

---------------




## Utils

In [None]:
def get_author_metadata_path(author):
    """Return the path of the query-metadata JSON file for ``author``.

    The author name has its spaces replaced by underscores and is placed
    between ``METADATA_DIR`` and the ``_query_metadata.json`` suffix.

    NOTE(review): ``AUTHORS`` and ``METADATA_DIR`` are not defined in the
    visible part of this notebook — confirm they are set before this runs.

    Raises:
        AssertionError: if ``author`` is not in ``AUTHORS``.
    """
    assert author in AUTHORS
    file_stem = author.replace(" ", "_")
    return f'{METADATA_DIR}{file_stem}_query_metadata.json'

def get_author_metadata(author):
    """Load and return the query metadata stored for ``author``.

    The file location comes from ``get_author_metadata_path`` and the file is
    read with ``fu.load_file``.
    """
    return fu.load_file(get_author_metadata_path(author))

In [None]:
def get_parsed_pdf_path(pdf_name):
    """Return the ``.pkl`` path for ``pdf_name`` under ``PARSED_PDF_DIR``.

    NOTE(review): ``PARSED_PDF_DIR`` is not defined in the visible part of
    this notebook — confirm it is set before this runs.
    """
    return '{}{}.pkl'.format(PARSED_PDF_DIR, pdf_name)

def get_parsed_pdf(pdf_name):
    """Load the parsed PDF for ``pdf_name``, or return ``None`` if it is absent.

    The file is looked up at ``get_parsed_pdf_path(pdf_name)`` and read with
    ``fu.load_file`` only when that path exists on disk.

    NOTE(review): the ``.pkl`` suffix suggests a pickle file; only load files
    from a trusted location.
    """
    ppdf_path = get_parsed_pdf_path(pdf_name)
    return fu.load_file(ppdf_path) if os.path.exists(ppdf_path) else None

In [None]:
def get_author_parsed_papers(author):
    """Collect the papers of ``author`` for which a parsed PDF is available.

    Iterates the ``'pdfs_metadata'`` mapping of the author's metadata. For
    each PDF name whose parsed file can be loaded, a shallow copy of its
    metadata dict is made and the parsed content is attached under the
    ``'parsed_pdf'`` key. PDFs without a parsed file are skipped.

    Returns:
        A list of dicts, in the iteration order of ``'pdfs_metadata'``.
    """
    pdfs_metadata = get_author_metadata(author)['pdfs_metadata']
    papers = []
    for pdf_name, pdf_data in pdfs_metadata.items():
        parsed = get_parsed_pdf(pdf_name)
        if parsed is not None:
            papers.append({**pdf_data, 'parsed_pdf': parsed})
    return papers

## Load Parsed, Extract Structure

In [None]:
# Load all parsed papers for the first author in AUTHORS.
# NOTE(review): AUTHORS is not defined in the visible part of this notebook;
# the variable name assumes AUTHORS[0] is Pavel Izmailov — confirm.
parsed_pdfs_pavel = get_author_parsed_papers(AUTHORS[0])
print(f">> Number of parsed papers for {AUTHORS[0]}: {len(parsed_pdfs_pavel)}")

In [None]:
# Use the first parsed paper as the running example for the cells below.
test_ppdf = parsed_pdfs_pavel[0]

In [None]:
# Sanity-check the metadata fields of the example paper and how many parsed
# blocks it contains.
print(test_ppdf['title'])
print(test_ppdf['published'])
print(test_ppdf['authors'])
print(test_ppdf['pdf_link'])
print(f">> Num blocks in parsed pdf: {len(test_ppdf['parsed_pdf'])}")

### Section Parsing Utils

In [None]:
def reconstruct_split_text(split_text, verbose=False):
    """Join a list of sections back into a single text string.

    Args:
        split_text: iterable of section dicts with a ``'lines'`` list (and a
            ``'heading'`` entry, which is only read when ``verbose`` is true).
        verbose: when true, emit a ``===== Heading: ...`` banner before each
            section's lines.

    Returns:
        The concatenated text. Lines within a section are separated by a blank
        line, and every section is followed by a blank line.
    """
    chunks = []
    for sect in split_text:
        if verbose:
            chunks.append(f"\n\n ===== Heading: {sect['heading']} \n\n")
        chunks.append("\n\n".join(sect['lines']) + "\n\n")
    return "".join(chunks)

In [None]:
def split_by_heading(text, title):
    """Split one block of text into sections delimited by ``#`` heading lines.

    Args:
        text: the raw text of one parsed block; split on newlines.
        title: the paper title. Any heading line containing ``"# <title>"`` is
            dropped rather than starting a section.

    Returns:
        A non-empty list of ``{"heading": str, "lines": list[str]}`` dicts in
        document order. Lines are stripped. A section started by a heading
        line includes that heading line as its first entry in ``lines``.
        Content that precedes the first heading is filed under the heading
        ``"From Previous Block"``.

    Notes:
        - Blank lines and lines whose first character is a digit are skipped.
        - The heading text is the heading line minus its first two characters,
          i.e. it assumes a ``"# "`` prefix; deeper levels such as ``"## X"``
          keep the extra ``#`` in the heading.
        - When the first line is not a heading, it is discarded (unchanged
          from the previous behavior).
          NOTE(review): presumably this drops a running page header — confirm.
    """
    tls = text.split("\n")
    title_str = f"# {title}"
    sections = []

    start_tl_strip = tls[0].strip()
    # An empty first line carries no heading marker. Treat it like any other
    # non-heading first line instead of indexing into an empty string, which
    # used to raise IndexError.
    if (
        not start_tl_strip
        or title_str in start_tl_strip
        or start_tl_strip[0] != "#"
    ):
        start_heading = "From Previous Block"
        start_lines = []
    else:
        start_heading = start_tl_strip[2:]
        start_lines = [start_tl_strip]

    curr_section = {"heading": start_heading, "lines": start_lines}
    for tl in tls[1:]:
        tl_strip = tl.strip()
        if len(tl_strip) == 0 or tl_strip[0].isdigit():
            continue

        if tl_strip[0] == "#":
            # Drop all header mentions of the title, we'll add it back in
            if title_str in tl_strip:
                continue
            # Otherwise start a new section
            sections.append(curr_section)
            curr_section = {"heading": tl_strip[2:], "lines": []}
        curr_section['lines'].append(tl_strip)

    sections.append(curr_section)
    return sections

In [None]:
def get_all_sects(input_ppdf, input_title):
    """Split every block of a parsed PDF into sections and concatenate them.

    Args:
        input_ppdf: iterable of parsed blocks, each exposing a ``.text``
            attribute.
        input_title: the paper title, forwarded to ``split_by_heading``.

    Returns:
        A flat list of section dicts in document order. The first section of
        the first block (the title section) is left out.
    """
    all_sects = []
    for block_idx, block in enumerate(input_ppdf):
        block_sects = split_by_heading(block.text, input_title)
        # Only the very first block starts with the title section; skip it.
        all_sects += block_sects[1:] if block_idx == 0 else block_sects
    return all_sects

In [None]:
def group_sections(sections):
    """Merge sections into heading groups and separate out figure sections.

    Args:
        sections: iterable of ``{"heading": str, "lines": list[str]}`` dicts
            in document order (the shape produced by ``split_by_heading``).

    Returns:
        A ``(grouped_sections, figs)`` tuple.

        ``grouped_sections`` is a list of ``{"heading", "lines"}`` dicts. A
        section is appended to the previous group when its heading is
        ``"From Previous Block"`` or equals the previous group's heading;
        otherwise it starts a new group.

        ``figs`` is the list of sections whose heading starts with
        ``"Figure"``, unmodified.

    Raises:
        AssertionError: if the section right after a figure has an empty
            first line.

    Notes:
        - Sections with no lines are ignored.
        - If the section right after a figure starts with a lowercase letter,
          it is treated as a continuation of the previous group: its first
          line is prefixed with its heading and everything is appended there.
        - A figure that appears before any body section no longer creates an
          empty group headed by the figure caption.
    """
    grouped_sections = []

    figs = []
    last_was_fig = False
    for section in sections:
        lines = section['lines']
        if len(lines) == 0:
            continue

        heading = section['heading']

        # Figures are collected separately and never open a body group.
        if heading.startswith("Figure"):
            figs.append(section)
            last_was_fig = True
            continue

        # Seed the first group from the first non-figure section so the
        # `grouped_sections[-1]` accesses below are always valid.
        if len(grouped_sections) == 0:
            grouped_sections.append({
                'heading': heading,
                'lines': [],
            })

        if last_was_fig:
            last_was_fig = False
            assert len(lines[0]) > 0, f"Section has an empty first line: {section}"
            # A lowercase start means the figure interrupted running text.
            if lines[0][0].islower():
                first_l = f"{heading} {lines[0]}"
                grouped_sections[-1]['lines'].append(first_l)
                grouped_sections[-1]['lines'].extend(lines[1:])
                continue

        if (heading != "From Previous Block" and
            grouped_sections[-1]['heading'] != heading
        ):
            grouped_sections.append({
                'heading': heading,
                'lines': [],
            })
        grouped_sections[-1]['lines'].extend(lines)
    return grouped_sections, figs

In [None]:
def ppdf_to_body_refs_figs(input_ppdf):
    """Split a parsed paper into body sections, figures, and references.

    Args:
        input_ppdf: a paper dict with ``'parsed_pdf'`` and ``'title'`` entries.

    Returns:
        A ``(body, figs, references)`` tuple. ``body`` holds the grouped
        sections that come before the first group whose heading contains
        ``'References'``; ``references`` is that group, or ``None`` if there
        is none. Groups after the references group are discarded. ``figs``
        are the figure sections returned by ``group_sections``.

    Prints the section, group and figure counts as a side effect.
    """
    sections = get_all_sects(input_ppdf['parsed_pdf'], input_ppdf['title'])
    print(f">> There are {len(sections)} sections total.")

    groups, figures = group_sections(sections)
    print(f">> There are {len(groups)} grouped sections and {len(figures)} figures.")

    # Index of the first references group, if any.
    ref_idx = next(
        (idx for idx, group in enumerate(groups) if 'References' in group['heading']),
        None,
    )
    if ref_idx is None:
        return list(groups), figures, None
    return groups[:ref_idx], figures, groups[ref_idx]

## Test Full Flow

In [None]:
# Run the full split on the example paper: body groups, figures, references.
bd_s, fg_s, rfs = ppdf_to_body_refs_figs(test_ppdf)

In [None]:
# print(reconstruct_split_text(bd_s + fg_s))
# rfs

In [None]:
# Show the line count and heading of each body group.
for gt in bd_s:
    print(f"{len(gt['lines']):2} | {gt['heading']}")

### Sub Section Tests

In [None]:
# Step 1 in isolation: split the example paper's blocks into raw sections.
all_sections_test = get_all_sects(test_ppdf['parsed_pdf'], test_ppdf['title'])
print(f">> There are {len(all_sections_test)} sections total")

In [None]:
# Step 2 in isolation: group the raw sections and separate out the figures.
grouped_test, figs_test = group_sections(all_sections_test)
print(f">> There are {len(grouped_test)} grouped sections")

In [None]:
# Show the line count and heading of every group (references included).
for gt in grouped_test:
    print(f"{len(gt['lines']):3} | {gt['heading']}")

## Putting it All Together

In [None]:
def make_author_page(author):
    """Build a text summary page for ``author``: bio plus all parsed papers.

    Args:
        author: key into ``AUTHOR_INFO``; also passed to
            ``get_author_parsed_papers``.

    Returns:
        A single string with a title line, a ``## Bio`` section, and a
        ``## Recent Papers`` section. Each paper contributes its title,
        publish date, authors, body text and figure sections, followed by an
        ``End Paper`` marker line. References are left out.

    NOTE(review): ``AUTHOR_INFO[author]`` is interpolated as-is; if the entry
    is a dict rather than a string, its ``str()`` form is what gets written.
    """
    bio = AUTHOR_INFO[author]

    buff = io.StringIO()
    buff.write(f"# Research Summary for {author}\n\n")
    buff.write(f"## Bio\n{bio}\n\n")

    buff.write("## Recent Papers\n\n")
    parsed_pdfs_author = get_author_parsed_papers(author)
    for ppdf in parsed_pdfs_author:
        buff.write(f"# Title: {ppdf['title']}\n Published: {ppdf['published']}\n")
        buff.write("Authors: " + ", ".join(ppdf['authors']) + "\n\n")

        # The references section is intentionally not written.
        body_sections, fig_sections, _ = ppdf_to_body_refs_figs(ppdf)
        buff.write(reconstruct_split_text(body_sections + fig_sections))
        # End the marker with a blank line so the next paper's "# Title:"
        # header starts on its own line instead of being glued to the marker.
        buff.write(f"\n\n -------------- End Paper: {ppdf['title']}\n\n")
    return buff.getvalue()

In [None]:
# Build the summary page for the first author in AUTHORS. Despite the variable
# name, make_author_page writes every parsed paper for the author.
bio_and_one_paper = make_author_page(AUTHORS[0])

In [None]:
# Write under the configured data directory instead of a hard-coded absolute
# user path; cfg.data_dir ends with a trailing slash.
fu.dump_file(bio_and_one_paper, f'{cfg.data_dir}pavel_izmailov_summary_markdown.txt')

In [None]:
# Write one summary file per author under the configured data directory
# (instead of a hard-coded absolute user path). File names are the lowercased
# author name with spaces replaced by underscores.
for author in AUTHORS:
    bio_and_one_paper = make_author_page(author)
    author_slug = author.replace(" ", "_").lower()
    fu.dump_file(bio_and_one_paper, f'{cfg.data_dir}{author_slug}_summary_markdown.txt', verbose=True)
    