In [41]:
import os
import time
import json
import boto3
import shutil
import numpy as np

from copy import deepcopy
from botocore.config import Config

In [42]:
def get_model_tags(endpoint):
    if 'claude' in endpoint:
        human_tag = '\n\nHuman:'
        robot_tag = '\n\nAssistant:'
    elif 'ai21.j2' in endpoint:
        human_tag = '\n##\n'
        robot_tag = ''
    elif 'titan' in endpoint:
        human_tag = '\n\nUser:'
        robot_tag = '\n\nBot:'
    else:
        human_tag = '\n\n'
        robot_tag = ''
    return human_tag, robot_tag


def query_endpoint(payload):
    client = payload['client']
    endpoint = payload['endpoint']
    if 'titan' in endpoint:
        body = json.dumps({
            'inputText':payload['prompt'],
            'textGenerationConfig':{
                'maxTokenCount':payload['max_len'],
                'temperature':payload['temp'],
                'topP':payload['top_p'],
        }})
        response, attempts, gen_time = call_bedrock(client, body, endpoint)
        try:
            response_body = json.loads(response.get('body').read()).get('results')[0].get('outputText')
        except:
            response_body = '**Failed to generate!**'
    elif 'claude' in endpoint:
        body = json.dumps({
            'prompt':payload['prompt'],
            'max_tokens_to_sample':payload['max_len'],
            'temperature':payload['temp'],
            'top_p':payload['top_p'],
        })
        response, attempts, gen_time = call_bedrock(client, body, endpoint)
        try:
            response_body = json.loads(response.get("body").read()).get("completion")
        except:
            response_body = '**Failed to generate!**'
    elif 'ai21.j2' in endpoint:
        body = json.dumps({
            'prompt':payload['prompt'],
            'maxTokens':payload['max_len'],
            'temperature':payload['temp'],
            'topP':payload['top_p'],
            'stopSequences':['##']
        })
        response, attempts, gen_time = call_bedrock(client, body, endpoint)
        try:
            response_body = json.loads(response.get("body").read()).get("completions")[0].get("data").get("text")
        except:
            response_body = '**Failed to generate!**'
    return (response_body, attempts, gen_time)


def call_bedrock(client, body, endpoint, attempts=45, accept='application/json', contentType='application/json'):
    for i in range(attempts):
        try:
            tic = time.time()
            response = client.invoke_model(
                body=body,
                modelId=endpoint,
                accept=accept,
                contentType=contentType
            )
            toc = time.time()
            return response, i+1, toc-tic
        except Exception as e:
            print(e)
            time.sleep(2 + np.random.rand()/2.)
            continue
    return None, i+1, 0.

In [4]:
config = Config(read_timeout=240)
client = boto3.client(service_name='bedrock-runtime', region_name='us-east-1', config=config)
endpoint = 'anthropic.claude-v2:1'
payload = {
    'prompt': '',
    'max_len': 9000,
    'temp': 0.75,
    'top_p': .99,
    'endpoint': endpoint,
    'client': client,
}

In [5]:
in_dir = 'papers'
filepaths = [in_dir+'/'+f for f in os.listdir("papers")]

In [6]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = "You are an AI Researcher whose goal is to analyze articles and produce robust and understandable Python code examples.\n"

prompt_instructions = """Read the following research paper and implement a well formatted and well documented code example,
that impliments the models and/or algorithms precented in the paper. This can use sample data, but the explanation should be clear in the code:
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, implement a well formatted and well documented code example in <code></code> tags,
that impliments the models and/or algorithms precented in the paper. This can use sample data, but the explanation should be clear in the code.\n"""

prompt_details = """Follow these steps when writing code:
1. Read the paper thoroughly and make sure you understand the key concepts. Don't just skim it and try to code. Take notes, draw diagrams, explain ideas out loud to yourself or others. Comprehension is key before attempting implementation.
2. Identify the key components that need to be coded - the models, algorithms, data preprocessing steps, etc. Break the implementation down into smaller modular pieces.
3. Find or create appropriate data to test the models on. Many papers include links to data or code repositories used. If not, try to create or find representative sample data.
4. Start by hard-coding and verifying the simplest pieces first. For example, if implementing a complex transformer model, first recreate the multi-headed self-attention mechanism and test it independently before adding other components.
5. Adhere to sound coding principles - modularize code into functions and classes, add comments explaining the purpose of areas of code, use descriptive variable names matching paper terminology. These practices will help ensure clear understanding.
6. Visualize and print intermediate outputs to check that data transforms, model internal states, etc are as expected per paper descriptions to methodically validate implementation components.  
7. Once pieces are individually working, incrementally connect and test them together until the full model or algorithm is constructed. Confirm outputs match expected behavior from paper either mathematically or qualitatively at each step.
8. Write tests and experiment with model variations. Try ablations by removing components and quantify differences observed. Push and analyze limits of model capacity using larger data. These tests further validate understanding and highlight subtleties.

In summary, read critically, decompose implementations into verifiable steps, validate incrementally against paper, and test rigorously. This systematic process can reliably translate research ideas into reproducible code."""

prompt_post = ""
prompt_end = "<code>"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

papers/2312.00752.txt
2697
392


In [7]:
responses = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses.append(response)
    print(i+1)

1
2
3
4
5
6


In [10]:
print(responses[0][0])


import torch
import torch.nn as nn
import math

class SelectiveSSMLayer(nn.Module):
    """
    Selective State Space Model (S6) Layer
    Implements the core algorithm from Section 3.3
    
    Key Components:
    - Input-dependent SSM parameters A, B, C 
    - Efficient parallel scan implementation with kernel fusion and recomputation
    
    Args:
        input_size: Input dimension
        state_size: Latent SSM state dimension
        discretize_fn: Discretization function 
        activation_fn: Activation function for A parameter
        
    """
    
    def __init__(self, input_size, state_size, 
                 discretize_fn, activation_fn):
        super().__init__()
        
        self.input_proj = nn.Linear(input_size, state_size) 
        self.state_proj = nn.Linear(input_size, state_size)
        self.output_proj = nn.Linear(state_size, input_size)
        
        self.discretize_fn = discretize_fn
        self.activation_fn = activation_fn
        
        self.re

In [11]:
print(responses[1][0])



# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from cvxpy import *

# Set random seed for reproducibility
np.random.seed(0)

# Generate a random frame in R^2 with M vectors
N = 2
M = 7  
Phi = np.random.randn(N, M) 
Phi /= np.linalg.norm(Phi, axis=0) # Normalize columns to unit norm

# Define frame analysis operator
def analysis_op(Phi):
    return Phi.T @ Phi

# Objective function to minimize 
u = Variable(M)
objective = Minimize(sum_entries(u))

# Constraints 
constraints = [analysis_op(diag(u) * Phi) == Identity(N), 
               norm(u,1) == 1,
               u >= 0]

# Solve optimization problem
prob = Problem(objective, constraints)
result = prob.solve()

# Extract scaling values  
scaling_vals = np.sqrt(u.value)  

# Plot original and scaled frames
fig, ax = plt.subplots()
ax.scatter(Phi[0,:], Phi[1,:], color='blue')  
ax.scatter(diag(scaling_vals) @ Phi[0,:], 
           diag(scaling_vals) @ Phi[1,:], color='red')
ax.set_aspect('equal')
ax.s

In [12]:
print(responses[2][0])


import numpy as np
import random

# Game of 24 Task
# Input is 4 numbers, output is an equation that uses all numbers to equal 24 
def game_of_24(numbers):

    numbers = sorted(numbers)
    
    # Thought generator - generate possible intermediate equation steps 
    def generate_thought(state):
        left_numbers = state[0]
        thoughts = []
        
        for i in range(len(left_numbers)-1):
            n1 = left_numbers[i]
            for j in range(i+1, len(left_numbers)):
                n2 = left_numbers[j]
                
                # Try all pairs of numbers
                thoughts.append(f"{n1}+{n2}=") 
                thoughts.append(f"{n1}-{n2}=")
                thoughts.append(f"{n1}*{n2}=")
                if n2 != 0:
                    thoughts.append(f"{n1}/{n2}=")
                    
        random.shuffle(thoughts)
        
        return thoughts[:5] # Top 5 options
    
    # State evaluator 
    def evaluate_state(state):
        left_numbers = s

In [14]:
print(responses[3][0])


import torch
import torch.nn as nn
from torch.nn import functional as F

# Implementation of the LoRA (Low-Rank Adaptation) approach
# for efficiently adapting large pre-trained language models 
# to downstream tasks

class LoRA(nn.Module):
    def __init__(self, pretrained_model, rank):
        super().__init__()
        self.pretrained_model = pretrained_model
        self.rank = rank
        
        # Freeze parameters in pretrained model
        for param in self.pretrained_model.parameters():
            param.requires_grad = False
            
        # Add LoRA layers        
        self.lora_layers = nn.ModuleList()
        for layer in self.pretrained_model.transformer.h:
            self.lora_layers.append(
                LoRALayer(layer.attn.in_proj_weight, self.rank))
            
    def forward(self, x):
        # Forward pass through pretrained model
        x = self.pretrained_model(x)
        
        # Forward pass through each LoRA layer
        for lora_layer in

In [15]:
print(responses[4][0])


import torch
import torch.nn as nn
import torch.optim as optim

"""
Implementation of ReLoRA training algorithm from paper: 
"ReLoRA: High-Rank Training Through Low-Rank Updates"

Key concepts:
- Replace linear layers in model with Low Rank Adaptation (LoRA) layers 
- LoRA layers decompose weight matrix into low rank A and B matrices
- Periodically merge A and B into original weight matrix W 
- Reset optimizer momentum and LR schedule at merge points
- Over multiple merge cycles, aggregate low rank updates into high rank

Advantages:
- Memory efficient - fewer trainable parameters
- Faster training - improved hardware efficiency

Results:
- Matches performance of full rank training
- Saves GPU memory and improves training speed
"""

class LoRA(nn.Module):
    """
    Low Rank Adaptation linear layer.
    
    Decomposes weight matrix into low rank A and B 
    with only B being trainable.
    """
    def __init__(self, in_features, out_features, rank):
        super().__init__()
     

In [16]:
print(responses[5][0])



import re
import difflib
import spacy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load gendered word lookup tables from paper
pronouns = {
    "he":"they", 
    "she":"they",
    "his":"their", 
    "her":"their",
    ...
}

nouns = {
    "chairman":"chairperson",
    "congressman":"member of congress",
    ... 
}

# Load and preprocess parallel data
dataset = load_dataset("oscar", split="train")  
dataset = filter_and_deduplicate(dataset)

def tokenize(example):
    return tokenizer(example["text"])

tokenizer = AutoTokenizer.from_pretrained("t5-base")  
dataset = dataset.map(tokenize)

# Create training examples with rule-based augmentation
def forward_augment(example):
    text = back_translate(example["text"]) 
    for k,v in pronouns.items():
        text = text.replace(k,v) 
    for k,v in nouns.items():
        text = text.replace(k,v)
    example["text"] = text
    return example

augmented_dataset = dataset.map(forward_a

In [17]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = "You are an AI Researcher whose goal is to analyze articles and produce robust and understandable Python code examples.\n"

prompt_instructions = """Read the following research paper and implement a well formatted and well documented code example,
that impliments the models and/or algorithms precented in the paper. This can use sample data, but the explanation should be clear in the code:
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, implement a well formatted and well documented code example in <code></code> tags,
that impliments the models and/or algorithms precented in the paper. This can use sample data, but the explanation should be clear in the code.\n"""

prompt_details = """Follow these steps when writing code:
1. Read the paper thoroughly and make sure you understand the key concepts. Don't just skim it and try to code. Take notes, draw diagrams, explain ideas out loud to yourself or others. Comprehension is key before attempting implementation.
2. Identify the key components that need to be coded - the models, algorithms, data preprocessing steps, etc. Break the implementation down into smaller modular pieces.
3. Find or create appropriate data to test the models on. Many papers include links to data or code repositories used. If not, try to create or find representative sample data.
4. Start by hard-coding and verifying the simplest pieces first. For example, if implementing a complex transformer model, first recreate the multi-headed self-attention mechanism and test it independently before adding other components.
5. Adhere to sound coding principles - modularize code into functions and classes, add comments explaining the purpose of areas of code, use descriptive variable names matching paper terminology. These practices will help ensure clear understanding.
6. Visualize and print intermediate outputs to check that data transforms, model internal states, etc are as expected per paper descriptions to methodically validate implementation components.  
7. Once pieces are individually working, incrementally connect and test them together until the full model or algorithm is constructed. Confirm outputs match expected behavior from paper either mathematically or qualitatively at each step.
8. Write tests and experiment with model variations. Try ablations by removing components and quantify differences observed. Push and analyze limits of model capacity using larger data. These tests further validate understanding and highlight subtleties.

In summary, read critically, decompose implementations into verifiable steps, validate incrementally against paper, and test rigorously. This systematic process can reliably translate research ideas into reproducible code."""

prompt_post = ""
prompt_end = ""

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2691
392


In [18]:
responses_free = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_free.append(response)
    print(i+1)

An error occurred (ThrottlingException) when calling the InvokeModel operation (reached max retries: 4): Too many requests, please wait before trying again. You have sent too many requests.  Wait before trying again.
1
2
An error occurred (ThrottlingException) when calling the InvokeModel operation (reached max retries: 4): Too many requests, please wait before trying again. You have sent too many requests.  Wait before trying again.
3
4
5
6


In [19]:
print(responses_free[0][0])

 Here is an example implementation of the selective state space model (S6) architecture from the paper in PyTorch. It focuses on clearly documenting and explaining the components rather than being optimized for efficiency.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelectiveSSM(nn.Module):
    """
    Selective State Space Model (S6)
    Allows input-dependent interaction along sequence length dimension
    """
    
    def __init__(self, input_size, state_size, discretize='zoh'):
        super().__init__()
        self.input_size = input_size
        self.state_size = state_size
        
        # Continuous SSM parameters  
        self.A_param = nn.Parameter(torch.randn(state_size)) 
        self.B = nn.Linear(input_size, state_size)
        self.C = nn.Linear(state_size, input_size)
        
        # Discretization rule
        if discretize.lower() == 'zoh':
            self.discretize = self._zoh
        else:
            raise ValueErro

In [20]:
print(responses_free[1][0])

 Here is an example code implementation of the scalable frames and convex optimization concepts from the paper:

```python
import numpy as np
from cvxpy import Variable, Minimize, Problem

# Frame generation
def generate_frame(n, m):
    """Generate random frame matrix Phi in R^n with m frame vectors"""
    Phi = np.random.randn(n, m) 
    Phi = Phi / np.linalg.norm(Phi, axis=0) # Normalize columns
    return Phi

# Objective functions
def linear_obj(u):
    return np.ones(len(u)) @ u

def log_barrier(u):
    t = 1e-3
    return sum(np.log(u+t))

# Scalability constraints
def scalability_constraints(Phi, u):   
    n, m = Phi.shape
    
    # Construct F(Phi) matrix
    d = int((n-1)*(n+2)/2)  
    F_phi = np.zeros((d, m))
    
    for j in range(m):
        col = np.zeros(d)
        phi_j = Phi[:, j]
        
        for k in range(n+1):
            col[k] = (phi_j @ phi_j) - 1
        
        for k in range(1, n):
            col[n+k] = phi_j[0] * phi_j[k]
            for l in range

In [21]:
print(responses_free[2][0])

 Unfortunately I do not have the capability to fully implement and test the models from the paper in this context. However, I can provide a high-level overview of the key ideas and outline some of the major components that would need to be coded up to realize the Tree of Thoughts (ToT) framework:

The main goal of ToT is to enable more systematic and deliberate exploration of reasoning paths for a language model (LM) to solve problems. This is done by representing the intermediate steps toward a solution as a tree of "thoughts", where each thought is a coherent piece of text. 

Key aspects that would need implementation:

1. **Thought Generator**: Function that can sample possible next thoughts by prompting the LM, given the current state (input + thoughts so far). Two strategies are proposed: (a) Sample thoughts independently or (b) Propose them sequentially.

```python
# Example sketch
def generate_thoughts(lm, state, num_samples):
    # Strategy (a)
    thoughts = [] 
    for i in r

In [22]:
print(responses_free[3][0])

 Here is an example Python implementation of the LoRA (Low-Rank Adaptation) method from the paper:

```python
import torch
import torch.nn as nn

class LoRA(nn.Module):
    """
    Implements Low-Rank Adaptation (LoRA) for efficient tuning of large pre-trained models.
    
    Freezes base model weights and injects trainable low-rank matrices A and B 
    to decompose weight updates in each layer.
    """
    
    def __init__(self, base_model, rank):
        """
        Args:
            base_model: Pre-trained model whose weights will be frozen
            rank: Rank of the LoRA decomposition matrices 
        """
        super().__init__()
        self.base_model = base_model
        self.rank = rank
        
        # Freeze parameters of base model
        for param in base_model.parameters():
            param.requires_grad = False
            
        # Initialize LoRA A and B matrices for each layer
        self.A_matrices = []
        self.B_matrices = []
        for layer in 

In [23]:
print(responses_free[4][0])

 Here is an example Python implementation of the ReLoRA algorithm from the paper:

```python
import torch
import torch.nn as nn
from torch.optim import Adam

class ReLoRA(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.a_matrices = []
        self.b_matrices = []
        
        # Replace linear layers with LoRA decomposition
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Linear):
                out_features, in_features = module.weight.shape
                
                a = nn.Parameter(torch.randn(out_features, rank))
                b = nn.Parameter(torch.randn(rank, in_features))
                
                self.a_matrices.append(a) 
                self.b_matrices.append(b)
                
                module.weight.requires_grad = False
                module.weight = nn.Parameter(module.weight)
                
        self.optimizer = Adam(self.parameters()

In [24]:
print(responses_free[5][0])

 Here is a Python implementation of the key models and algorithms from the paper "Exploiting Biased Models to De-bias Text: A Gender-Fair Rewriting Model":

```python
import spacy
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load pre-trained translation models
en_de_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/wmt19-en-de") 
de_en_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/wmt19-de-en")

# Load tokenizers
en_tokenizer = AutoTokenizer.from_pretrained("facebook/wmt19-en-de")
de_tokenizer = AutoTokenizer.from_pretrained("facebook/wmt19-de-en")

# Example gender-biased English input text
text = "The policemen searched for a capable leader."

# Round-trip translation for pseudo German text
en_text = en_tokenizer(text, return_tensors="pt")
de_text = de_model.generate(**en_text)  
en_text2 = de_model.generate(**de_text)
rt_text = de_tokenizer.decode(de_text[0], skip_special_tokens=True)

# Merge to preserve gendered forms  
doc = spacy.load("de_core

In [25]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = "You are an AI Researcher whose goal is to analyze articles and produce robust and understandable Python code examples.\n"

prompt_instructions = """Read the following research paper and implement a well formatted and well documented code example,
that impliments the models and/or algorithms precented in the paper. This should use sample data, but the explanation should be clear in the code:
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, implement well formatted and well documented Python code,
that impliments the models and/or algorithms precented in the paper. This can use sample data, but the explanation should be clear in the code.\n"""

prompt_details = """Follow these steps when writing code:
1. Read the paper thoroughly and make sure you understand the key concepts. Don't just skim it and try to code. Take notes, draw diagrams, explain ideas out loud to yourself or others. Comprehension is key before attempting implementation.
2. Identify the key components that need to be coded - the models, algorithms, data preprocessing steps, etc. Break the implementation down into smaller modular pieces.
3. Find or create appropriate data to test the models on. Many papers include links to data or code repositories used. If not, try to create or find representative sample data.
4. Start by hard-coding and verifying the simplest pieces first. For example, if implementing a complex transformer model, first recreate the multi-headed self-attention mechanism and test it independently before adding other components.
5. Adhere to sound coding principles - modularize code into functions and classes, add comments explaining the purpose of areas of code, use descriptive variable names matching paper terminology. These practices will help ensure clear understanding.
6. Visualize and print intermediate outputs to check that data transforms, model internal states, etc are as expected per paper descriptions to methodically validate implementation components.  
7. Once pieces are individually working, incrementally connect and test them together until the full model or algorithm is constructed. Confirm outputs match expected behavior from paper either mathematically or qualitatively at each step.
8. Write tests and experiment with model variations. Try ablations by removing components and quantify differences observed. Push and analyze limits of model capacity using larger data. These tests further validate understanding and highlight subtleties.

In summary, read critically, decompose implementations into verifiable steps, validate incrementally against paper, and test rigorously. This systematic process can reliably translate research ideas into reproducible code."""

prompt_post = ""
prompt_end = "```python\n# Detailed Description:"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2701
392


In [26]:
responses_free = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_free.append(response)
    print(i+1)

1
2
3
4
5
6


In [27]:
print(responses_free[0][0])



# This code implements the core Selective State Space Model (S6) described in 
# the paper, including the efficient fused kernel for training.
# It allows the SSM parameters A, B, C to be functions of the input x, 
# making it selective and able to perform content-dependent reasoning.

# The S6 layer can flexibly be incorporated into neural network architectures.
# Here we demonstrate usage by wrapping it in the simplified "Mamba" architecture
# also described in the paper, which stacks the S6 layer with linear projections.

# For demonstration, we train the Mamba model on a small synthetic "Selective Copy"
# task that requires filtering irrelevant information. We show Mamba can solve this
# while non-selective baselines cannot.

import torch
import torch.nn as nn
import torch.nn.functional as F

# S6 Layer definition
class S6Layer(nn.Module):
    def __init__(self, input_size, state_size):
        super().__init__()
        
        # Core SSM parameters  
        self.A = nn.Parame

In [28]:
print(responses_free[1][0])


# This program implements the scalable frame algorithms described in the paper
# "On Optimal Frame Conditioners" by Chae Clark and Kasso A. Okoudjou.
# Key components implemented:
# 1. Functions to generate random frames 
# 2. Scaling algorithms: Linear Programming, Barrier, Augmented Lagrangian
# 3. Analysis of sparsity patterns and scalability testing

# Imports and Dependencies
import numpy as np
from cvxpy import Problem, Minimize, Maximize, Variable, Parameter, norm1, abs

# Frame Generation 
def generate_random_frame(N, M):
    """Generate an NxM frame matrix with gaussian random elements"""
    F = np.random.normal(size=(N,M)) 
    return F

# Objective Functions 
def linear_program_obj(F, u):
    """Linear program objective - l1 norm minimization"""
    return norm1(u)

def barrier_obj(F, u):
    """Barrier objective with log barrier epsilon term""" 
    eps = 1e-3
    return sum(np.log(u + eps))

def augmented_lagrangian_obj(F, u):
    """Augmented lagrangian objective - l2 n

In [29]:
print(responses_free[2][0])

 

This code implements the Tree of Thoughts (ToT) framework from the paper 
"Tree of Thoughts: Deliberate Problem Solving with Large Language Models".

ToT allows language models (LMs) to explore multiple reasoning paths over intermediate
"thought" steps to solve problems that require planning or search. 

It involves maintaining a tree where each node is a "state" representing the 
sequence of thoughts so far. New thoughts extend the state to child nodes. 
A state evaluator heuristic guides tree search to promising states.

This code shows an example implementation of ToT for the "Game of 24" mathematical
reasoning challenge, where the goal is to reach 24 from 4 input numbers using basic 
arithmetic operations.

The key components implemented:

1. ThoughtGenerator: Generate next possible thought (intermediate equation)
   given current state

2. StateEvaluator: Evaluate if a state's thoughts can reach 24
   Values each state as "sure", "maybe", "impossible" 
   
3. SearchManager: Exp

In [30]:
print(responses_free[3][0])


# This code implements the LoRA approach for adapting pre-trained language models 
# for downstream tasks, as presented in the paper "LORA: Low-Rank Adaptation of Large 
# Language Models".
#
# LoRA is a method for updating pre-trained model weights to tailor them for a particular task
# using low-rank decompositions of the weight updates instead of directly modifying all the weights.
# This significantly reduces the number of parameters needed to be tuned for adaptation while
# maintaining the performance.
#
# The key components implemented:
# - LowRankAdaptationModule: Custom module that replaces existing weight matrices
#   with a low rank decomposition to constrain weight updates
# - LoRAAdaptedModel: Builds a model (e.g. Transformer, BERT) with LowRankAdaptationModules 
#   inserted to replace and decompose selected weight matrices 
# - Model training loop with forward/backward passes through LoRAAdaptedModel

import torch
import torch.nn as nn
from torch.nn import functional as 

In [31]:
print(responses_free[4][0])


# The paper proposed a method called ReLoRA, which stands for Re-starting Low-Rank Updates, 
# to train high-rank neural networks efficiently. The key ideas are:

# 1. Use LoRA (Low-Rank Adaptation) to decompose a layer's weight matrix W into low rank matrices WA and WB.
# Only WA and WB are trained while W is fixed. This reduces the number of trainable parameters. 

# 2. Periodically merge WA and WB back into W through a low-rank update to W.
# Then reinitialize WA and WB. This allows accumulating higher rank updates to W over multiple rounds.

# 3. Other enhancements like warm start with initial full rank training, 
# jagged learning rates around restarts, and partial optimizer resets enable stable training.

# Below is sample code to demonstrate training a simple MLP model using ReLoRA.
# Key components implemented:

# - LoRA layer replacement of fully connected layers
# - Scheduling restarts and optimizer resets 
# - Jagged learning rates aligned with restarts
# - Tracking rank me

In [32]:
print(responses_free[5][0])



# The paper proposes two methods to train gender de-biasing models:
# 1. Backward Augmentation: Using gender-fair target text as targets and artificially 
#    biasing them to create gender-biased source texts for training
# 2. Round-trip Augmentation: Translating gender-fair target texts to a pivot  
#    language and back to create pseudo gender-biased source texts for training

# We will implement both data augmentation methods and the seq2seq models used.

# 1. Backward Augmentation
# ---------------------------

import spacy
from spacy.tokens import Doc
import random

# Create lookup dictionaries mapping gender-fair words to biased forms
# Based on tables in Appendix E of paper
gender_fair_to_biased_pronouns = {
   "they":"he",
   "their":"his",
   "them":"him",
   "theirs":"his",
   "themself":"himself"}

gender_fair_to_biased_nouns = {
   "chairperson":"chairman",
   "police officer":"policeman",
   "firefighter":"fireman"} 
   
# Rule-based function to artificially bias gende

In [57]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = ""

prompt_instructions = """Read the following research paper and write a podcast to be read by a speech to text service (e.g. "This paper focuses on...").
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, write a detailed and creative podcast to be read by a speech to text service like Amazon Polly text to speech service in <podcast-script></podcast-script> tags.
Write the podcast as a discussion between two people, the "Presenter" and "Guest". Give them separate personalities."""

prompt_details = """Follow these steps when writing the podcast:
1. Carefully read and comprehend the research paper. Make sure you fully understand the key concepts, methodology, results, and conclusions. As you read, highlight important parts and jot down questions or points you'd like to expand on.
2. Outline the key sections to cover in your podcast. For example: Background, Problem Statement, Methods, Results, Discussion, Conclusions. Under each heading, make bullet points of specifics to discuss.
3. Write a script that flows logically from one idea to the next. Introduce the topic and give relevant background first. Then explain the research questions/problems. Walk through the methodology and results. Discuss what the results mean. Wrap up with conclusions, limitations, and future work.  
4. Include conversational language, analogies and examples that make concepts clear for a general audience. For a natural language processing paper, perhaps compare word embeddings to elementary math concepts. Or analogize hidden Markov models to guessing what's in a wrapped gift box based on subtle sounds from shaking it. 
5. Read the script aloud naturally and record with your preferred text-to-speech program. Break it into logical sections - introduction, methods, etc. Add some improvised commentary for color. 
6. Edit the computer-generated speech audio file to polish, set proper pacing between sections, add faint background music.
7. Use the transcript of your script to create titles, descriptions and time-links for key sections to create a good podcast experience.
An example topic could be a paper on using neural networks for autonomous vehicle navigation. You'd explain background on self-driving cars, the specific issues in the paper, solutions and techniques the paper proposes, what results mean for the field, limitations and next steps. You could give examples people relate to, like navigating tricky scenarios on the road. The end product is an engaging, accessible podcast making the essence of the research paper clear and interesting to non-experts."""

prompt_post = ""
prompt_end = "<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2650
397


In [58]:
responses_pod1 = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_pod1.append(response)
    print(i+1)

1
2
3
4
5
6


In [59]:
print(prompt_end + responses_pod1[0][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "Mamba: Linear-Time Sequence Modeling with Selective State Spaces". To dive into the details, I have with me my colleague who is an expert in this field. Welcome! Please introduce yourself to our listeners.

Guest: Hello everyone! I'm thrilled to be here. My name is Amy and I'm a researcher in deep learning, focusing on sequence modeling which is crucial for language tasks. 

Presenter: Wonderful! As the paper title suggests, we'll be talking about an approach called Mamba that can model long sequences in linear time. But before jumping in, let's provide some background. Can you explain what sequence modeling means and why it's important?

Guest: Sure! Sequence modeling refers to machine learning techniques that can process sequential data like text, audio, video etc. where the order matters. It has become the backbone of natural language processing. For example, to predict the next w

In [60]:
print(prompt_end + responses_pod1[1][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "On Optimal Frame Conditioners".

<voice emotion="excited">
Guest: Hi! Great to join you today to talk about this paper on frames and convex optimization. 
</voice>

Presenter: Wonderful! Let's start with some background. This paper focuses on the problem of converting a frame - which is a set of vectors used to represent signals - into an "optimally conditioned" tight frame by rescaling the vectors. 

<voice emotion="happy">
Guest: Right! Tight frames are like an optimal, compact basis for signal representation. The paper shows how to formalize frame rescaling as a convex optimization problem, which allows bringing powerful techniques to bear on getting nice tight frames.
</voice>

Presenter: Excellent point! The authors show how properties like sparsity of the rescaled frame can be encouraged by careful design of the optimization objective function. This is a very flexible approach.

In [61]:
print(prompt_end + responses_pod1[2][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "Tree of Thoughts: Deliberate Problem Solving with Large Language Models". How are you doing today, my intelligent friend?

Guest: I'm doing quite splendid, thank you! This paper discusses some fascinating new techniques for improving reasoning capabilities in large language models. I'm keen to dive in!

Presenter: Wonderful! As artificial intelligence continues advancing rapidly, researchers are exploring how to best leverage large language models like GPT-3 and GPT-4 for complex problem solving. However, these models still rely on a simple left-to-right token prediction process during text generation. 

Guest: Precisely! And while this token-by-token approach works pretty well for many tasks, the authors argue it can fall short when more deliberative planning or search is needed to find solutions.

Presenter: Right. So to address this, the authors propose a framework called "Tree of

In [62]:
print(prompt_end + responses_pod1[3][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is called "LORA: Low-RANK ADAPTATION OF LARGE LAN-GUAGE MODELS". Our guest today is Dr. Smith, a leading expert in natural language processing. Welcome to the show!

Guest: Thank you for having me! I'm excited to discuss this interesting paper on adapting large language models.

Presenter: To start, can you give us a high-level overview of what this paper is about? 

Guest: Sure! As language models like GPT-3 have grown enormously in size, fine-tuning them for downstream tasks becomes very expensive. This paper proposes an efficient adaptation method called LoRA that keeps most parameters frozen and injects smaller trainable matrices.

Presenter: Interesting! Can you walk us through the key ideas?

Guest: The core idea is that weight changes during fine-tuning tend to be low-rank. So LoRA decomposes these changes into smaller factor matrices. By only training those extra matrices, it ada

In [63]:
print(prompt_end + responses_pod1[4][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "ReLoRA: High-Rank Training Through Low-Rank Updates". It is very interesting research on an efficient way to train large neural networks. To help us understand this paper, I have with me Dr. Smith, an expert in machine learning. Welcome Dr. Smith!

Guest: Thank you for having me! I'm looking forward to discussing this intriguing work.

Presenter: Great! To start off, can you give us some background on why training large neural networks efficiently is an important research problem?

Guest: Sure. In recent years, we've seen massive neural networks with billions of parameters achieve impressive performance on complex tasks like language understanding and computer vision. However, training these giant models requires prohibitive amounts of computational resources that only a few organizations can access. So finding techniques to train large networks efficiently makes the powerful AI they

In [64]:
print(prompt_end + responses_pod1[5][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "Exploiting biased models to de-bias text: A gender-fair rewriting model". I am very excited to know more about this topic from our guest today. Welcome to the show! Please introduce yourself.  

Guest: Hello everyone! My name is Priya and I am a postdoc researcher at the University of Zurich working on natural language processing and gender bias. Thanks so much for having me!

Presenter: No problem at all, Priya! We are very excited to know more about your work. Can you please give a brief background about the key issues surrounding gender bias in NLP models that motivated this research?

Guest: Sure! As we know, NLP models which power a lot of language technologies we use everyday like Google Translate or Alexa have been shown to perpetuate gender stereotypes and be discriminative towards certain genders. For example, we found that Google Translate often translates gender neutral Tu

In [71]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = ""

prompt_instructions = """Read the following research paper and write a podcast to be read by a speech to text service (e.g. "This paper focuses on...").
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, write a detailed and creative podcast to be read by a text to speech service like Amazon Polly in <podcast-script></podcast-script> tags.
Write the podcast as a deep discussion between two people, the "Presenter" and "Guest". Give them separate personalities. An episode of the podcast should last close to 20 minutes."""

prompt_details = """Follow these steps when writing the podcast:
1. Carefully read and comprehend the research paper. Make sure you fully understand the key concepts, methodology, results, and conclusions. As you read, highlight important parts and jot down questions or points you'd like to expand on.
2. Outline the key sections to cover in your podcast. For example: Background, Problem Statement, Methods, Results, Discussion, Conclusions. Under each heading, make bullet points of specifics to discuss.
3. Write a script that flows logically from one idea to the next, but adds enough detail to make a lengthy episode. Introduce the topic and give relevant background first. Then explain the research questions/problems. Walk through the methodology and results. Discuss what the results mean. Wrap up with conclusions, limitations, and future work.  
4. Include conversational language, analogies and examples that make concepts clear for a general audience. For a natural language processing paper, perhaps compare word embeddings to elementary math concepts. Or analogize hidden Markov models to guessing what's in a wrapped gift box based on subtle sounds from shaking it. 
5. Read the script aloud naturally and record with your preferred text-to-speech program. Break it into logical sections - introduction, methods, etc. Add some improvised commentary for color. 
6. Use the transcript of your script to create titles, descriptions and time-links for key sections to create a good podcast experience.
An example topic could be a paper on using neural networks for autonomous vehicle navigation. You'd explain background on self-driving cars, the specific issues in the paper, solutions and techniques the paper proposes, what results mean for the field, limitations and next steps. You could give examples people relate to, like navigating tricky scenarios on the road. The end product is an engaging, accessible podcast making the essence of the research paper clear and interesting to non-experts."""

prompt_post = ""
prompt_end = "<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2612
395


In [72]:
responses_pod2 = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_pod2.append(response)
    print(i+1)

1
2
3
4
5
6


In [73]:
print(prompt_end + responses_pod2[0][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is 'Mamba: Linear-Time Sequence Modeling with Selective State Spaces'. Let me introduce my co-host for today's podcast, Albert. Albert is an expert in the field of machine learning and neural networks. Welcome Albert!

Guest: Thanks for having me! I'm looking forward to discussing this interesting paper on selective state space models. 

Presenter: Great! To start with, can you give us a high level overview of what this paper is about? What problem does it aim to solve?

Guest: Sure! This paper introduces a new class of models called selective state space models or S6 models. The goal is to develop sequence models that are both effective, like Transformers, and efficient - with linear scaling in sequence length.

The key insight is something the authors call a selection mechanism. This allows models to selectively focus on or ignore parts of the input sequence, compressing key informatio

In [74]:
print(prompt_end + responses_pod2[1][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is 'On Optimal Frame Conditioners'. 
Guest: Yes, it's an interesting paper that provides a new approach to convert non-tight frames into tight frames using optimization techniques. Shall we start with some background?

Presenter: Sure, that will help set the context. Frames are important concepts in signal processing that provide redundant representations. Tight frames are optimally conditioned frames, but converting general frames into tight frames is an open challenge. 

Guest: Exactly. This paper reformulates the tight frame conversion problem as a convex optimization problem which can be efficiently solved. They provide some elegant formulations using linear and barrier programs.

Presenter: Hmm interesting. Can you explain what they mean by scalable frames and how it connects to tight frames? I think that context will help listeners understand the crux of the paper.

Guest: Good poi

In [75]:
print(prompt_end + responses_pod2[2][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "Tree of Thoughts: Deliberate Problem Solving with Large Language Models." Let me introduce my guest for today's podcast, Alex. He is an expert in AI and has been following the recent developments in large language models closely. Welcome to the show, Alex!

Guest: Thank you for having me! I'm excited to discuss this fascinating paper. 

Presenter: Before we dive deeper, can you give us a high-level overview of what this paper is about?

Guest: Sure! In a nutshell, this paper proposes a new framework called "Tree of Thoughts" or ToT that allows large language models to solve problems more deliberately through search and planning. 

You see, most language models today are trained to generate text sequentially from left to right, one token at a time. This works well for many tasks, but can fall short when more complex reasoning or exploration is needed.

The authors draw inspiration fro

In [76]:
print(prompt_end + responses_pod2[3][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is called LORA: Low-RANK ADAPTATION OF LARGE LAN-
GUAGE MODELS. Let us welcome our guest for today, Dr. Sarah Kline, a leading expert on natural language processing and neural network models. Welcome, Dr. Kline!

Guest: Hello! Thank you for having me on the show. I'm excited to discuss this interesting paper with you and the listeners.

Presenter: Great! So before we dive into the paper, can you give us a quick 101 on neural network language models? What are they, and why are they useful?

Guest: Sure! In a nutshell, language models are AI systems that are trained on massive amounts of text data to understand and generate human language. They power a lot of the natural language processing we see today - things like search engines, chatbots, text auto-completion on your smartphone, and even voice assistants like Siri or Alexa. 

The state-of-the-art technique is to use deep neural network

In [77]:
print(prompt_end + responses_pod2[4][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "ReLoRA: High-Rank Training Through Low-Rank Updates". To discuss this paper with us today, we have a special guest with us - [insert creative guest name]. Welcome to the show!

Guest: Thank you for having me! I'm excited to discuss this interesting paper on efficient neural network training.  

Presenter: Great! Let's start by setting the context. In recent years, neural network models have grown massively in size, with models reaching hundreds of billions of parameters. However, training these giant models requires prohibitive computational resources. So the key question is - do we really need such overparameterized models? Can we train high performance models more efficiently?

Guest: Exactly! This paper introduces a method called ReLoRA that aims to do just that. It explores more parameter-efficient techniques to train large neural language models. Specifically, ReLoRA performs lo

In [78]:
print(prompt_end + responses_pod2[5][0])

<podcast-script>Presenter: Hi! I hope you all are doing well. The title paper of today's discussion is "Exploiting Biased Models to De-bias Text: A Gender-Fair Rewriting Model" by researchers at the University of Zurich and the company Textshuttle. And joining me is my esteemed guest, Dr. Amanda Smith. Welcome Dr. Smith!

Guest: Thank you for having me! I'm happy to discuss this timely topic with you.

Presenter: For our listeners, Dr. Smith is a professor of linguistics who specializes in gender and language. So let's dive in! Dr. Smith, can you give us a high-level overview of what this paper is about?

Guest: Of course! This paper focuses on how to make AI text generation systems produce less biased, more gender-fair language. The authors train models that can take an input text, like from another AI system, and rewrite it to use more inclusive pronouns and job titles. 

Presenter: Fascinating! Can you explain why that's important? Don't we want AI to just reflect the way real peopl

In [137]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = ""

prompt_instructions = """Read the following research paper and write a podcast to be read by a speech to text service (e.g. "This paper focuses on...").
\nPaper:<paper>INPUT_PAPER</paper>\n\nFrom the above paper, write a detailed and creative podcast to be read by a text to speech service like Amazon Polly in <podcast-script></podcast-script> tags.
Write the podcast as a deep technical discussion and presentation of the topic. An episode of the podcast should last close to 20 minutes."""

prompt_details = """Follow these steps when writing the podcast:
1. Carefully read and comprehend the research paper. Make sure you fully understand the key concepts, methodology, results, and conclusions. As you read, highlight important parts and jot down questions or points you'd like to expand on.
2. Outline the key sections to cover in your podcast. For example: Background, Problem Statement, Methods, Results, Discussion, Conclusions. Under each heading, make bullet points of specifics to discuss.
3. Write a script that flows logically from one idea to the next, but adds enough detail to make a lengthy episode. Introduce the topic and give relevant background first. Then explain the research questions/problems. Walk through the methodology and results. Discuss what the results mean. Wrap up with conclusions, limitations, and future work.  
4. Include conversational language, analogies and examples that make concepts clear for a general audience. For a natural language processing paper, perhaps compare word embeddings to elementary math concepts. Or analogize hidden Markov models to guessing what's in a wrapped gift box based on subtle sounds from shaking it. 
5. Read the script aloud naturally and record with your preferred text-to-speech program. Break it into logical sections - introduction, methods, etc. Add some improvised commentary for color. 
6. Use the transcript of your script to create titles, descriptions and time-links for key sections to create a good podcast experience.
An example topic could be a paper on using neural networks for autonomous vehicle navigation. You'd explain background on self-driving cars, the specific issues in the paper, solutions and techniques the paper proposes, what results mean for the field, limitations and next steps. You could give examples people relate to, like navigating tricky scenarios on the road. The end product is an engaging, accessible podcast making the essence of the research paper clear and interesting to non-experts."""

prompt_post = ""
prompt_end = "<podcast-script>"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2479
375


In [141]:
responses_pod3 = []
for i, fp in enumerate(filepaths):
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_pod3.append(response)
    print(i+1)

1
2
3
4
5
An error occurred (ThrottlingException) when calling the InvokeModel operation (reached max retries: 4): Too many requests, please wait before trying again. You have sent too many requests.  Wait before trying again.
6


In [142]:
print(prompt_end + responses_pod3[0][0])

<podcast-script>

Hello and welcome to the AI Research Review, the podcast that breaks down the latest in artificial intelligence papers into understandable bites. I'm your host, Alexa.  

Today we're discussing an exciting new paper from Carnegie Mellon University titled "Mamba: Linear-Time Sequence Modeling with Selective State Spaces." This paper introduces a novel neural network architecture called Mamba that has huge implications for large language models and other sequence-based AI systems. 

To understand why this research is so important, we first need to talk about the backbone of most modern natural language systems - the Transformer model. Transformers underpin chatbots, search engines, autocorrect on your phone - all the language smarts we take for granted these days. But they have some big flaws: Transformers scale quadratically, meaning they get exponentially slower and more memory-intensive as sequence length grows. This caps how much context they can process.

Enter Mam

In [143]:
print(prompt_end + responses_pod3[1][0])

<podcast-script>
Welcome to the Frames and Scalability Podcast! In today's episode we will be discussing a new research paper that explores how to optimize the conditioning of frames using mathematical optimization techniques. 

To start, let's talk about what frames actually are. Frames are collections of vectors in vector spaces that allow signal representations that are robust to noise and erasures. You can think of them as spanning sets for the vector space. Tight frames are optimally conditioned frames where the vectors are nicely spread out and balanced. The problem is how to take a non-tight frame and convert it into an optimally conditioned tight frame. This process is known as scalability. 

The key contribution of this new paper is reformulating the scalability problem as a convex optimization problem. Convex optimization problems have nice mathematical properties that enable efficient numerical solutions. 

The researchers try out several creative formulations for converting

In [144]:
print(prompt_end + responses_pod3[2][0])

<podcast-script>

Welcome to the AI Insights podcast, where we discuss the latest research in artificial intelligence. In this episode, we'll be talking about an exciting new paper from Princeton University and Google DeepMind titled "Tree of Thoughts: Deliberate Problem Solving with Large Language Models."

To give some background, large language models like GPT-3 and GPT-4 have shown impressive abilities to generate coherent text. However, they still struggle with complex, multi-step reasoning required for problem solving. When you prompt these models to solve a math word problem for example, they tend to make decisions in a linear, left-to-right fashion that can lead to mistakes. 

The key idea in this paper is that language models would benefit from more deliberate, planned thinking - akin to how humans consciously work through problems step-by-step. The authors introduce a framework called "Tree of Thoughts" or ToT, which allows models to explore multiple reasoning paths over poss

In [145]:
print(prompt_end + responses_pod3[3][0])

<podcast-script>

Hello and welcome to the AI Research Podcast! I'm your host Julia and today we'll be discussing an exciting new paper titled "LORA: Low-RANK ADAPTATION OF LARGE LANGUAGE MODELS."

In recent years, natural language processing has been revolutionized by large pre-trained language models like BERT, GPT-2, and GPT-3. These models are first trained on massive amounts of text data to learn general language representations. Then the models can be fine-tuned to adapt to specific downstream NLP tasks like question answering, summarization, and translation. 

The problem is that fine-tuning retrains all of the model's parameters, which presents challenges when working with enormous models like GPT-3 with 175 billion parameters. It becomes incredibly expensive computationally to have separate fine-tuned model instances for every new task. 

This paper proposes an efficient alternative called Low-Rank Adaptation or LoRA. The key insight is that the changes to the weights during f

In [146]:
print(prompt_end + responses_pod3[4][0])

<podcast-script>

Hello and welcome to the AI Research Review podcast! I'm your host, Alexa. In this episode, we'll be discussing the paper "ReLoRA: High-Rank Training Through Low-Rank Updates" by Lialin et al. This paper proposes a novel method to train large neural networks more efficiently. 

To set the context, over the past decade we've seen the trend of simply scaling up neural networks to be bigger and bigger, with models now reaching hundreds of billions of parameters. But the computational costs of training these giant models is getting prohibitively expensive for most researchers. So there's a push to find more efficient training techniques.

The core question this paper tries to address is: can we train high quality, large neural networks without actually having to update all the parameters at once during training? Their proposal is a method called ReLoRA. 

Let's break down the key idea in ReLoRA with an analogy. Say you're training a neural network with a billion parameter

In [147]:
print(prompt_end + responses_pod3[5][0])

<podcast-script>

Welcome to the NLP Podcast! I'm your host Alex and today I'll be summarizing and discussing a fascinating new research paper on reducing gender bias in natural language generation models. 

In recent years, AI systems like chatbots, voice assistants, and text generators have grown incredibly sophisticated thanks to advances in natural language processing. However these systems can often reflect or amplify harmful gender stereotypes that exist in the training data or algorithms. The authors of this paper developed new methods to mitigate gender bias in text generation. 

Let's start with some background. AI systems are trained on massive datasets scraped from the internet, which contain all kinds of human biases around gender, race, religion and more. Models then learn to reproduce those biases. For example if a system sees the word "doctor" paired more often with "he" than "she" in the training data, it will continue associating doctor with males.

The authors focus s

In [156]:
human_tag, robot_tag = get_model_tags(endpoint)

prompt_persona = ""

prompt_instructions = """Read the following podcast script and write an extended podcast to be read by a speech to text service (e.g. "This paper focuses on...").
\nOriginal Paper:<paper>INPUT_PAPER</paper>
\nScript:<script>INPUT_SCRIPT</script>\n
From the above script, write a detailed and deeply technical and creative extended podcast that gives a lot more detail and lots of explanatory examples to make the topic clear.
Use <podcast-script></podcast-script> tags to enclose the script and optimize the script for Amazon polly text-to-speech service.
An episode of the podcast should last close to 30 minutes."""

prompt_details = """Follow these steps when writing the podcast:
1. Identify the key topics and themes in the short script. Make a bullet point outline of the main points. This will serve as the framework to build upon.
2. For each main point, brainstorm additional details, examples, analogies, and explanatory content you could add to expand and illustrate that point. Don't censor yourself - write down any ideas that come to mind.
3. Research the topic more deeply to uncover additional interesting information, historical context, relevant statistics and facts, opposing viewpoints, etc. that would make the discussion more multi-faceted. 
4. Weave in thought questions and prompts to get the listener reflecting more deeply. Some examples:
   - "Have you ever experienced a situation where this machine learning principle came into play?" 
   - "What other examples can you think of that illustrate this concept?"
   - "How might things be different if this key assumption was changed?"
5. Turn the main points into full scenes and narratives. For example, for a point about bias in data sets, you could describe a fictional scenario following a machine learning engineer working with a flawed data set and how it impacts their model results and real-world performance.
6. Incorporate dialogues - either real back-and-forth conversational elements or dramatizations of hypothetical conversations that might unfold in certain scenarios you describe. This adds flavor and interest.
7. Write transitions connecting the different sections and reorganize the content flow for the best listener experience - revealing information gradually to build curiosity and understanding.
The key is to take a focused short script and use your creativity, knowledge, and research to significantly expand each piece of the framework while keeping it entertaining and easy to comprehend.
Include concrete examples, thoughtful prompting, and even fun dramatizations to bring the content to life for your podcast audience."""

prompt_post = ""
prompt_end = "<podcast-script>"

prompt_template = human_tag + prompt_persona + prompt_instructions + prompt_details + robot_tag + prompt_post + prompt_end

print(len(prompt_template))
print(len(prompt_template.split()))

2609
387


In [157]:
responses_pod_exp1 = []
for fp, script in zip(filepaths[:1], responses_pod3[:1]):
    script = script[0].split('</podcast-script>')[0]
    
    with open(fp, 'r') as file:
        paper = str(file.read())
    prompt_full = prompt_template.replace("INPUT_PAPER", paper)
    prompt_full = prompt_full.replace("INPUT_SCRIPT", script)
    
    payload['prompt'] = prompt_full
    response = query_endpoint(payload)
    responses_pod_exp1.append(response)
    print(fp)

papers/2312.00752.txt


In [158]:
print(prompt_end + responses_pod_exp1[0][0])

<podcast-script>

Hello again my curious AI listeners! I'm Alexa and welcome back to the AI Research Review. Last time, we explored an exciting new neural network architecture called Mamba that promises to revolutionize sequence modeling. 

Today I want to go much deeper on how Mamba works its magic under the hood. Understanding the mechanics can help you grasp the monumental leap this represents over models like Transformers. I'll use plenty of relatable metaphors and examples to get these complex concepts across. Think of me as your friendly AI tour guide! 

Let's start by time traveling back to the 1950s when researchers first modeled language statistically. They realized words followed predictable sequences - if you saw "peanut butter and" the next word was often "jelly." But those models only looked at the last word or two. Our human experience tells us language depends enormously on context.

So imagine you just heard a speech mentioning Martin Luther King. Now if I say "I have a

In [159]:
def call_polly(client, text):
    def remove_xml_tags(text):
        cleaned_text = ""  
        xml_tag = re.compile(r"<[^>]*>")
        fragments = xml_tag.split(text)
        for fragment in fragments:
            if not re.match("<[^>]*>", fragment):
                cleaned_text += fragment
        return cleaned_text

    try:
        response = client.synthesize_speech(Engine='neural', OutputFormat='mp3', Text=text, TextType='ssml', VoiceId='Joanna')
    except:
        try:
            text = remove_xml_tags(text).replace('&','&amp;').replace("'","&apos;").replace('"','&quot;').replace("<","&lt;").replace(">","&gt;")
            response = client.synthesize_speech(Engine='neural', OutputFormat='mp3', Text='<speak><p>'+text+'</p></speak>', TextType='ssml', VoiceId='Joanna')
        except:
            response = client.synthesize_speech(Engine='neural', OutputFormat='mp3', Text='<speak><p></p></speak>', TextType='ssml', VoiceId='Joanna')
    return response


import boto3
polly = boto3.client('polly')

script = responses_pod_exp1[0][0].split("</podcast-script>")[0]
script = ['<speak><p>'+x.strip()+'</p></speak>' for x in script.split('\n') if len(x.replace('\n','')) > 0]
print(script)


audio_stream = None
for line in script:
    audio = call_polly(polly, line)
    if audio_stream is None:
        audio_stream = audio['AudioStream'].read()
    else:
        audio_stream += audio['AudioStream'].read()

with open('speech.mp3', 'wb') as out:
    out.write(audio_stream)

["<speak><p>Hello again my curious AI listeners! I'm Alexa and welcome back to the AI Research Review. Last time, we explored an exciting new neural network architecture called Mamba that promises to revolutionize sequence modeling.</p></speak>", "<speak><p>Today I want to go much deeper on how Mamba works its magic under the hood. Understanding the mechanics can help you grasp the monumental leap this represents over models like Transformers. I'll use plenty of relatable metaphors and examples to get these complex concepts across. Think of me as your friendly AI tour guide!</p></speak>", '<speak><p>Let\'s start by time traveling back to the 1950s when researchers first modeled language statistically. They realized words followed predictable sequences - if you saw "peanut butter and" the next word was often "jelly." But those models only looked at the last word or two. Our human experience tells us language depends enormously on context.</p></speak>', '<speak><p>So imagine you just hea