# Arxiv reference parser

Imports

In [1]:
#imports to load model
from random import randrange
from transformers import AutoTokenizer, set_seed, pipeline,BitsAndBytesConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig,LlamaConfig,LlamaModel,AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, BitsAndBytesConfig 
import torch
import bitsandbytes as bnb
import os
import json

#imports for langchain functionalities
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.tools import BaseTool
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import transformers

from langchain.chains import ConversationChain, ConversationalRetrievalChain, SequentialChain
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain.agents import ZeroShotAgent, AgentExecutor
from langchain.prompts import PromptTemplate

## Loading llama-2 7b

BitsAndBytes configuration

In [3]:
def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return bnb_config

Load model

In [4]:
def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = f'{40960}MB' #TODO Change if necessary

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto", 
        max_memory = {i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

Model ids and bnb config

In [5]:
model_id_normal = 'meta-llama/Llama-2-7b'
model_id_normal_hf = 'meta-llama/Llama-2-7b-hf'
model_id_chat = 'meta-llama/Llama-2-7b-chat'
model_id_chat_hf = 'meta-llama/Llama-2-7b-chat-hf'

bnb_config = create_bnb_config()

In [6]:
model, tokenizer = load_model(model_id_chat_hf, bnb_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



### Testing inference on the LLM

In [10]:
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm

In [7]:
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    #stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.01,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=1024,  # mex number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [8]:
llm = HuggingFacePipeline(pipeline=generate_text)

In [20]:
llm.predict('what is the capital of Spain?')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n Unterscheidung zwischen "Spain" und "Spanish"\n\nThe capital of Spain is Madrid.\n\nIt\'s important to note that "Spain" refers to the country as a whole, while "Spanish" can refer to either the language or the people from Spain. So, for example:\n\n* "Spain is a beautiful country with a rich culture." (Here, "Spain" refers to the country.)\n* "I love speaking Spanish with my friends." (Here, "Spanish" refers to the language.)\n* "My grandparents are from Spain, so I have Spanish ancestry." (Here, "Spanish" refers to the people from Spain.)\n\nSo, to answer your question, the capital of Spain is Madrid.'

## Reference parsing using LLM

### Jsonformer (impose a strict json structure)

In [9]:
!pip install jsonformer

Collecting jsonformer
  Downloading jsonformer-0.12.0-py3-none-any.whl (6.6 kB)
Collecting termcolor<3.0.0,>=2.3.0
  Downloading termcolor-2.3.0-py3-none-any.whl (6.9 kB)
Installing collected packages: termcolor, jsonformer
Successfully installed jsonformer-0.12.0 termcolor-2.3.0


In [18]:
from jsonformer.format import highlight_values
from jsonformer.main import Jsonformer

In [36]:
test = {
    "type": "object",
    "properties": {
        "references": {
            "type": "object",
            "properties": {
                "ref_id": {"type": "string"},
                "title": {"type": "string"},
                "author": {"type": "string"},
                "year": {"type": "string"},
            }
        },
    }
}

In [32]:
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=test,
    prompt='''
Can you parse all the title, author and year of publications of these references?

Allen, T. D., & Rush, M. C. (2001). The influence of ratee gender 
on ratings of organizational citizenship behavior. Journal of 63
HWI, Servant Leadership, OCB and CWB in Italy
Applied Social Psychology, 31 (12), 2561-2587.  https://doi.
org/10.1111/j.1559-1816.2001.tb00191.x  
Andreassen, C. S., Nielsen, M. B., Pallesen, S., & Gjerstad, J. (2019). The 
relationship between psychosocial work variables and workaholism: 
Findings from a nationally representative survey. International Journal 
of Stress Management, 26 (1), 1-10. https://doi.org/10.1037/str0000073   
Aziz, S., Pittman, C., & Wuensch, K. (2020). Workaholism and organizational 
citizenship behaviors: Exploring gender role beliefs.  International 
Journal of Workplace Health Management, 13 (4), 413-425. https://doi.
org/10.1108/IJWHM-06-2019-0089  
Bakker, A. B., & Bal, P. M. (2010). Weekly work engagement and performance: A 
study among starting teachers. Journal of Occupational and Organizational 
Psychology, 83 (1), 189-206. https://doi.org/10.1348/096317909X402596  
Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the 
relationship between workaholism and workplace aggressive behaviour: 
The role of job-related emotion.  Personality and Individual Differences, 
53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004  
Barbaranelli, C., Fida, R., & Gulandri, M. (2013). Assessing counterproductive 
work behavior: A study on the dimensionality of CWB-checklist . TMP-
Testing, Psychometrics, Methodology in Applied Psychology, 20 (3), 235-
248.  https://doi.org/10.4473/TPM20.3.3  
Beauregard, T.  A. (2012). Perfectionism, self-efficacy and OCB: The 
moderating role of gender. Personnel Review , 41(5), 590-608.  https://
doi.org/10.1108/00483481211249120  
Bentler, P. M., & Wu, E. J. (2005). EQS 6.1 for Windows: Structural equations 
program manual . Multivariate Software.
Birkeland, I. K., & Buch, R. (2015). The dualistic model of passion for work: 
Discriminative and predictive validity with work engagement and 
workaholism. Motivation and Emotion, 39 (3), 392-408. https://doi.
org/10.1007/s11031-014-9462-x  
Borman, W. C., & Motowidlo, S. J. (1993). Expanding the criterion domain to 
include elements of contextual performance. In N. Schmitt & W. C. Borman 
(Eds.),  Personnel selection in organizations (pp. 71–98). Jossey-Bass.
Bowling, N. A., & Eschleman, K. J. (2010). Employee personality as a moderator 
of the relationships between work stressors and counterproductive work 
behavior. Journal of Occupational Health Psychology, 15 (1), 91-103. 
https://doi.org/10.1037/a0017326
Bruk-Lee, V., & Spector, P. (2006). The social stressors-counterproductive work 
behaviors link: Are conflicts with supervisors and coworkers the same? 
Journal of Occupational Health Psychology, 11 (2), 145-156. https://doi.
org/10.1037/1076- 8998.11.2.145  
Byrne, B. M. (2010). Structural equation modeling with AMOS: Basic concepts, 
applications, and programming  (2nd ed.). Routledge.
Chappell, D., & Di Martino, V. (2006).  Violence at work (3rd ed.). International 
Labour Organization.
Choi, Y. (2013). The differences between work engagement and workaholism, 
and organizational outcomes: An integrative model. Social Behavior 
and Personality, 41 (10), 1655-1666. https://doi.org/10.2224/
sbp.2013.41.10.1655
Dalal, R. S. (2005). A meta-analysis of the relationship between organizational 
citizenship behavior and counterproductive work behavior. Journal of 
Applied Psychology, 90 (6), 1241-1255. https://doi.org/10.1037/0021-
9010.90.6.1241  
Eagly, A. H. (1987). Sex differences in social behavior: A social role 
interpretation . Erlbaum.
Eagly, A., Karau, S. J., & Makajhani, M. G. (1995). Gender and the effectiveness 
of leaders: A meta-analysis. Psychological Bulletin, 117 (1), 125-145.  
https://doi.org/10.1037/0033-2909.117.1.125  
Ehrhart, M. G. (2004). Leadership and procedural justice climate as 
antecedents of unit-level organizational citizenship behavior.  Personnel 
Psychology, 57 (1), 61-94.  https://doi.org/10.1111/j.1744-6570.2004.
tb02484.x  
''')

In [33]:
output = builder()

In [34]:
output

{'ref': {'title': 'Allen, T. D., & Rush',
  'author': 'Allen, T. D., & Rush',
  'year': '2001'}}

### Guidance (Library to guide LLM towards certain types of outputs)

In [35]:
!pip install guidance

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting guidance
  Downloading guidance-0.0.64-py3-none-any.whl (100 kB)
[K     |████████████████████████████████| 100 kB 5.7 MB/s eta 0:00:01
Collecting pyparsing>=3.0.0
  Using cached pyparsing-3.1.1-py3-none-any.whl (103 kB)
Collecting tiktoken>=0.3
  Downloading tiktoken-0.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 12.8 MB/s eta 0:00:01
[?25hCollecting openai>=0.27.8
  Downloading openai-1.2.4-py3-none-any.whl (220 kB)
[K     |████████████████████████████████| 220 kB 129.9 MB/s eta 0:00:01
[?25hCollecting diskcache
  Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[K     |████████████████████████████████| 45 kB 2.9 

In [36]:
import guidance

start to install package: redis
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
successfully installed package: redis
start to install package: redis-om
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
successfully installed package: redis-om


In [39]:
guidance.llm = guidance.llms.Transformers(model,tokenizer=tokenizer)

In [41]:
# define the prompt
program = guidance("""Given a text containing many references, re-organise each of them in a json format that contains their reference id, paper title, authors and year. 
----
```json
{
    "ref_id": "{{reference id}}"
    "title": "{{paper title}}",
    "authors": "{{authors}}",
    "year": "{{year}}",
```""")



In [42]:
examples = [
    {'input': "Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences, 53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004, Aziz, S., Pittman, C., & Wuensch, K. (2020). Workaholism and organizational citizenship behaviors: Exploring gender role beliefs.  International Journal of Workplace Health Management, 13 (4), 413-425. https://doi.org/10.1108/IJWHM-06-2019-0089  Bakker, A. B., & Bal, P. M. (2010). Weekly work engagement and performance: A study among starting teachers. Journal of Occupational and Organizational Psychology, 83 (1), 189-206. https://doi.org/10.1348/096317909X402596  Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences, 53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004  Barbaranelli, C., Fida, R., & Gulandri, M. (2013). Assessing counterproductive work behavior: A study on the dimensionality of CWB-checklist . TMP-Testing, Psychometrics, Methodology in Applied Psychology, 20 (3), 235-248.  https://doi.org/10.4473/TPM20.3.3"},
     {
         "ref_id": 'id1',
         "title": "Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences",
         "authors": "Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B.",
         "year": "2012"
     },
     {
         "ref_id": 'id2',
         "title": "Workaholism and organizational citizenship behaviors: Exploring gender role beliefs.",
         "authors": "Aziz, S., Pittman, C., & Wuensch, K.",
         "year": "2020"
     },
     {
         'more references ...'
     }
]

In [43]:
# execute the prompt
out = program(input='''Giannini, M., & Loscalzo, Y. (2016). Workaholism: Health risk and prevention 
in the organizations. In A. di Fabio (Ed.), Neuroticism: Characteristics, 
impact on job performance and health outcomes (pp. 49-60). Nova 
Science Publishers.
Graham, J. W. (1991). Servant-leadership in organizations: Inspirational 
and moral. The Leadership Quarterly, 2 (2), 105-119.  https://doi.
org/10.1016/1048-9843(91)90025-WGreenleaf, R. K. (1977).  Servant-leadership: A journey into the nature of 
legitimate power and greatness . Paulist Press.
Gruys, M. L., & Sackett, P. R. (2003). Investigating the dimensionality of 
counterproductive work behavior.  International Journal of Selection 
and Assessment, 11 (1), 30-41. https://doi.org/10.1111/1468-2389.00224
Heymans, M. W., & Eekhout, I. (2019). Applied missing data analysis with 
SPSS and ®Studio. https://bookdown.org/mwheymans/bookmi
Hofstede, G. (1980). Culture’s consequences: International differences in 
work-related values.  SAGE.
Hofstede, G. (1991). Cultures and organization: Software of the mind. 
McGraw-Hill.
Hu, L. T., & Bentler, P. M. (1999). Cut-off criteria for fit indexes in covariance 
structure analysis: Conventional criteria versus new alternatives. 
Structural Equation Modeling: A Multidisciplinary Journal, 6 (1), 1-55. 
https://doi.org/10.1080/10705519909540118
James, L. R., Mulaik, S. A., & Brett, J. M. (1982). Conditions for confirmatory 
analysis and causal inference. SAGE.''', examples=examples)

In [47]:
out["authors"]

KeyError: 'authors'

#### note on Guidance:

* Promising library for various llm projects such as generating synthetic data
* Could not make it work to force a json structure
* Need to do more research on it

## Using custom prompts templates and chains to structure references

### template 1: 

In [17]:
template = """You are a master PDF reader and when given a set of references you
    always extract the most important information of the papers. For example, when
    you were given the following references:

    Lei Jimmy Ba, Jamie Ryan Kiros, and Geoffrey E.
    Hinton. 2016. Layer normalization. CoRR ,
    abs/1607.06450.
    Eyal Ben-David, Nadav Oved, and Roi Reichart.
    2021. PADA: A prompt-based autoregressive ap-
    proach for adaptation to unseen domains. CoRR ,
    abs/2102.12206.
    Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie
    Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind
    Neelakantan, Pranav Shyam, Girish Sastry, Amanda
    Askell, Sandhini Agarwal, Ariel Herbert-V oss,
    Gretchen Krueger, Tom Henighan, Rewon Child,
    Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu,
    Clemens Winter, Christopher Hesse, Mark Chen,
    Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin
    Chess, Jack Clark, Christopher Berner, Sam Mc-
    Candlish, Alec Radford, Ilya Sutskever, and Dario
    Amodei. 2020. Language models are few-shot learn-
    ers. In Advances in Neural Information Processing
    Systems 33: Annual Conference on Neural Informa-
    tion Processing Systems 2020, NeurIPS 2020, De-
    cember 6-12, 2020, virtual .

    You extract the following:

    Layer normalization | Lei Jimmy Ba, Jamie Ryan Kiros, Geoffrey E. Hinton | 2016
    PADA: A prompt-based autoregressive approach for adaptation to unseen domains | Eyal Ben-David, Nadav Oved, Roi Reichart
    Language models are few-shot learners | Tom B. Brown, et al. | 2020

    Here is the chat history: {chat_history}
    In the References below there are many papers. Extract their titles, authors, and years.

    References: {input}

    Extracted:
    """

prompt2 = PromptTemplate(
    input_variables=["chat_history","input"],template=template)
memory = ConversationBufferMemory(memory_key="chat_history")

chain2 = ConversationChain(
    prompt=prompt2,
    llm=llm,
    memory=memory,
    verbose=True
)

#### Did not output the desired format, and is very sensible to the data inputted

#### Template 2

In [94]:
from string import Template

template = """
AI should always respond with references in a structured JSON format.
It should not pretend to be human and should only include paper's title, authors, and year.
For multiple references, create a list under 'references' with each reference as an item.
AI should respond with 'I don't know mate!' if it doesn't know the answer.
The template should use the current conversation context and the user's input.

Always answer by saying first 'Here are the references :)'. Also, always answer in json format like this:

human: Can you format these references: "Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences, 53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004"?
AI: ```json
{{
  "message": "Here are the references :)",
  "references": [
    {{
      "ref_id": "ref1",
      "title": "Title of the first paper",
      "authors": ["Author1", "Author2"],
      "year": 2012
    }},
    {{
      "ref_id": "ref2",
      "title": "Title of the second paper",
      "authors": ["Author3", "Author4"],
      "year": 2013
    }}
  ]
}}```

Only use the paper's title, author and year, the rest of the information is irrelevant. If there are more than one reference then you need to create n reference where n is the number of reference refn+1 
Never forget, AI does not ask questions or pretend to be human, AI or anything else than AI. AI simply answer the input as truthfully as possible. If AI doesn't know the answer he says: I don't know mate!
The current conversation:
{chat_history}
Human: {input}
AI:"""

#template = Template(template)
#processed_string = template.substitute()

prompt = PromptTemplate(
    input_variables=["chat_history", "input"],template=template)
memory = ConversationBufferMemory(memory_key="chat_history")

#better example
#more concrete exmaple of context and what it does. 
#

#### One of the most promising template, it does output in the 'right' json format but in a String. Also it isn't very reliable

In [None]:
template = """
AI should always respond with references in a structured JSON format.
It should not pretend to be human and should only include paper's title, authors, and year.
For multiple references, create a list under 'references' with each reference as an item.
AI should respond with 'I don't know mate!' if it doesn't know the answer.
The template should use the current conversation context and the user's input.

Always answer by saying first 'Here are the references :)'. Also, always answer in json format like this:

human: Can you format these references: "Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences, 53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004"?
AI: ```json
{{
  "message": "Here are the references :)",
  "references": [
    {{
      "ref_id": "ref1",
      "title": "Title of the first paper",
      "authors": ["Author1", "Author2"],
      "year": 2012
    }},
    {{
      "ref_id": "ref2",
      "title": "Title of the second paper",
      "authors": ["Author3", "Author4"],
      "year": 2013
    }}
  ]
}}```

Only use the paper's title, author and year, the rest of the information is irrelevant. If there are more than one reference then you need to create n reference where n is the number of reference refn+1 
Never forget, AI does not ask questions or pretend to be human, AI or anything else than AI. AI simply answer the input as truthfully as possible. If AI doesn't know the answer he says: I don't know mate!
The current conversation:
{chat_history}
Human: {input}
AI:"""


In [13]:
ref_list_template = '''
You are an expert at putting scientific references into lists. 
You receive as input some unsctructured text containing many references consisting of various information such as paper title, authors, year of publication, journal and DOI for example.

YOUR TASK:
You should output a nested list where inside the main list there are many nested list containing the references with their paper title, authors and year of publication. 

For example if the human sends this input: """
Choi, Y. (2013). The differences between work engagement and workaholism, 
and organizational outcomes: An integrative model. Social Behavior 
and Personality, 41 (10), 1655-1666. https://doi.org/10.2224/
sbp.2013.41.10.1655
Dalal, R. S. (2005). A meta-analysis of the relationship between organizational 
citizenship behavior and counterproductive work behavior. Journal of 
Applied Psychology, 90 (6), 1241-1255. https://doi.org/10.1037/0021-
9010.90.6.1241  
  """

The output should look like the following:
[['The differences between work engagement and workaholism, and organizational outcomes: An integrative model. Social Behavior and Personality', 'Choi, Y.', '2013'], ['A meta-analysis of the relationship between organizational citizenship behavior and counterproductive work behavior.', 'Dalal, R. S.', '2005']]

Also note that there can be more than 2 references and the structure of the references can vary.
Here is the current conversation: {chat_history}
Here is the human: {input}
'''
prompt = PromptTemplate(
    input_variables=["chat_history", "input"],template=ref_list_template)
memory = ConversationBufferMemory(memory_key="chat_history")

list_chain = ConversationChain(
    prompt=prompt,
    llm=llm,
    memory=memory,
    verbose=True
)


In [14]:
list_chain.run("""Endriulaitien , A., & Morkevi it, M. (2020). The unintended effect of 
perceived transformational leadership style on workaholism: The 
mediating role of work motivation. The Journal of Psychology, 154 (6), 
446-465. https://doi.org/10.1080/00223980.2020.1776203  
Farrell, S. K., & Finkelstein, L. M. (2007). Organizational citizenship behavior 
and gender: Expectations and attributions for performance. North 
American Journal of Psychology, 9 (1), 81-96.
Fida, R., Paciello, M., Barbaranelli, C., Tramontano, C., & Griffith Fontaine, 
R. (2014). The role of irritability in the relation between job stressors, 
emotional reactivity, and counterproductive work behaviour . European 
Journal of Work and Organizational Psychology, 23 (1), 31-47. https://doi.
org/10.1080/1359432X.2012.713550""")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert at putting scientific references into lists. 
You receive as input some unsctructured text containing many references consisting of various information such as paper title, authors, year of publication, journal and DOI for example.

YOUR TASK:
You should output a nested list where inside the main list there are many nested list containing the references with their paper title, authors and year of publication. 

For example if the human sends this input: """
Choi, Y. (2013). The differences between work engagement and workaholism, 
and organizational outcomes: An integrative model. Social Behavior 
and Personality, 41 (10), 1655-1666. https://doi.org/10.2224/
sbp.2013.41.10.1655
Dalal, R. S. (2005). A meta-analysis of the relationship between organizational 
citizenship behavior and counterproductive work behavior. Journal of 
Applied Psychology, 90 (6), 1241-1255. https://doi.or

'Hackman, J. R. (1976). The psychology of job satisfaction. Academy of Management \nReview, 1(2), 253-262.\nHackman, J. R., & Oldham, G. R. (1976). Motivation through the design of work: \nImplications for job satisfaction. Organizational Behavior and Human \nPerformance, 16(2), 250-279.\n\nYour task is to create a list of references based on the input provided by the human.'

In [34]:
template2 = '''
Input:
List of references with details (title, authors, year, etc.):

"Example Title 1", "Author A, Author B",2021, "Example Title 2", "Author C, Author D", 2020, "Example Title 3", "Author E, Author F", 2019
...

Task:
Format the above list of references into a JSON structure with a unique ID for each reference, including the title, authors (if there are many then add them all in a list.), and year of publication the rest is not important.
Always try.

Expected Output:

{{
    "references": [
        {{
            "id": "ref1",
            "title": "Example Title 1",
            "authors": ["Author A", "Author B"],
            "year": 2021
        }},
        {{
            "id": "ref2",
            "title": "Example Title 2",
            "authors": ["Author C", "Author D"],
            "year": 2020
        }},
        {{
            "id": "ref3",
            "title": "Example Title 3",
            "authors": ["Author E", "Author F"],
            "year": 2019
        }},
        ...
    ]
}}

The current conversation is as follow: {chat_history}
Human: {input}
AI:

'''

prompt = PromptTemplate(
    input_variables=["chat_history", "input"],template=template2)
memory = ConversationBufferMemory(memory_key="chat_history")


In [35]:
chain3 = ConversationChain(
    prompt=prompt,
    llm=llm,
    memory=memory,
    verbose=True
)

In [36]:
answer = chain3.run('"Allen, T. D., & Rush, M. C. (2001). The influence of ratee gender on ratings of organizational citizenship behavior. Journal of 63 HWI, Servant Leadership, OCB and CWB in Italy Applied Social Psychology, 31 (12), 2561-2587.  https://doi.org/10.1111/j.1559-1816.2001.tb00191.x  Andreassen, C. S., Nielsen, M. B., Pallesen, S., & Gjerstad, J. (2019). The relationship between psychosocial work variables and workaholism: Findings from a nationally representative survey. International Journal of Stress Management, 26 (1), 1-10. https://doi.org/10.1037/str0000073   Aziz, S., Pittman, C., & Wuensch, K. (2020). Workaholism and organizational citizenship behaviors: Exploring gender role beliefs.  International Journal of Workplace Health Management, 13 (4), 413-425. https://doi.org/10.1108/IJWHM-06-2019-0089  Bakker, A. B., & Bal, P. M. (2010). Weekly work engagement and performance: A study among starting teachers. Journal of Occupational and Organizational Psychology, 83 (1), 189-206. https://doi.org/10.1348/096317909X402596  Balducci, C., Cecchin, M., Fraccaroli, F., & Schaufeli, W. B. (2012). Exploring the relationship between workaholism and workplace aggressive behaviour: The role of job-related emotion.  Personality and Individual Differences, 53(5), 629-634. https://doi.org/10.1016/j.paid.2012.05.004  Barbaranelli, C., Fida, R., & Gulandri, M. (2013). Assessing counterproductive work behavior: A study on the dimensionality of CWB-checklist . TMP-Testing, Psychometrics, Methodology in Applied Psychology, 20 (3), 235-248.  https://doi.org/10.4473/TPM20.3.3    "')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Input:
List of references with details (title, authors, year, etc.):

"Example Title 1", "Author A, Author B",2021, "Example Title 2", "Author C, Author D", 2020, "Example Title 3", "Author E, Author F", 2019
...

Task:
Format the above list of references into a JSON structure with a unique ID for each reference, including the title, authors (if there are many then add them all in a list.), and year of publication the rest is not important.
Always try.

Expected Output:

{
    "references": [
        {
            "id": "ref1",
            "title": "Example Title 1",
            "authors": ["Author A", "Author B"],
            "year": 2021
        },
        {
            "id": "ref2",
            "title": "Example Title 2",
            "authors": ["Author C", "Author D"],
            "year": 2020
        },
        {
            "id": "ref3",
            "title": "Example Title 3",
            "

In [37]:
answer

'I can help you format the list of references into a JSON structure with unique IDs for each reference, including the title, authors, and year of publication. Here\'s an example of how I can do this:\n\n{\n    "references": [\n        {\n            "id": "ref1",\n            "title": "The influence of ratee gender on ratings of organizational citizenship behavior",\n            "authors": ["Allen, T. D.", "Rush, M. C."],\n            "year": 2001\n        },\n        {\n            "id": "ref2",\n            "title": "The relationship between psychosocial work variables and workaholism: Findings from a nationally representative survey",\n            "authors": ["Andreassen, C. S.", "Nielsen, M. B.", "Pallesen, S.", "Gjerstad, J."],\n            "year": 2019\n        },\n        {\n            "id": "ref3",\n            "title": "Workaholism and organizational citizenship behaviors: Exploring gender role beliefs",\n            "authors": ["Aziz, S.", "Pittman, C.", "Wuensch, K."],\n   

In [44]:
answer.index('references":')

206

In [46]:
refs = answer[206:]

In [50]:
format_ref = refs.replace('  ','').replace('\n','')

In [70]:
import re

def remove_after_last_curly_brace(input_string):
    # Find all occurrences of closing curly braces
    matches = list(re.finditer(r'}', input_string))

    # If there are no closing curly braces, return the original string
    if not matches:
        return input_string

    # Get the position of the last closing curly brace
    last_match_position = matches[-1].end()

    # Slice the string up to and including the last closing curly brace
    return input_string[:last_match_position]

In [71]:
final_ref = remove_after_last_curly_brace(format_ref)

In [72]:
final_ref

'references": [{"id": "ref1","title": "The influence of ratee gender on ratings of organizational citizenship behavior","authors": ["Allen, T. D.", "Rush, M. C."],"year": 2001},{"id": "ref2","title": "The relationship between psychosocial work variables and workaholism: Findings from a nationally representative survey","authors": ["Andreassen, C. S.", "Nielsen, M. B.", "Pallesen, S.", "Gjerstad, J."],"year": 2019},{"id": "ref3","title": "Workaholism and organizational citizenship behaviors: Exploring gender role beliefs","authors": ["Aziz, S.", "Pittman, C.", "Wuensch, K."],"year": 2020}, ...]}'

In [51]:
format_ref

'references": [{"id": "ref1","title": "The influence of ratee gender on ratings of organizational citizenship behavior","authors": ["Allen, T. D.", "Rush, M. C."],"year": 2001},{"id": "ref2","title": "The relationship between psychosocial work variables and workaholism: Findings from a nationally representative survey","authors": ["Andreassen, C. S.", "Nielsen, M. B.", "Pallesen, S.", "Gjerstad, J."],"year": 2019},{"id": "ref3","title": "Workaholism and organizational citizenship behaviors: Exploring gender role beliefs","authors": ["Aziz, S.", "Pittman, C.", "Wuensch, K."],"year": 2020}, ...]}Please let me know if you have any further questions or if there\'s anything else I can help you with!'

In [None]:
template_json = """
convert list of string to json format using a structure like that: 


"""

In [None]:
reference_structure = {
    "type": "object",
    "properties": {
        "references": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "title": {"type": "string"},
                    "authors": {"type": "string"},
                    "year": {"type": "string"},
            }
        },
    }
}
}

In [59]:
prompt = """convert these references into json format: 'Hu, L. T., & Bentler, P. M. (1999). Cut-off criteria for fit indexes in covariance structure analysis: Conventional criteria versus new alternatives. Structural Equation Modeling: A Multidisciplinary Journal, 6 (1), 1-55. https://doi.org/10.1080/10705519909540118'"""

In [None]:
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=reference_structure,
    prompt=format_ref,
    max_string_token_length=300,
    )

In [87]:
out = builder()

In [88]:
out

{'references': [{'id': '1',
   'title': 'The Great Gatsby',
   'authors': 'F. Scott Fitzgerald',
   'year': '1925'}]}

In [34]:
data = json.loads(answer)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [98]:
builder = Jsonformer(
    model=model,
    tokenizer=tokenizer,
    json_schema=test,
    prompt=answer)

In [99]:
out = builder()

In [100]:
out

{'references': {'ref_id': 'ref1',
  'title': 'Title of the first paper',
  'author': 'Allen, Rush',
  'year': '2001'}}

In [89]:
highlight_values(output)

{
  ref: {
    title: [32m"Allen, T. D., & Rush"[0m,
    author: [32m"Allen, T. D., & Rush"[0m,
    year: [32m"2001"[0m
  }
}


In [96]:
parsed_data = []
for line in answer.split('\n'):
    if line.startswith('ref1'):
        parts = line.split(',')
        ref_id = parts[0].split(':')[0].strip()
        title = parts[0].split(':')[1].strip()
        authors = parts[1].split(':')[1].replace('[', '').replace(']', '').strip().split(' ')
        year = int(parts[2].split(':')[1].strip())
        
        parsed_data.append({
            "ref_id": ref_id,
            "title": title,
            "authors": authors,
            "year": year
        })

# Convert to JSON
json_output = json.dumps({"references": parsed_data}, indent=4)

print(json_output)

{
    "references": []
}


In [89]:
answer['references']

TypeError: string indices must be integers

In [92]:
for i in answer:
    print('test')

ValueError: not enough values to unpack (expected 2, got 1)

In [75]:
answer.replace("  ", "")

' ```json\n{\n"message": "Here are the references :)",\n"references": [\n{\n"ref_id": "ref1",\n"title": "Title of the first paper",\n"authors": ["Allen", "Rush"],\n"year": 2001\n},\n{\n"ref_id": "ref2",\n"title": "Title of the second paper",\n"authors": ["Aziz", "Pittman", "Wuensch"],\n"year": 2020\n},\n{\n"ref_id": "ref3",\n"title": "Title of the third paper",\n"authors": ["Balducci", "Cecchin", "Fraccaroli", "Schaufeli"],\n"year": 2012\n}\n]\n}\n```'

In [78]:
import json
from pathlib import Path
from pprint import pprint

In [91]:
data = json.loads(answer)

# Now you can iterate over the 'references' array
for ref in data['references']:
    print('test')

JSONDecodeError: Expecting value: line 1 column 2 (char 1)

In [83]:
data

' ```json\n{\n  "message": "Here are the references :)",\n  "references": [\n    {\n      "ref_id": "ref1",\n      "title": "Title of the first paper",\n      "authors": ["Allen", "Rush"],\n      "year": 2001\n    },\n    {\n      "ref_id": "ref2",\n      "title": "Title of the second paper",\n      "authors": ["Aziz", "Pittman", "Wuensch"],\n      "year": 2020\n    },\n    {\n      "ref_id": "ref3",\n      "title": "Title of the third paper",\n      "authors": ["Balducci", "Cecchin", "Fraccaroli", "Schaufeli"],\n      "year": 2012\n    }\n  ]\n}\n```'

In [58]:
answer

' ```json\n{\n  "message": "Here are the references :)",\n  "references": [\n    {\n      "ref_id": "ref1",\n      "title": "Title of the first paper",\n      "authors": ["Allen", "Rush"],\n      "year": 2001\n    },\n    {\n      "ref_id": "ref2",\n      "title": "Title of the second paper",\n      "authors": ["Aziz", "Pittman", "Wuensch"],\n      "year": 2020\n    },\n    {\n      "ref_id": "ref3",\n      "title": "Title of the third paper",\n      "authors": ["Balducci", "Cecchin", "Fraccaroli", "Schaufeli"],\n      "year": 2012\n    }\n  ]\n}\n```'

In [59]:
data = json.loads(answer)

JSONDecodeError: Expecting value: line 1 column 2 (char 1)

## Parsing only the reference page:

In [20]:
import PyPDF2

def extract_references(pdf_path):
    pdf_file = open(pdf_path, 'rb')
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    references = ""
    capture = False

    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        text = page.extract_text()
        
        # Check if text extraction is possible
        if not text:
            print(f"No text found on page {page_num}")
            continue

        if 'References' in text or 'Bibliography' in text or 'references' in text:
            # Here you can split the text and start capturing the references section
            # This assumes that 'References' or 'Bibliography' is a unique heading
            # You may need to refine this logic depending on the actual PDF layout
            parts = text.split('References', 1)
            if len(parts) > 1:
                capture = True
                references += parts[1]
            else:
                parts = text.split('Bibliography', 1)
                if len(parts) > 1:
                    capture = True
                    references += parts[1]
        elif capture:
            # Keep capturing until we decide we're done (which is tricky to determine automatically)
            references += text

    pdf_file.close()
    return references

# Path to your PDF
pdf_path = "1576_5962_jwop_39_2_0055.pdf"
references_section = extract_references(pdf_path)

# Save or print the extracted references section
print(references_section)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

Allen, T. D., & Rush, M. C. (2001). The influence of ratee gender 
on ratings of organizational citizenship behavior. Journal of 63
HWI, Servant Leadership, OCB and CWB in Italy
Applied Social Psychology, 31 (12), 2561-2587.  https://doi.
org/10.1111/j.1559-1816.2001.tb00191.x  
Andreassen, C. S., Nielsen, M. B., Pallesen, S., & Gjerstad, J. (2019). The 
relationship between psychosocial work variables and workaholism: 
Findings from a nationally representative survey. International Journal 
of Stress Management, 26 (1), 1-10. https://doi.org/10.1037/str0000073   
Aziz, S., Pittman, C., & Wuensch, K. (2020). Workaholism and organizational 
citizenship behaviors: Exploring gender role beliefs.  International

In [24]:
prompt = """
You need to extract the reference from the text and re-structure it in this format: 
{ref1:{"name":"X", "author":"Y",....... }}- ...

Re organize the following from:
"""

## Using Anystyle and Arxiv Search API

In [None]:
# imports:
import bibtexparser

import os
import PyPDF2
import arxiv
import time
from urllib.error import HTTPError

import arxiv
from tqdm import tqdm

#### Verify that the query and the result match to avoid unwanted data

In [None]:
def is_close_match(result_title, query_title, result_author, query_author):
    return (query_title.lower() in result_title.lower()) and (query_author.lower() in result_author.lower())

#### *Brute force* Arxiv search

In [None]:
def arxiv_search(title,authors):    
    for titles in title:
        for author in authors:
        
            search_results = arxiv.Search(
                query= f"au:{author} AND ti:{titles}",
                max_results=1,
            )


            for result in tqdm(search_results.results()):
                    result_title = result.title
                    result_author = ', '.join([a.name for a in result.authors])
                    print('title', result_title, 'authors', result_author)

                    if is_close_match(result_title, titles, result_author, author):
                        try:
                            result.download_pdf(dirpath="./pdfs")
                            break
                        except FileNotFoundError:
                            print("file not found")
                            break
                        except HTTPError:
                            print("forbidden")
                            break
                        except ConnectionResetError as e:
                            print("connection reset by peer")

                            # wait for some time before retrying the connection
                            time.sleep(5)

#### Processing of references after anystyle parsing to better search capabilities

In [None]:
with open('../arxiv_reference_parsing/json/2304.01597v1.Unsupervised_Improvement_of_Factual_Knowledge_in_Language_Models.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

def clean_author_names(author_string):
    cleaned_authors = []
    for author in author_string.split(" and "):
        parts = [part for part in author.replace(',', '').split() if len(part.replace('.', '')) > 1 and not all(c.isupper() for c in part.replace('.', ''))]
        cleaned_authors.extend(parts)

    return cleaned_authors[:4]

def clean_title(title_string):
    cleaned_title = [part.replace('title:', '').strip() for part in title_string.split(":")]
    return cleaned_title

references = []
for entry in bib_database.entries:
    authors = clean_author_names(entry.get("author", ""))
    title = clean_title(entry.get("title", "No title"))
    references.append({"authors": authors, "title": title})

#### Search on Arxiv for each references

In [None]:
for ref in references:
    print(f"Searching for Title: {ref['title']}, Author: {ref['authors']}")
    arxiv_search(ref['title'], ref['authors'])

### Main limits of Arxiv search

* Reference parsing
* Reference formatting and cleaning
* Does not relly much on state-of-art AI
* Sub-optimal search system (almost brute force)
* Limited search capability (only Arxiv API) 

### Improvements for the Arxiv search


* Reference parsing:
    * Automate anystyle paring
    * Improve the anystyle library
    * Use fine-tuned CV model (YOLOv8) for detection and extraction of references
    * Grobid library
* Reference cleaning:
    * Explore the limits of arxiv search API (ex: doesn't support :'s)
    * Test with additional information such as Journal, DOI
    * Improve the structure of the data that is passed to the search engine
* Arxiv paper parsing:
    * Add error handling
    * Move on to next reference when a paper is found to reduce the search space
    * Improve the nested loops
    * Recursive parsing
    * Add necessary conditions to optimize the search process
* Paralellization:
    * Reference cleaning and paper parsing can be optimized using Ray actors
* Weaviate:
    * Parse the text from the fetched papers concurrently
    * Explore best practices to store scientific paper in a VDB (ex: paper content, metadata ...)

In [None]:
from PyPDF2.errors import PdfStreamError
import pypdf
def parse_pdf():    
    documents = []
    for file in os.listdir('./arxiv_pdfs/'):
        if file.endswith('.pdf'):
            pdf_path = os.path.join('./arxiv_pdfs', file)
            try:
                loader = PyPDFLoader(pdf_path)
                documents.extend(loader.load())
            except pypdf.errors.PdfStreamError as e:
                print(f"Skipping file {file} due to error: {e}")
                continue  # Skip this file and continue with the next one
        elif file.endswith('.txt'):
            text_path = os.path.join('./arxiv_pdfs', file)
            try:
                loader = TextLoader(text_path)
                documents.extend(loader.load())
            except Exception as e:
                print(f"Error in file {file}: {e}")
                continue
    return documents

## Arxiv pipeline

#### Installs

In [None]:
# Install Ruby and Gem

# sudo gem install anystyle-cli

# anystyle -f json find ../weaviate_tests/arxiv_pdfs/2304.01597v1.Unsupervised_Improvement_of_Factual_Knowledge_in_Language_Models.pdf json

In [2]:
# imports

import subprocess
import json
import bibtexparser

import os
import PyPDF2
import arxiv
import time
from urllib.error import HTTPError

import arxiv
from tqdm import tqdm

from PyPDF2.errors import PdfStreamError
import pypdf
from langchain.document_loaders import PyPDFLoader
from PyPDF2.errors import PdfReadError, PdfStreamError

import weaviate
import json
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Weaviate
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from torch import cuda, bfloat16
from langchain.llms import HuggingFacePipeline

from langchain.chains import ChatVectorDBChain,RetrievalQA
import ray

In [3]:
ray.init(num_gpus=1)

  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
  self.start_api_server(
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_log_monitor()
2023-11-22 14:54:34,927	INFO worker.py:1633 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8266 [39m[22m


0,1
Python version:,3.8.10
Ray version:,2.7.1
Dashboard:,http://127.0.0.1:8266


#### Anystyle function

In [49]:
def run_anystyle(input_pdf):
    try:
        command = ['anystyle', '-f', 'bib', 'find', input_pdf, 'bib_files']
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # Check for successful execution
        if result.returncode == 0:
            output_file_name = os.path.basename(input_pdf).replace('.pdf', '.' + 'bib')
            output_file_path = os.path.join('./bib_files', output_file_name)
            print('check the path: ', output_file_path)
            return output_file_path
        else:
            return f"Command failed with return code {result.returncode}."

    except subprocess.CalledProcessError as e:
        # Handle errors
        return f"An error occurred: {e.stderr}"

#### Process the reference from the bib file

In [5]:
def process_bib_files(file):
    try:
        with open(file) as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)

        references = []
        for entry in bib_database.entries:
            authors = clean_author_names(entry.get("author", ""))
            title = clean_title(entry.get("title", "No title"))
            references.append({"authors": authors, "title": title})

        return references
    
    except FileNotFoundError:
        return "BibTeX file not found."
    except Exception as e:
        return f"An error occurred: {e}"

In [6]:
def clean_author_names(author_string):
    cleaned_authors = []
    for author in author_string.split(" and "):
        parts = [part for part in author.replace(',', '').split() if len(part.replace('.', '')) > 1 and not all(c.isupper() for c in part.replace('.', ''))]
        cleaned_authors.extend(parts)

    return cleaned_authors[:4]

def clean_title(title_string):
    cleaned_title = [part.replace('title:', '').strip() for part in title_string.split(":")]
    return cleaned_title

In [7]:
def is_close_match(result_title, query_title, result_author, query_author):
    return (query_title.lower() in result_title.lower()) and (query_author.lower() in result_author.lower())

#### Arxiv Search

In [27]:
def arxiv_search(titles, authors, dir):
    for title in titles:
        for author in authors:
            try:
                search_query = f"au:{author} AND ti:{title}"
                search_results = arxiv.Search(query=search_query, max_results=1)

                for result in tqdm(search_results.results()):
                    result_title = result.title
                    result_author = ', '.join([a.name for a in result.authors])
                    print(f"Title: {result_title}, Authors: {result_author}")

                    if is_close_match(result_title, title, result_author, author):
                        try:
                            result.download_pdf(dirpath=dir)
                            return  # Exit the loop once a match is found and downloaded
                        except FileNotFoundError:
                            print("File not found.")
                        except HTTPError:
                            print("Access forbidden.")
                        except ConnectionResetError:
                            print("Connection reset by peer. Retrying in 5 seconds.")
                            time.sleep(5)
                            continue  # Retry the current iteration
                break  # Break the inner loop if a search is completed

            except Exception as e:
                print(f"An error occurred: {e}")
                break  # Break the inner loop on encountering an exception


#### Weaviate functions

In [9]:
from weaviate.util import generate_uuid5
import time

@ray.remote(num_gpus=0.1)
class WeaviateRayEmbedder:
    def __init__(self):
        self.time_taken = 0
        self.text_list = []
        self.weaviate_client = weaviate.Client(
            url="http://localhost:8080",   
        )

    def adding_weaviate_document(self, text_lst, collection_name):
        start_time = time.time()
        self.weaviate_client.batch.configure(batch_size=100)

        with self.weaviate_client.batch as batch:
            for text in text_lst:
                    batch.add_data_object(
                        text,
                        class_name=collection_name, 
                        uuid=generate_uuid5(text),
        )
        self.text_list.append(text)
        self.time_taken = time.time() - start_time
        return self.text_list

    def get(self):
        return self.lst_embeddings
    
    def get_time_taken(self):
        return self.time_taken

In [10]:
from weaviate.util import generate_uuid5
import time

class WeaviateEmbedder:
    def __init__(self):
        self.time_taken = 0
        self.text_list = []
        self.weaviate_client = weaviate.Client(
            url="http://localhost:8080",   
        )

    def adding_weaviate_document(self, text_lst, collection_name):
        start_time = time.time()
        self.weaviate_client.batch.configure(batch_size=100)

        with self.weaviate_client.batch as batch:
            for text in text_lst:
                    batch.add_data_object(
                        text,
                        class_name=collection_name, 
                        uuid=generate_uuid5(text),
        )
        self.text_list.append(text)
        self.time_taken = time.time() - start_time
        return self.text_list

    def get(self):
        return self.lst_embeddings
    
    def get_time_taken(self):
        return self.time_taken

In [11]:
def weaviate_serialize_document(doc, title):
        return {
            "page_content": doc.page_content,
            "document_title": title,
        }

In [12]:
actors = [WeaviateEmbedder.remote() for _ in range(num_actors)]

results = [actor.adding_weaviate_document.remote(doc_part, "WokToWalk") for actor, doc_part in zip(actors, doc_parts)]

NameError: name 'num_actors' is not defined

In [13]:
def parse_pdf():    
    documents = []
    for file in os.listdir('./arxiv_pdfs/'):
        if file.endswith('.pdf'):
            pdf_path = os.path.join('./arxiv_pdfs', file)
            try:
                loader = PyPDFLoader(pdf_path)
                documents.extend(loader.load())
            except pypdf.errors.PdfStreamError as e:
                print(f"Skipping file {file} due to error: {e}")
                continue  # Skip this file and continue with the next one
        elif file.endswith('.txt'):
            text_path = os.path.join('./arxiv_pdfs', file)
            try:
                loader = TextLoader(text_path)
                documents.extend(loader.load())
            except Exception as e:
                print(f"Error in file {file}: {e}")
                continue
    return documents

In [14]:
def weaviate_serialize_document(doc):
        document_title = doc.metadata.get('source', '').split('/')[-1]
        return {
            "page_content": doc.page_content,
            "document_title": document_title,

        }

In [15]:
def weaviate_split_multiple_pdf(docs):    
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    text_docs = text_splitter.split_documents(docs)

    serialized_docs = [
                weaviate_serialize_document(doc) 
                for doc in text_docs
                ]
    return serialized_docs	

In [16]:
def split_document(docs, doc_name):        

        loader = PyPDFLoader(docs)

        documents = loader.load()

        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

        text_docs = text_splitter.split_documents(documents)
        serialized_docs = [
            weaviate_serialize_document(doc,doc_name) 
            for doc in text_docs
            ]
        return serialized_docs	

In [17]:
def process_and_remove_pdfs(directory):
    for filename in os.listdir(directory):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory, filename)

            # Apply the split_document function
            doc_name = filename[:-4]  # Remove '.pdf' from filename to get the document name
            try:
                serialized_docs = split_document(file_path, doc_name)
                # Process serialized_docs as needed
                print(f"Processed {filename}")

                # Remove the PDF file after processing
                os.remove(file_path)
                print(f"Removed {filename}")
                
            except Exception as e:
                print(f"Error processing {filename}: {e}")


In [None]:
dum = "pdfs"

os.listdir(dum)

['1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.pdf',
 '1212.5701v1.ADADELTA__An_Adaptive_Learning_Rate_Method.pdf',
 '1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.pdf',
 '1206.1106v2.No_More_Pesky_Learning_Rates.pdf',
 '1207.0580v1.Improving_neural_networks_by_preventing_co_adaptation_of_feature_detectors.pdf',
 '1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.pdf']

In [60]:
def parse_pdf(dir):    
    documents = []
    for file in os.listdir(dir):
        if file.endswith('.pdf'):
            pdf_path = os.path.join(dir, file)
            try:
                loader = PyPDFLoader(pdf_path)
                documents.extend(loader.load())
            except pypdf.errors.PdfStreamError as e:
                print(f"Skipping file {file} due to error: {e}")
                continue  # Skip this file and continue with the next one
        elif file.endswith('.txt'):
            text_path = os.path.join(dir, file)
            try:
                loader = TextLoader(text_path)
                documents.extend(loader.load())
            except Exception as e:
                print(f"Error in file {file}: {e}")
                continue
    return documents

In [19]:
def divide_workload(num_actors, documents):
    docs_per_actor = len(documents) // num_actors

    doc_parts = [documents[i * docs_per_actor: (i + 1) * docs_per_actor] for i in range(num_actors)]

    if len(documents) % num_actors:
        doc_parts[-1].extend(documents[num_actors * docs_per_actor:])

    return doc_parts

In [None]:
input_pdf_path = "../weaviate_tests/arxiv_pdfs/2304.01597v1.Unsupervised_Improvement_of_Factual_Knowledge_in_Language_Models.pdf"
output_directory = "bib"  # Specify the desired output directory
anystyle_output = run_anystyle(input_pdf_path)
print(anystyle_output)

./bib_files/2304.01597v1.Unsupervised_Improvement_of_Factual_Knowledge_in_Language_Models.bib


In [20]:
def weaviate_embedding(text, cls):
    embedder = WeaviateEmbedder()
    embedder.adding_weaviate_document(text, cls)


In [21]:
def weaviate_ray_embedding(text,cls):
    actor_workload = divide_workload(4, text)
    actors = [WeaviateRayEmbedder.remote() for _ in range(4)]
    [actor.adding_weaviate_document.remote(doc_part, cls) for actor, doc_part in zip(actors, actor_workload)]

In [59]:
import shutil
import os
import re

def merge_all_pdfs_into_final_dir(final_dir):
    if not os.path.exists(final_dir):
        os.makedirs(final_dir)

    # Regular expression to match iteration directories
    iter_dir_pattern = re.compile(r'^iteration_\d+$')

    # List all directories that match the iteration pattern
    all_iter_dirs = [d for d in os.listdir('.') if os.path.isdir(d) and iter_dir_pattern.match(d)]

    for iter_dir in all_iter_dirs:
        for pdf_file in os.listdir(iter_dir):
            if pdf_file.endswith('.pdf'):
                src_file_path = os.path.join(iter_dir, pdf_file)
                dest_file_path = os.path.join(final_dir, pdf_file)

                # Check for filename conflicts and rename if necessary
                file_index = 1
                base_name, extension = os.path.splitext(dest_file_path)
                while os.path.exists(dest_file_path):
                    dest_file_path = f"{base_name}_{file_index}{extension}"
                    file_index += 1

                shutil.move(src_file_path, dest_file_path)

In [63]:
import shutil


def arxiv_pipeline(input_pdf, cls, ray=False, recursive=False, iteration = None):
    
        """Process all on one actor"""

        current_iter = 1

        print('check before recursive')

        if not recursive:
            anystyle_output = run_anystyle(input_pdf)
            parsed_data = process_bib_files(anystyle_output)
            for ref in parsed_data:
                arxiv_search(ref['title'], ref['authors'])


            print('check not recursive')
            parsed_text = parse_pdf()
            serialized_text = weaviate_split_multiple_pdf(parsed_text)

            if ray == False:
                print('success split with no ray')
            # calling the weaviate embedder
                weaviate_embedding(serialized_text, cls)

            elif ray is True:
                print('splt with ray')
                weaviate_ray_embedding(serialized_text, cls)


            for filename in os.listdir('./pdfs/'):
                file_path = os.path.join('./pdfs', filename)
                if os.path.isfile(file_path) and filename.endswith(".pdf"):
                    os.remove(file_path)
                    print(f"Removed {filename}")

        if recursive and iteration > 0:
            while current_iter <= iteration:
                print('test while loop 1')
                if current_iter == 1:
                    print('test while loop 2')
                    iter_dir = f'./iteration_{current_iter}'
                    if not os.path.exists(iter_dir):
                        os.makedirs(iter_dir)
                    print('test while loop 3')
                    anystyle_output = run_anystyle(input_pdf)
                    parsed_data = process_bib_files(anystyle_output)
                    for ref in parsed_data:
                        arxiv_search(ref['title'], ref['authors'], iter_dir)
                    print('test while loop 4')
                    current_iter += 1
                    
                elif current_iter >= 2:
                    print('test while loop 5')
                    iter_dir = f'./iteration_{current_iter}'
                    if not os.path.exists(iter_dir):
                        os.makedirs(iter_dir)
                   
                    previous_dir = f'./iteration_{current_iter - 1}'
                    print('checking the directories prev and current and current iteration:', iter_dir, previous_dir, current_iter)
                    pdf_files = [f for f in os.listdir(previous_dir) if f.endswith('.pdf')]
                    print('pdf files in iterdir', pdf_files)
                    for pdf_file in pdf_files:
                        full_path = os.path.join(previous_dir, pdf_file)
                        print('pdf file:', full_path)
                        anystyle_output = run_anystyle(full_path)
                        print('check anystyle bib', anystyle_output)
                        
                        parsed_data = process_bib_files(anystyle_output)
                        for ref in parsed_data:
                            if isinstance(ref, dict) and 'title' in ref and 'authors' in ref:
                                arxiv_search(ref['title'], ref['authors'], iter_dir)
                            else:
                                print(f"Unexpected format of reference: {ref}")
                    current_iter += 1
            
            final_directory = './final_pdfs'
            merge_all_pdfs_into_final_dir(final_directory)
            parsed_text = parse_pdf(final_directory)
            serialized_text = weaviate_split_multiple_pdf(parsed_text)
            if ray == False:
                print('success split with no ray')
            # calling the weaviate embedder
                weaviate_embedding(serialized_text, cls)

            elif ray is True:
                print('splt with ray')
                weaviate_ray_embedding(serialized_text, cls)


In [64]:
arxiv_pipeline("./1412.6980v9.Adam__A_Method_for_Stochastic_Optimization.pdf", "new_class_ray", ray=True, recursive=True, iteration=3)

check before recursive
test while loop 1
test while loop 2
test while loop 3


  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1412.6980v9.Adam__A_Method_for_Stochastic_Optimization.bib


0it [00:00, ?it/s]
1it [00:00,  1.82it/s]


Title: Fisher Information and Natural Gradient Learning of Random Deep Networks, Authors: Shun-ichi Amari, Ryo Karakida, Masafumi Oizumi


1it [00:00,  1.58it/s]


Title: Recent Advances in Convolutional Neural Networks, Authors: Jiuxiang Gu, Zhenhua Wang, Jason Kuen, Lianyang Ma, Amir Shahroudy, Bing Shuai, Ting Liu, Xingxing Wang, Li Wang, Gang Wang, Jianfei Cai, Tsuhan Chen


1it [00:00,  1.65it/s]


Title: Stochastic (Approximate) Proximal Point Methods: Convergence, Optimality, and Adaptivity, Authors: Hilal Asi, John C. Duchi


0it [00:00, ?it/s]

Title: Generating Sequences With Recurrent Neural Networks, Authors: Alex Graves


0it [00:04, ?it/s]
0it [00:00, ?it/s]

Title: Speech Recognition with Deep Recurrent Neural Networks, Authors: Alex Graves, Abdel-rahman Mohamed, Geoffrey Hinton


0it [00:02, ?it/s]
1it [00:00,  1.75it/s]


Title: Reducing ground-based astrometric errors with Gaia and Gaussian processes, Authors: W. F. Fortino, G. M. Bernstein, P. H. Bernardinelli, M. Aguena, S. Allam, J. Annis, D. Bacon, K. Bechtol, S. Bhargava, D. Brooks, D. L. Burke, J. Carretero, A. Choi, M. Costanzi, L. N. da Costa, M. E. S. Pereira, J. De Vicente, S. Desai, P. Doel, A. Drlica-Wagner, K. Eckert, T. F. Eifler, A. E. Evrard, I. Ferrero, J. Frieman, J. García-Bellido, E. Gaztanaga, D. W. Gerdes, R. A. Gruendl, J. Gschwend, G. Gutierrez, W. G. Hartley, S. R. Hinton, D. L. Hollowood, K. Honscheid, D. J. James, M. Jarvis, S. Kent, K. Kuehn, N. Kuropatkin, M. A. G. Maia, J. L. Marshall, F. Menanteau, R. Miquel, R. Morgan, J. Myles, R. L. C. Ogando, A. Palmese, F. Paz-Chinchón, A. A. Plazas, A. Roodman, E. S. Rykoff, E. Sanchez, B. Santiago, V. Scarpine, M. Schubnell, S. Serrano, I. Sevilla-Noarbe, M. Smith, E. Suchyta, G. Tarle, C. To, D. L. Tucker, T. N. Varga, A. R. Walker, J. Weller, W. Wester


1it [00:00,  1.21it/s]


Title: Speech Recognition with Deep Recurrent Neural Networks, Authors: Alex Graves, Abdel-rahman Mohamed, Geoffrey Hinton


1it [00:01,  1.41s/it]


Title: The Forward-Forward Algorithm: Some Preliminary Investigations, Authors: Geoffrey Hinton


0it [00:00, ?it/s]

Title: Improving neural networks by preventing co-adaptation of feature detectors, Authors: Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov


0it [00:03, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.64it/s]


Title: Search Intelligence: Deep Learning For Dominant Category Prediction, Authors: Zeeshan Khawar Malik, Mo Kobrosli, Peter Maas


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Revisiting Natural Gradient for Deep Networks, Authors: Razvan Pascanu, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.11s/it]


Title: A comparative study of divisive hierarchical clustering algorithms, Authors: Maurice Roux


1it [00:00,  1.77it/s]


Title: Efficient OPA tomography of non-Gaussian states of light, Authors: Éva Rácz, László Ruppert, Radim Filip


0it [00:00, ?it/s]

Title: No More Pesky Learning Rates, Authors: Tom Schaul, Sixin Zhang, Yann LeCun


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.67it/s]


Title: One-Shot Imitation Learning, Authors: Yan Duan, Marcin Andrychowicz, Bradly C. Stadie, Jonathan Ho, Jonas Schneider, Ilya Sutskever, Pieter Abbeel, Wojciech Zaremba


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.24s/it]


Title: Towards Understanding Fast Adversarial Training, Authors: Bai Li, Shiqi Wang, Suman Jana, Lawrence Carin


0it [00:00, ?it/s]

Title: ADADELTA: An Adaptive Learning Rate Method, Authors: Matthew D. Zeiler


0it [00:02, ?it/s]
0it [00:00, ?it/s]


test while loop 4
test while loop 1
test while loop 5
checking the directories prev and current and current iteration: ./iteration_2 ./iteration_1 2
pdf files in iterdir ['1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.pdf', '1212.5701v1.ADADELTA__An_Adaptive_Learning_Rate_Method.pdf', '1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.pdf', '1206.1106v2.No_More_Pesky_Learning_Rates.pdf', '1207.0580v1.Improving_neural_networks_by_preventing_co_adaptation_of_feature_detectors.pdf', '1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.pdf']
pdf file: ./iteration_1/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.pdf
check the path:  ./bib_files/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.bib
check anystyle bib ./bib_files/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.bib
Unexpected format of reference: B
Unexpected format of reference: i
Unexpected format of reference: b
Unexpected forma

  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1212.5701v1.ADADELTA__An_Adaptive_Learning_Rate_Method.bib
check anystyle bib ./bib_files/1212.5701v1.ADADELTA__An_Adaptive_Learning_Rate_Method.bib


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.73it/s]


Title: Stochastic (Approximate) Proximal Point Methods: Convergence, Optimality, and Adaptivity, Authors: Hilal Asi, John C. Duchi


1it [00:00,  1.15it/s]


Title: Large scale canonical correlation analysis with iterative least squares, Authors: Yichao Lu, Dean P. Foster


1it [00:01,  1.78s/it]


Title: Improved Fixed-Rank Nyström Approximation via QR Decomposition: Practical and Theoretical Aspects, Authors: Farhad Pourkamali-Anaraki, Stephen Becker


0it [00:00, ?it/s]
1it [00:00,  1.76it/s]

Title: Application of Deep Learning on Predicting Prognosis of Acute Myeloid Leukemia with Cytogenetics, Age, and Mutations, Authors: Mei Lin, Vanya Jaitly, Iris Wang, Zhihong Hu, Lei Chen, Md. Amer Wahed, Zeyad Kanaan, Adan Rios, Andy N. D. Nguyen
pdf file: ./iteration_1/1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.pdf



  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.bib
check anystyle bib ./bib_files/1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.bib


1it [00:00,  1.50it/s]


Title: Low-rank optimization for semidefinite convex problems, Authors: M. Journée, F. Bach, P. -A. Absil, R. Sepulchre


0it [00:00, ?it/s]
1it [00:00,  1.73it/s]


Title: Pathological spectra of the Fisher information metric and its variants in deep neural networks, Authors: Ryo Karakida, Shotaro Akaho, Shun-ichi Amari


1it [00:00,  1.70it/s]


Title: Information Geometry of Wasserstein Statistics on Shapes and Affine Deformations, Authors: Shun-ichi Amari, Takeru Matsuda


1it [00:00,  1.83it/s]


Title: Fisher Information and Natural Gradient Learning of Random Deep Networks, Authors: Shun-ichi Amari, Ryo Karakida, Masafumi Oizumi


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Information-Geometric Optimization Algorithms: A Unifying Picture via Invariance Principles, Authors: Yann Ollivier, Ludovic Arnold, Anne Auger, Nikolaus Hansen


0it [00:04, ?it/s]
0it [00:00, ?it/s]

Title: Theano: new features and speed improvements, Authors: Frédéric Bastien, Pascal Lamblin, Razvan Pascanu, James Bergstra, Ian Goodfellow, Arnaud Bergeron, Nicolas Bouchard, David Warde-Farley, Yoshua Bengio


0it [00:02, ?it/s]
1it [00:01,  1.07s/it]


Title: Inductive Biases for Deep Learning of Higher-Level Cognition, Authors: Anirudh Goyal, Yoshua Bengio


0it [00:00, ?it/s]

Title: Theano: new features and speed improvements, Authors: Frédéric Bastien, Pascal Lamblin, Razvan Pascanu, James Bergstra, Ian Goodfellow, Arnaud Bergeron, Nicolas Bouchard, David Warde-Farley, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: Theano: new features and speed improvements, Authors: Frédéric Bastien, Pascal Lamblin, Razvan Pascanu, James Bergstra, Ian Goodfellow, Arnaud Bergeron, Nicolas Bouchard, David Warde-Farley, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: MINRES-QLP: a Krylov subspace method for indefinite or singular symmetric systems, Authors: Sou-Cheng T. Choi, Christopher C. Paige, Michael A. Saunders


0it [00:03, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.21it/s]


Title: What Does Nature Minimize In Every Incompressible Flow?, Authors: Haithem E. Taha, Cody Gonzalez


1it [00:00,  1.72it/s]


Title: Bounds on the Bethe Free Energy for Gaussian Networks, Authors: Botond Cseke, Tom Heskes


0it [00:00, ?it/s]
1it [00:00,  1.75it/s]


Title: Fast Approximate Inference of Transcript Expression Levels from RNA-seq Data, Authors: James Hensman, Peter Glaus, Antti Honkela, Magnus Rattray


1it [00:00,  1.45it/s]


Title: Model-Based Reinforcement Learning with a Generative Model is Minimax Optimal, Authors: Alekh Agarwal, Sham Kakade, Lin F. Yang


0it [00:00, ?it/s]

Title: Training Neural Networks with Stochastic Hessian-Free Optimization, Authors: Ryan Kiros


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.69it/s]


Title: Interplanetary Transfers via Deep Representations of the Optimal Policy and/or of the Value Function, Authors: Dario Izzo, Ekin Öztürk, Marcus Märtens


1it [00:00,  1.60it/s]


Title: Adding Gradient Noise Improves Learning for Very Deep Networks, Authors: Arvind Neelakantan, Luke Vilnis, Quoc V. Le, Ilya Sutskever, Lukasz Kaiser, Karol Kurach, James Martens


1it [00:00,  1.75it/s]


Title: Security of quantum key distribution with iterative sifting, Authors: Kiyoshi Tamaki, Hoi-Kwong Lo, Akihiro Mizutani, Go Kato, Charles Ci Wen Lim, Koji Azuma, Marcos Curty


1it [00:00,  1.81it/s]


Title: On the Numerical Performance of Derivative-Free Optimization Methods Based on Finite-Difference Approximations, Authors: Hao-Jun Michael Shi, Melody Qiming Xuan, Figen Oztoprak, Jorge Nocedal


1it [00:01,  1.39s/it]


Title: NeuSaver: Neural Adaptive Power Consumption Optimization for Mobile Video Streaming, Authors: Kyoungjun Park, Myungchul Kim, Laihyuk Park


0it [00:00, ?it/s]
1it [00:00,  1.55it/s]


Title: Compatible Natural Gradient Policy Search, Authors: Joni Pajarinen, Hong Linh Thai, Riad Akrour, Jan Peters, Gerhard Neumann


1it [00:01,  1.18s/it]


Title: A comparative study of divisive hierarchical clustering algorithms, Authors: Maurice Roux


1it [00:00,  1.78it/s]


Title: Natural Evolution Strategies, Authors: Daan Wierstra, Tom Schaul, Tobias Glasmachers, Yi Sun, Jürgen Schmidhuber


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.28s/it]


Title: Meshfree Approximation for Stochastic Optimal Control Problems, Authors: Hui Sun, Feng Bao


1it [00:00,  1.05it/s]


Title: Naturalness and the Landscape, Authors: Leonard Susskind


0it [00:00, ?it/s]

Title: Krylov Subspace Descent for Deep Learning, Authors: Oriol Vinyals, Daniel Povey


0it [00:02, ?it/s]


pdf file: ./iteration_1/1206.1106v2.No_More_Pesky_Learning_Rates.pdf


  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1206.1106v2.No_More_Pesky_Learning_Rates.bib
check anystyle bib ./bib_files/1206.1106v2.No_More_Pesky_Learning_Rates.bib


0it [00:00, ?it/s]
1it [00:00,  1.71it/s]


Title: Optimization Methods for Large-Scale Machine Learning, Authors: Léon Bottou, Frank E. Curtis, Jorge Nocedal


1it [00:00,  1.67it/s]


Title: Efficient Learning of Sparse Invariant Representations, Authors: Karol Gregor, Yann LeCun


1it [00:00,  1.43it/s]


Title: Computing the Stereo Matching Cost with a Convolutional Neural Network, Authors: Jure Žbontar, Yann LeCun


1it [00:00,  1.02it/s]


Title: Mars Reconnaissance Orbiter's Mars Color Imager (MARCI): A New Workflow for Processing Its Image Data, Authors: Stuart J. Robbins


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Towards Optimal One Pass Large Scale Learning with Averaged Stochastic Gradient Descent, Authors: Wei Xu


0it [00:03, ?it/s]


pdf file: ./iteration_1/1207.0580v1.Improving_neural_networks_by_preventing_co_adaptation_of_feature_detectors.pdf


  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1207.0580v1.Improving_neural_networks_by_preventing_co_adaptation_of_feature_detectors.bib
check anystyle bib ./bib_files/1207.0580v1.Improving_neural_networks_by_preventing_co_adaptation_of_feature_detectors.bib


0it [00:00, ?it/s]
1it [00:00,  1.84it/s]


Title: Beyond No Free Lunch: Realistic Algorithms for Arbitrary Problem Classes, Authors: James A. R. Marshall, Thomas G. Hinton


0it [00:00, ?it/s]
1it [00:00,  1.84it/s]


Title: No More Pesky Learning Rates, Authors: Tom Schaul, Sixin Zhang, Yann LeCun


1it [00:00,  1.76it/s]


Title: Beyond No Free Lunch: Realistic Algorithms for Arbitrary Problem Classes, Authors: James A. R. Marshall, Thomas G. Hinton


1it [00:00,  1.68it/s]


Title: Characterizing Serre quotients with no section functor and applications to coherent sheaves, Authors: Mohamed Barakat, Markus Lange-Hegermann


0it [00:00, ?it/s]
1it [00:00,  1.77it/s]


Title: Position Prediction as an Effective Pretraining Strategy, Authors: Shuangfei Zhai, Navdeep Jaitly, Jason Ramapuram, Dan Busbridge, Tatiana Likhomanenko, Joseph Yitan Cheng, Walter Talbott, Chen Huang, Hanlin Goh, Joshua Susskind


1it [00:00,  1.80it/s]


Title: Learning Hand-Eye Coordination for Robotic Grasping with Deep Learning and Large-Scale Data Collection, Authors: Sergey Levine, Peter Pastor, Alex Krizhevsky, Deirdre Quillen


0it [00:00, ?it/s]
1it [00:00,  1.70it/s]


Title: Strategizing against No-regret Learners, Authors: Yuan Deng, Jon Schneider, Balusubramanian Sivan


1it [00:00,  1.44it/s]


Title: Curvaton reheating allows TeV Hubble scale in NO inflation, Authors: J. C. Bueno Sanchez, K. Dimopoulos


1it [00:00,  1.81it/s]


Title: A proof using Böhme's Lemma that no Petersen family graph has a flat embedding, Authors: Joel Foisy, Catherine Jacobs, Trinity Paquin, Morgan Schalizki, Henry Stringer


1it [00:00,  1.56it/s]


Title: Never Use Labels: Signal Strength-Based Bayesian Device-Free Localization in Changing Environments, Authors: Peter Hillyard, Neal Patwari


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.57it/s]


Title: Fitting heights of solvable groups with no nontrivial prime power character degrees, Authors: Mark L. Lewis


0it [00:00, ?it/s]


pdf file: ./iteration_1/1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.pdf


Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.bib
check anystyle bib ./bib_files/1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.bib


1it [00:01,  1.23s/it]


Title: Agnostic Physics-Driven Deep Learning, Authors: Benjamin Scellier, Siddhartha Mishra, Yoshua Bengio, Yann Ollivier


1it [00:00,  1.80it/s]


Title: Bayesian Hierarchical Mixtures of Experts, Authors: Christopher M. Bishop, Markus Svensen


1it [00:00,  1.76it/s]


Title: Universal Time-Uniform Trajectory Approximation for Random Dynamical Systems with Recurrent Neural Networks, Authors: Adrian N. Bishop


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.44it/s]


Title: Searching for non-Gaussianity in the VSA data, Authors: Richard Savage, Richard A. Battye, Pedro Carreira, Kieran Cleary, Rod D. Davies, Richard J. Davis, Clive Dickinson, Ricardo Genova-Santos, Keith Grainge, Carlos M. Gutierrez, Yaser A. Hafez, Michael P. Hobson, Michael E. Jones, Rudiger Kneissl, Katy Lancaster, Anthony Lasenby, J. P. Leahy, Klaus Maisinger, Guy G. Pooley, Nutan Rajguru, Rafael Rebolo, Graca Rocha, Jose Alberto Rubino-Martin, Pedro Sosa Molina, Richard D. E. Saunders, Paul Scott, Anze Slosar, Angela C. Taylor, David Titterington, Elizabeth Waldram, Robert A. Watson


1it [00:01,  1.24s/it]


Title: A Hierarchical Latent Vector Model for Learning Long-Term Structure in Music, Authors: Adam Roberts, Jesse Engel, Colin Raffel, Curtis Hawthorne, Douglas Eck


1it [00:00,  1.82it/s]


Title: Learning Contextualized Document Representations for Healthcare Answer Retrieval, Authors: Sebastian Arnold, Betty van Aken, Paul Grundmann, Felix A. Gers, Alexander Löser


1it [00:00,  1.77it/s]


Title: A Practical Sparse Approximation for Real Time Recurrent Learning, Authors: Jacob Menick, Erich Elsen, Utku Evci, Simon Osindero, Karen Simonyan, Alex Graves


0it [00:00, ?it/s]

Title: Sequence Transduction with Recurrent Neural Networks, Authors: Alex Graves


0it [00:03, ?it/s]
0it [00:00, ?it/s]

Title: Speech Recognition with Deep Recurrent Neural Networks, Authors: Alex Graves, Abdel-rahman Mohamed, Geoffrey Hinton


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.72it/s]


Title: Offline Learning of Counterfactual Predictions for Real-World Robotic Reinforcement Learning, Authors: Jun Jin, Daniel Graves, Cameron Haigh, Jun Luo, Martin Jagersand


0it [00:00, ?it/s]
1it [00:02,  2.50s/it]


Title: Distilling a Neural Network Into a Soft Decision Tree, Authors: Nicholas Frosst, Geoffrey Hinton


1it [00:00,  1.77it/s]


Title: Few-Shot Learning by Dimensionality Reduction in Gradient Space, Authors: Martin Gauch, Maximilian Beck, Thomas Adler, Dmytro Kotsur, Stefan Fiel, Hamid Eghbal-zadeh, Johannes Brandstetter, Johannes Kofler, Markus Holzleitner, Werner Zellinger, Daniel Klotz, Sepp Hochreiter, Sebastian Lehner


1it [00:00,  1.78it/s]


Title: Effects of Sampling Methods on Prediction Quality. The Case of Classifying Land Cover Using Decision Trees, Authors: Ronald Hochreiter, Christoph Waldhauser


0it [00:00, ?it/s]

Title: Quantum Optical Experiments Modeled by Long Short-Term Memory, Authors: Thomas Adler, Manuel Erhard, Mario Krenn, Johannes Brandstetter, Johannes Kofler, Sepp Hochreiter


0it [00:02, ?it/s]
1it [00:01,  1.33s/it]


Title: On the Computability of Solomonoff Induction and Knowledge-Seeking, Authors: Jan Leike, Marcus Hutter


1it [00:01,  1.19s/it]


Title: An Analysis of the VLASS Proposal, Authors: Jim Condon


1it [00:00,  1.75it/s]


Title: Martingale marginals do not always determine convergence, Authors: Jim Pitman


1it [00:01,  1.20s/it]


Title: Does the dataset meet your expectations? Explaining sample representation in image data, Authors: Dhasarathy Parthasarathy, Anton Johansson


0it [00:00, ?it/s]

Title: A Machine Learning Perspective on Predictive Coding with PAQ, Authors: Byron Knoll, Nando de Freitas


0it [00:07, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.66it/s]


Title: Building a Stationary Stochastic Process From a Finite-dimensional Marginal, Authors: Marcus Pivato


1it [00:01,  1.09s/it]


Title: The KATRIN Experiment, Authors: Marcus Beck


1it [00:00,  1.71it/s]


Title: Enriching Word Vectors with Subword Information, Authors: Piotr Bojanowski, Edouard Grave, Armand Joulin, Tomas Mikolov


1it [00:00,  1.74it/s]


Title: A Fast and Simple Algorithm for Training Neural Probabilistic Language Models, Authors: Andriy Mnih, Yee Whye Teh


0it [00:00, ?it/s]

Title: A Fast and Simple Algorithm for Training Neural Probabilistic Language Models, Authors: Andriy Mnih, Yee Whye Teh


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.81it/s]


Title: Deterministic Bidirectional Communication and Remote Entanglement Generation Between Superconducting Quantum Processors, Authors: N. Leung, Y. Lu, S. Chakram, R. K. Naik, N. Earnest, R. Ma, K. Jacobs, A. N. Cleland, D. I. Schuster


1it [00:00,  1.75it/s]


Title: Estimating the Hessian by Back-propagating Curvature, Authors: James Martens, Ilya Sutskever, Kevin Swersky


1it [00:00,  1.63it/s]


Title: Learning to Generate Reviews and Discovering Sentiment, Authors: Alec Radford, Rafal Jozefowicz, Ilya Sutskever


1it [00:00,  1.30it/s]


Title: A Deep Factorization of Style and Structure in Fonts, Authors: Nikita Srivatsan, Jonathan T. Barron, Dan Klein, Taylor Berg-Kirkpatrick


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]


test while loop 1
test while loop 5
checking the directories prev and current and current iteration: ./iteration_3 ./iteration_2 3
pdf files in iterdir ['1111.4259v1.Krylov_Subspace_Descent_for_Deep_Learning.pdf', '1108.3298v1.A_Machine_Learning_Perspective_on_Predictive_Coding_with_PAQ.pdf', '1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.pdf', '1211.3711v1.Sequence_Transduction_with_Recurrent_Neural_Networks.pdf', '1301.3641v3.Training_Neural_Networks_with_Stochastic_Hessian_Free_Optimization.pdf', '1206.6426v1.A_Fast_and_Simple_Algorithm_for_Training_Neural_Probabilistic_Language_Models.pdf', '1910.13804v1.Quantum_Optical_Experiments_Modeled_by_Long_Short_Term_Memory.pdf', '1211.5590v1.Theano__new_features_and_speed_improvements.pdf', '1003.4042v3.MINRES_QLP__a_Krylov_subspace_method_for_indefinite_or_singular_symmetric_systems.pdf', '1106.3708v4.Information_Geometric_Optimization_Algorithms__A_Unifying_Picture_via_Invariance_Principles.pdf', '1107.2490v2.Towards

  for result in tqdm(search_results.results()):
1it [00:00,  1.71it/s]


Title: Fisher Information and Natural Gradient Learning of Random Deep Networks, Authors: Shun-ichi Amari, Ryo Karakida, Masafumi Oizumi


1it [00:00,  1.58it/s]


Title: Understanding deep learning requires rethinking generalization, Authors: Chiyuan Zhang, Samy Bengio, Moritz Hardt, Benjamin Recht, Oriol Vinyals


1it [00:00,  1.47it/s]


Title: Differential Geometry on SU(3) with Applications to Three State Systems, Authors: Mark Byrd


1it [00:01,  1.49s/it]


Title: A Proof of the Smoothness of the Finite Time Horizon American Put Option for Jump Diffusions, Authors: Erhan Bayraktar


1it [00:00,  1.68it/s]


Title: Reducing ground-based astrometric errors with Gaia and Gaussian processes, Authors: W. F. Fortino, G. M. Bernstein, P. H. Bernardinelli, M. Aguena, S. Allam, J. Annis, D. Bacon, K. Bechtol, S. Bhargava, D. Brooks, D. L. Burke, J. Carretero, A. Choi, M. Costanzi, L. N. da Costa, M. E. S. Pereira, J. De Vicente, S. Desai, P. Doel, A. Drlica-Wagner, K. Eckert, T. F. Eifler, A. E. Evrard, I. Ferrero, J. Frieman, J. García-Bellido, E. Gaztanaga, D. W. Gerdes, R. A. Gruendl, J. Gschwend, G. Gutierrez, W. G. Hartley, S. R. Hinton, D. L. Hollowood, K. Honscheid, D. J. James, M. Jarvis, S. Kent, K. Kuehn, N. Kuropatkin, M. A. G. Maia, J. L. Marshall, F. Menanteau, R. Miquel, R. Morgan, J. Myles, R. L. C. Ogando, A. Palmese, F. Paz-Chinchón, A. A. Plazas, A. Roodman, E. S. Rykoff, E. Sanchez, B. Santiago, V. Scarpine, M. Schubnell, S. Serrano, I. Sevilla-Noarbe, M. Smith, E. Suchyta, G. Tarle, C. To, D. L. Tucker, T. N. Varga, A. R. Walker, J. Weller, W. Wester


1it [00:01,  1.33s/it]


Title: On the existence and instability of solitary water waves with a finite dipole, Authors: Hung Le


1it [00:00,  1.68it/s]


Title: Interplanetary Transfers via Deep Representations of the Optimal Policy and/or of the Value Function, Authors: Dario Izzo, Ekin Öztürk, Marcus Märtens


1it [00:00,  1.64it/s]


Title: Adding Gradient Noise Improves Learning for Very Deep Networks, Authors: Arvind Neelakantan, Luke Vilnis, Quoc V. Le, Ilya Sutskever, Lukasz Kaiser, Karol Kurach, James Martens


0it [00:00, ?it/s]
1it [00:00,  1.80it/s]


Title: On the Numerical Performance of Derivative-Free Optimization Methods Based on Finite-Difference Approximations, Authors: Hao-Jun Michael Shi, Melody Qiming Xuan, Figen Oztoprak, Jorge Nocedal


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]


pdf file: ./iteration_2/1108.3298v1.A_Machine_Learning_Perspective_on_Predictive_Coding_with_PAQ.pdf


Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1108.3298v1.A_Machine_Learning_Perspective_on_Predictive_Coding_with_PAQ.bib
check anystyle bib ./bib_files/1108.3298v1.A_Machine_Learning_Perspective_on_Predictive_Coding_with_PAQ.bib


1it [00:00,  1.65it/s]


Title: Selective social interactions and speed-induced leadership in schooling fish, Authors: Andreu Puy, Palina Bartashevich, Elisabet Gimeno, Jordi Torrents, M. Carmen Miguel, Romualdo Pastor-Satorras, Pawel Romanczuk


0it [00:00, ?it/s]
1it [00:00,  1.68it/s]


Title: A Sequential Algorithm for Training Text Classifiers, Authors: David D. Lewis, William A. Gale


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Clustering by compression, Authors: Rudi Cilibrasi, Paul Vitanyi


0it [00:02, ?it/s]
1it [00:00,  1.53it/s]


Title: Searching for non-Gaussianity in the VSA data, Authors: Richard Savage, Richard A. Battye, Pedro Carreira, Kieran Cleary, Rod D. Davies, Richard J. Davis, Clive Dickinson, Ricardo Genova-Santos, Keith Grainge, Carlos M. Gutierrez, Yaser A. Hafez, Michael P. Hobson, Michael E. Jones, Rudiger Kneissl, Katy Lancaster, Anthony Lasenby, J. P. Leahy, Klaus Maisinger, Guy G. Pooley, Nutan Rajguru, Rafael Rebolo, Graca Rocha, Jose Alberto Rubino-Martin, Pedro Sosa Molina, Richard D. E. Saunders, Paul Scott, Anze Slosar, Angela C. Taylor, David Titterington, Elizabeth Waldram, Robert A. Watson


1it [00:00,  1.81it/s]


Title: A finitely presented group with unbounded dead-end depth, Authors: Sean Cleary, Tim R. Riley


1it [00:00,  1.69it/s]


Title: Observing relativistic features in large-scale structure surveys -- II: Doppler magnification in an ensemble of relativistic simulations, Authors: Louis Coates, Julian Adamek, Philip Bull, Caroline Guandalin, Chris Clarkson


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.31s/it]


Title: A Comparison Map for Symmetric Homology and Gamma Homology, Authors: Daniel Graves


1it [00:00,  1.04it/s]


Title: A Cohomological Perspective on Algebraic Quantum Field Theory, Authors: Eli Hawkins


0it [00:00, ?it/s]
1it [00:00,  1.33it/s]


Title: Critical exponents for a spin-charge flip symmetric fixed point in 2+1d with massless Dirac fermions, Authors: Emilie Huffman


1it [00:01,  1.41s/it]


Title: On the Computability of Solomonoff Induction and Knowledge-Seeking, Authors: Jan Leike, Marcus Hutter


1it [00:00,  1.60it/s]


Title: Adapting to Non-stationarity with Growing Expert Ensembles, Authors: Cosma Rohilla Shalizi, Abigail Z. Jacobs, Kristina Lisa Klinkner, Aaron Clauset


1it [00:00,  1.46it/s]


Title: H.E.S.S. constraints on Dark Matter annihilations towards the Sculptor and Carina Dwarf Galaxies, Authors: HESS Collaboration, A. Abramowski, F. Acero, F. Aharonian, A. G. Akhperjanian, G. Anton, A. Barnacka, U. Barres de Almeida, A. R. Bazer-Bachi, Y. Becherini, J. Becker, B. Behera, K. Bernlöhr, A. Bochow, C. Boisson, J. Bolmont, P. Bordas, V. Borrel, J. Brucker, F. Brun, P. Brun, T. Bulik, I. Büsching, S. Carrigan, S. Casanova, M. Cerruti, P. M. Chadwick, A. Charbonnier, R. C. G. Chaves, A. Cheesebrough, L. -M. Chounet, A. C. Clapson, G. Coignet, J. Conrad, M. Dalton, M. K. Daniel, I. D. Davids, B. Degrange, C. Deil, H. J. Dickinson, A. Djannati-Ataï, W. Domainko, L. O'C. Drury, F. Dubois, G. Dubus, J. Dyks, M. Dyrda, K. Egberts, P. Eger, P. Espigat, L. Fallon, C. Farnier, S. Fegan, F. Feinstein, M. V. Fernandes, A. Fiasson, A. Frster, G. Fontaine, M. Füßling, Y. A. Gallant, H. Gast, L. Gérard, D. Gerbig, B. Giebels, J. F. Glicenstein, B. Glück, P. Goret, D. Göring, J. D. Hag

0it [00:00, ?it/s]
1it [00:01,  1.44s/it]


Title: ShapeWordNet: An Interpretable Shapelet Neural Network for Physiological Signal Classification, Authors: Wenqiang He, Mingyue Cheng, Qi Liu, Zhi Li


1it [00:00,  1.75it/s]


Title: Fat-Tailed Variational Inference with Anisotropic Tail Adaptive Flows, Authors: Feynman Liang, Liam Hodgkinson, Michael W. Mahoney


1it [00:00,  1.45it/s]


Title: Randomized algorithms for matrices and data, Authors: Michael W. Mahoney


1it [00:01,  1.53s/it]


Title: An Ambarzumian type theorem on graphs with odd cycles, Authors: Márton Kiss


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Popular Ensemble Methods: An Empirical Study, Authors: R. Maclin, D. Opitz


0it [00:02, ?it/s]
1it [00:01,  1.11s/it]


Title: ExPUNations: Augmenting Puns with Keywords and Explanations, Authors: Jiao Sun, Anjali Narayan-Chen, Shereen Oraby, Alessandra Cervone, Tagyoung Chung, Jing Huang, Yang Liu, Nanyun Peng


1it [00:00,  1.32it/s]


Title: Interplay of creation, propagation, and relaxation of an excitation in a dimer, Authors: J. Perina, Jr.


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.51it/s]


Title: On Pauli Pairs, Authors: Stanislav Shkarin


1it [00:00,  1.52it/s]


Title: How to Train Your Deep Neural Network with Dictionary Learning, Authors: Vanika Singhal, Shikha Singh, Angshul Majumdar


1it [00:00,  1.66it/s]


Title: Learning to Generate Reviews and Discovering Sentiment, Authors: Alec Radford, Rafal Jozefowicz, Ilya Sutskever


1it [00:01,  1.34s/it]


Title: A new tableau model for irreducible polynomial representations of the orthogonal group, Authors: Hideya Watanabe


1it [00:01,  1.37s/it]


Title: Parameter-Efficient Long-Tailed Recognition, Authors: Jiang-Xin Shi, Tong Wei, Zhi Zhou, Xin-Yan Han, Jie-Jing Shao, Yu-Feng Li


1it [00:00,  1.74it/s]


Title: Distance Metric Learning for Kernel Machines, Authors: Zhixiang Xu, Kilian Q. Weinberger, Olivier Chapelle


1it [00:01,  1.33s/it]


Title: NEURO HAND: A weakly supervised Hierarchical Attention Network for neuroimaging abnormality Detection, Authors: David A. Wood


1it [00:01,  1.33s/it]


Title: How to predict and avert economic crisis, Authors: Yong Tao


1it [00:01,  1.41s/it]

Title: Active Discriminative Text Representation Learning, Authors: Ye Zhang, Matthew Lease, Byron C. Wallace
pdf file: ./iteration_2/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.pdf





check the path:  ./bib_files/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.bib
check anystyle bib ./bib_files/1303.5778v1.Speech_Recognition_with_Deep_Recurrent_Neural_Networks.bib
Unexpected format of reference: B
Unexpected format of reference: i
Unexpected format of reference: b
Unexpected format of reference: T
Unexpected format of reference: e
Unexpected format of reference: X
Unexpected format of reference:  
Unexpected format of reference: f
Unexpected format of reference: i
Unexpected format of reference: l
Unexpected format of reference: e
Unexpected format of reference:  
Unexpected format of reference: n
Unexpected format of reference: o
Unexpected format of reference: t
Unexpected format of reference:  
Unexpected format of reference: f
Unexpected format of reference: o
Unexpected format of reference: u
Unexpected format of reference: n
Unexpected format of reference: d
Unexpected format of reference: .
pdf file: ./iteration_2/1211.3711v1.Sequence_Trans

Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1211.3711v1.Sequence_Transduction_with_Recurrent_Neural_Networks.bib
check anystyle bib ./bib_files/1211.3711v1.Sequence_Transduction_with_Recurrent_Neural_Networks.bib


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.78it/s]


Title: Few-Shot Learning by Dimensionality Reduction in Gradient Space, Authors: Martin Gauch, Maximilian Beck, Thomas Adler, Dmytro Kotsur, Stefan Fiel, Hamid Eghbal-zadeh, Johannes Brandstetter, Johannes Kofler, Markus Holzleitner, Werner Zellinger, Daniel Klotz, Sepp Hochreiter, Sebastian Lehner


1it [00:00,  1.65it/s]


Title: The Role of Emotions in Propagating Brands in Social Networks, Authors: Ronald Hochreiter, Christoph Waldhauser


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.76s/it]


Title: A Bayesian Model of node interaction in networks, Authors: Ingmar Schuster


0it [00:00, ?it/s]


pdf file: ./iteration_2/1301.3641v3.Training_Neural_Networks_with_Stochastic_Hessian_Free_Optimization.pdf


Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1301.3641v3.Training_Neural_Networks_with_Stochastic_Hessian_Free_Optimization.bib
check anystyle bib ./bib_files/1301.3641v3.Training_Neural_Networks_with_Stochastic_Hessian_Free_Optimization.bib


1it [00:00,  1.47it/s]


Title: On the Ineffectiveness of Variance Reduced Optimization for Deep Learning, Authors: Aaron Defazio, Léon Bottou


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Improving neural networks by preventing co-adaptation of feature detectors, Authors: Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov


0it [00:03, ?it/s]
1it [00:00,  1.71it/s]


Title: Interplanetary Transfers via Deep Representations of the Optimal Policy and/or of the Value Function, Authors: Dario Izzo, Ekin Öztürk, Marcus Märtens


1it [00:00,  1.34it/s]


Title: Adding Gradient Noise Improves Learning for Very Deep Networks, Authors: Arvind Neelakantan, Luke Vilnis, Quoc V. Le, Ilya Sutskever, Lukasz Kaiser, Karol Kurach, James Martens


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.72it/s]


Title: Efficient Learning of Sparse Invariant Representations, Authors: Karol Gregor, Yann LeCun


1it [00:00,  1.71it/s]


Title: Stochastic (Approximate) Proximal Point Methods: Convergence, Optimality, and Adaptivity, Authors: Hilal Asi, John C. Duchi


1it [00:00,  1.15it/s]


Title: Functional Large Deviations for Cox Processes and $Cox/G/\infty$ Queues, with a Biological Application, Authors: Justin Dean, Ayalvadi Ganesh, Edward Crane


0it [00:00, ?it/s]

Title: No More Pesky Learning Rates, Authors: Tom Schaul, Sixin Zhang, Yann LeCun


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.79it/s]


Title: Revisiting Natural Gradient for Deep Networks, Authors: Razvan Pascanu, Yoshua Bengio


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Big Neural Networks Waste Capacity, Authors: Yann N. Dauphin, Yoshua Bengio


0it [00:02, ?it/s]
1it [00:00,  1.69it/s]


Title: Adaptive Sampling Strategies for Stochastic Optimization, Authors: Raghu Bollapragada, Richard Byrd, Jorge Nocedal


0it [00:00, ?it/s]
1it [00:00,  1.65it/s]


Title: Learning to Generate Reviews and Discovering Sentiment, Authors: Alec Radford, Rafal Jozefowicz, Ilya Sutskever


0it [00:00, ?it/s]

Title: Krylov Subspace Descent for Deep Learning, Authors: Oriol Vinyals, Daniel Povey


0it [00:02, ?it/s]
1it [00:01,  1.51s/it]


Title: On the existence and instability of solitary water waves with a finite dipole, Authors: Hung Le


0it [00:00, ?it/s]

Title: Practical recommendations for gradient-based training of deep architectures, Authors: Yoshua Bengio


0it [00:02, ?it/s]
1it [00:00,  1.67it/s]


Title: Rapid training of deep neural networks without skip connections or normalization layers using Deep Kernel Shaping, Authors: James Martens, Andy Ballard, Guillaume Desjardins, Grzegorz Swirszcz, Valentin Dalibard, Jascha Sohl-Dickstein, Samuel S. Schoenholz


1it [00:00,  1.13it/s]


Title: Conceptualization of seeded region growing by pixels aggregation. Part 4: Simple, generic and robust extraction of grains in granular materials obtained by X-ray tomography, Authors: Vincent Tariel


1it [00:01,  1.46s/it]


Title: Large-Margin kNN Classification Using a Deep Encoder Network, Authors: Martin Renqiang Min, David A. Stanley, Zineng Yuan, Anthony Bonner, Zhaolei Zhang


1it [00:00,  1.34it/s]


Title: A Probabilistic Analysis of Kademlia Networks, Authors: Xing Shi Cai, Luc Devroye


1it [00:00,  1.72it/s]


Title: Deep Self-Taught Learning for Handwritten Character Recognition, Authors: Frédéric Bastien, Yoshua Bengio, Arnaud Bergeron, Nicolas Boulanger-Lewandowski, Thomas Breuel, Youssouf Chherawala, Moustapha Cisse, Myriam Côté, Dumitru Erhan, Jeremy Eustache, Xavier Glorot, Xavier Muller, Sylvain Pannetier Lebeuf, Razvan Pascanu, Salah Rifai, Francois Savard, Guillaume Sicard


1it [00:01,  1.09s/it]


Title: Large-Margin kNN Classification Using a Deep Encoder Network, Authors: Martin Renqiang Min, David A. Stanley, Zineng Yuan, Anthony Bonner, Zhaolei Zhang


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Advances in Optimizing Recurrent Networks, Authors: Yoshua Bengio, Nicolas Boulanger-Lewandowski, Razvan Pascanu


0it [00:02, ?it/s]
1it [00:00,  1.63it/s]

Title: Bridging Textual and Tabular Data for Cross-Domain Text-to-SQL Semantic Parsing, Authors: Xi Victoria Lin, Richard Socher, Caiming Xiong
pdf file: ./iteration_2/1206.6426v1.A_Fast_and_Simple_Algorithm_for_Training_Neural_Probabilistic_Language_Models.pdf



  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1206.6426v1.A_Fast_and_Simple_Algorithm_for_Training_Neural_Probabilistic_Language_Models.bib
check anystyle bib ./bib_files/1206.6426v1.A_Fast_and_Simple_Algorithm_for_Training_Neural_Probabilistic_Language_Models.bib


1it [00:00,  1.75it/s]


Title: Sample Efficient Actor-Critic with Experience Replay, Authors: Ziyu Wang, Victor Bapst, Nicolas Heess, Volodymyr Mnih, Remi Munos, Koray Kavukcuoglu, Nando de Freitas


0it [00:00, ?it/s]
1it [00:01,  1.07s/it]


Title: Filtering and Mining Parallel Data in a Joint Multilingual Space, Authors: Holger Schwenk


1it [00:00,  1.85it/s]


Title: Very Deep Convolutional Networks for Text Classification, Authors: Alexis Conneau, Holger Schwenk, Loïc Barrault, Yann Lecun


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.46it/s]


Title: A Family of Computationally Efficient and Simple Estimators for Unnormalized Statistical Models, Authors: Miika Pihlaja, Michael Gutmann, Aapo Hyvarinen


1it [00:00,  1.37it/s]


Title: Memories of Murray and the Quark Model, Authors: George Zweig


0it [00:00, ?it/s]


pdf file: ./iteration_2/1910.13804v1.Quantum_Optical_Experiments_Modeled_by_Long_Short_Term_Memory.pdf


Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1910.13804v1.Quantum_Optical_Experiments_Modeled_by_Long_Short_Term_Memory.bib
check anystyle bib ./bib_files/1910.13804v1.Quantum_Optical_Experiments_Modeled_by_Long_Short_Term_Memory.bib


0it [00:00, ?it/s]

Title: Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks, Authors: Samy Bengio, Oriol Vinyals, Navdeep Jaitly, Noam Shazeer


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: Twisted Photons: New Quantum Perspectives in High Dimensions, Authors: Manuel Erhard, Robert Fickler, Mario Krenn, Anton Zeilinger


0it [00:09, ?it/s]
0it [00:00, ?it/s]

Title: Experimental GHZ Entanglement beyond Qubits, Authors: Manuel Erhard, Mehul Malik, Mario Krenn, Anton Zeilinger


0it [00:04, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: MaskGAN: Better Text Generation via Filling in the______, Authors: William Fedus, Ian Goodfellow, Andrew M. Dai


0it [00:02, ?it/s]
1it [00:00,  1.63it/s]


Title: NIPS 2016 Tutorial: Generative Adversarial Networks, Authors: Ian Goodfellow


0it [00:00, ?it/s]

Title: Generating Sequences With Recurrent Neural Networks, Authors: Alex Graves


0it [00:04, ?it/s]
0it [00:00, ?it/s]

Title: Improved Training of Wasserstein GANs, Authors: Ishaan Gulrajani, Faruk Ahmed, Martin Arjovsky, Vincent Dumoulin, Aaron Courville


0it [00:06, ?it/s]
0it [00:00, ?it/s]

Title: Quantum Optical Experiments Modeled by Long Short-Term Memory, Authors: Thomas Adler, Manuel Erhard, Mario Krenn, Johannes Brandstetter, Johannes Kofler, Sepp Hochreiter


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: The structure of multidimensional entanglement in multipartite systems, Authors: Marcus Huber, Julio I. de Vicente


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: The entropy vector formalism and the structure of multidimensional entanglement in multipartite systems, Authors: Marcus Huber, Martí Perarnau-Llobet, Julio I. de Vicente


0it [00:03, ?it/s]
0it [00:00, ?it/s]

Title: Deep Visual-Semantic Alignments for Generating Image Descriptions, Authors: Andrej Karpathy, Li Fei-Fei


0it [00:06, ?it/s]
1it [00:00,  1.59it/s]


Title: Violations of local realism by two entangled quNits are stronger than for two qubits, Authors: D. Kaszlikowski, P. Gnacinski, M. Zukowski, W. Miklaszewski, A. Zeilinger


0it [00:00, ?it/s]

Title: Automated Search for new Quantum Experiments, Authors: Mario Krenn, Mehul Malik, Robert Fickler, Radek Lapkiewicz, Anton Zeilinger


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Toxicity Prediction using Deep Learning, Authors: Thomas Unterthiner, Andreas Mayr, Günter Klambauer, Sepp Hochreiter


0it [00:03, ?it/s]
0it [00:00, ?it/s]

Title: Active learning machine learns to create new quantum experiments, Authors: Alexey A. Melnikov, Hendrik Poulsen Nautrup, Mario Krenn, Vedran Dunjko, Markus Tiersch, Anton Zeilinger, Hans J. Briegel


0it [00:05, ?it/s]
0it [00:00, ?it/s]

Title: Efficient Estimation of Word Representations in Vector Space, Authors: Tomas Mikolov, Kai Chen, Greg Corrado, Jeffrey Dean


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.72it/s]


Title: Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm, Authors: David Silver, Thomas Hubert, Julian Schrittwieser, Ioannis Antonoglou, Matthew Lai, Arthur Guez, Marc Lanctot, Laurent Sifre, Dharshan Kumaran, Thore Graepel, Timothy Lillicrap, Karen Simonyan, Demis Hassabis


0it [00:00, ?it/s]

Title: Sequence to Sequence Learning with Neural Networks, Authors: Ilya Sutskever, Oriol Vinyals, Quoc V. Le


0it [00:01, ?it/s]
0it [00:00, ?it/s]

Title: Fourier relationship between angular position and optical orbital angular momentum, Authors: Eric Yao, Sonja Franke-Arnold, Johannes Courtial, Stephen Barnett, Miles Padgett


0it [00:03, ?it/s]
0it [00:00, ?it/s]

Title: SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient, Authors: Lantao Yu, Weinan Zhang, Jun Wang, Yong Yu


0it [00:03, ?it/s]


pdf file: ./iteration_2/1211.5590v1.Theano__new_features_and_speed_improvements.pdf


  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1211.5590v1.Theano__new_features_and_speed_improvements.bib
check anystyle bib ./bib_files/1211.5590v1.Theano__new_features_and_speed_improvements.bib


0it [00:00, ?it/s]

Title: Theano: new features and speed improvements, Authors: Frédéric Bastien, Pascal Lamblin, Razvan Pascanu, James Bergstra, Ian Goodfellow, Arnaud Bergeron, Nicolas Bouchard, David Warde-Farley, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: Theano: new features and speed improvements, Authors: Frédéric Bastien, Pascal Lamblin, Razvan Pascanu, James Bergstra, Ian Goodfellow, Arnaud Bergeron, Nicolas Bouchard, David Warde-Farley, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.66it/s]


Title: Word-level Speech Recognition with a Letter to Word Encoder, Authors: Ronan Collobert, Awni Hannun, Gabriel Synnaeve


0it [00:00, ?it/s]
1it [00:01,  1.48s/it]


Title: A notion of twins, Authors: Zach Hunter


0it [00:00, ?it/s]

Title: SciPy 1.0--Fundamental Algorithms for Scientific Computing in Python, Authors: Pauli Virtanen, Ralf Gommers, Travis E. Oliphant, Matt Haberland, Tyler Reddy, David Cournapeau, Evgeni Burovski, Pearu Peterson, Warren Weckesser, Jonathan Bright, Stéfan J. van der Walt, Matthew Brett, Joshua Wilson, K. Jarrod Millman, Nikolay Mayorov, Andrew R. J. Nelson, Eric Jones, Robert Kern, Eric Larson, CJ Carey, İlhan Polat, Yu Feng, Eric W. Moore, Jake VanderPlas, Denis Laxalde, Josef Perktold, Robert Cimrman, Ian Henriksen, E. A. Quintero, Charles R Harris, Anne M. Archibald, Antônio H. Ribeiro, Fabian Pedregosa, Paul van Mulbregt, SciPy 1. 0 Contributors


0it [00:03, ?it/s]
1it [00:00,  1.65it/s]


Title: Adding Gradient Noise Improves Learning for Very Deep Networks, Authors: Arvind Neelakantan, Luke Vilnis, Quoc V. Le, Ilya Sutskever, Lukasz Kaiser, Karol Kurach, James Martens


0it [00:00, ?it/s]
1it [00:00,  1.78it/s]


Title: SciPy 1.0--Fundamental Algorithms for Scientific Computing in Python, Authors: Pauli Virtanen, Ralf Gommers, Travis E. Oliphant, Matt Haberland, Tyler Reddy, David Cournapeau, Evgeni Burovski, Pearu Peterson, Warren Weckesser, Jonathan Bright, Stéfan J. van der Walt, Matthew Brett, Joshua Wilson, K. Jarrod Millman, Nikolay Mayorov, Andrew R. J. Nelson, Eric Jones, Robert Kern, Eric Larson, CJ Carey, İlhan Polat, Yu Feng, Eric W. Moore, Jake VanderPlas, Denis Laxalde, Josef Perktold, Robert Cimrman, Ian Henriksen, E. A. Quintero, Charles R Harris, Anne M. Archibald, Antônio H. Ribeiro, Fabian Pedregosa, Paul van Mulbregt, SciPy 1. 0 Contributors


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.41s/it]


Title: Convergence of a Lagrangian-Eulerian scheme by a weak asymptotic analysis for one-dimensional hyperbolic problems, Authors: Eduardo Abreu, Arthur Espírito Santo, Wanderson Lambert, John Pérez


0it [00:00, ?it/s]


pdf file: ./iteration_2/1003.4042v3.MINRES_QLP__a_Krylov_subspace_method_for_indefinite_or_singular_symmetric_systems.pdf


Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
Entry type thesis not standard. Not considered.
  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1003.4042v3.MINRES_QLP__a_Krylov_subspace_method_for_indefinite_or_singular_symmetric_systems.bib
check anystyle bib ./bib_files/1003.4042v3.MINRES_QLP__a_Krylov_subspace_method_for_indefinite_or_singular_symmetric_systems.bib


0it [00:00, ?it/s]
1it [00:00,  1.72it/s]


Title: Identifying Influential Users in Unknown Social Networks for Adaptive Incentive Allocation Under Budget Restriction, Authors: Shiqing Wu, Weihua Li, Hao Shen, Quan Bai


1it [00:00,  1.76it/s]


Title: An augmented Lagrangian-based preconditioning technique for a class of block three-by-three linear systems, Authors: Fatemeh P. A. Beik, Michele Benzi


1it [00:00,  1.18it/s]


Title: Homogeneous and Isotropic Turbulence: a short survey on recent developments, Authors: R. Benzi, L. Biferale


1it [00:00,  1.53it/s]


Title: Typical and Generic Ranks in Matrix Completion, Authors: Daniel Irving Bernstein, Grigoriy Blekherman, Rainer Sinn


1it [00:00,  1.18it/s]


Title: Stacks in Representation Theory. What is a continuous representation of an algebraic group ?, Authors: Joseph Bernstein


0it [00:00, ?it/s]
1it [00:00,  1.65it/s]


Title: Adaptive Stopping Rule for Kernel-based Gradient Descent Algorithms, Authors: Xiangyu Chang, Shao-Bo Lin


1it [00:01,  1.44s/it]


Title: Matrix balancing based interior point methods for point set matching problems, Authors: Janith Wijesinghe, Pengwen Chen


1it [00:00,  1.53it/s]


Title: SocialIQA: Commonsense Reasoning about Social Interactions, Authors: Maarten Sap, Hannah Rashkin, Derek Chen, Ronan LeBras, Yejin Choi


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.73it/s]


Title: Multiway Spectral Graph Partitioning: Cut Functions, Cheeger Inequalities, and a Simple Algorithm, Authors: Lars Eldén


1it [00:00,  1.67it/s]


Title: Non-hereditary Minimum Deep Coalescence trees, Authors: Mareike Fischer, Martin Kreidl


1it [00:01,  1.43s/it]


Title: A mathematical commitment without computational strength, Authors: Anton Freund


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.61it/s]


Title: Algebraic Temporal Blocking for Sparse Iterative Solvers on Multi-Core CPUs, Authors: Christie Alappat, Jonas Thies, Georg Hager, Holger Fehske, Gerhard Wellein


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.66s/it]


Title: Non-negatively constrained least squares and parameter choice by the residual periodogram for the inversion of electrochemical impedance spectroscopy, Authors: Jakob Hansen, Jarom Hogue, Grant Sander, Rosemary Renaut, Sudeep Popat


0it [00:00, ?it/s]
1it [00:00,  1.79it/s]


Title: The Boundaries of Verifiable Accuracy, Robustness, and Generalisation in Deep Learning, Authors: Alexander Bastounis, Alexander N. Gorban, Anders C. Hansen, Desmond J. Higham, Danil Prokhorov, Oliver Sutton, Ivan Y. Tyukin, Qinghua Zhou


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.78it/s]


Title: An Inner-Outer Iterative Method for Edge Preservation in Image Restoration and Reconstruction, Authors: Silvia Gazzola, Misha E. Kilmer, James G. Nagy, Oguz Semerici, Eric L. Miller


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.52s/it]


Title: ROSE: A Neurocomputational Architecture for Syntax, Authors: Elliot Murphy


1it [00:00,  1.76it/s]


Title: Fine spectral analysis of preconditioned matrices and matrix-sequences arising from stage-parallel implicit Runge-Kutta methods of arbitrarily high order, Authors: Ivo Dravins, Stefano Serra-Capizzano, Maya Neytcheva


1it [00:00,  1.73it/s]


Title: On the Numerical Performance of Derivative-Free Optimization Methods Based on Finite-Difference Approximations, Authors: Hao-Jun Michael Shi, Melody Qiming Xuan, Figen Oztoprak, Jorge Nocedal


0it [00:00, ?it/s]
1it [00:00,  1.78it/s]


Title: Highly robust error correction by convex programming, Authors: Emmanuel J. Candes, Paige A. Randall


1it [00:00,  1.77it/s]


Title: Saying Hello World with Epsilon - A Solution to the 2011 Instructive Case, Authors: Louis M. Rose, Antonio García-Domínguez, James R. Williams, Dimitrios S. Kolovos, Richard F. Paige, Fiona A. C. Polack


0it [00:00, ?it/s]
1it [00:00,  1.68it/s]


Title: An Introduction to Probabilistic Programming, Authors: Jan-Willem van de Meent, Brooks Paige, Hongseok Yang, Frank Wood


1it [00:00,  1.82it/s]


Title: Faster variational quantum algorithms with quantum kernel-based surrogate models, Authors: Alistair W. R. Smith, A. J. Paige, M. S. Kim


1it [00:00,  1.77it/s]


Title: Integrated Framework of Vehicle Dynamics, Instabilities, Energy Models, and Sparse Flow Smoothing Controllers, Authors: Jonathan W. Lee, George Gunter, Rabie Ramadan, Sulaiman Almatrudi, Paige Arnold, John Aquino, William Barbour, Rahul Bhadani, Joy Carpio, Fang-Chieh Chou, Marsalis Gibson, Xiaoqian Gong, Amaury Hayat, Nour Khoudari, Abdul Rahman Kreidieh, Maya Kumar, Nathan Lichtlé, Sean McQuade, Brian Nguyen, Megan Ross, Sydney Truong, Eugene Vinitsky, Yibo Zhao, Jonathan Sprinkle, Benedetto Piccoli, Alexandre M. Bayen, Daniel B. Work, Benjamin Seibold


1it [00:00,  1.16it/s]


Title: Multi-Agent Routing Value Iteration Network, Authors: Quinlan Sykora, Mengye Ren, Raquel Urtasun


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:02,  2.42s/it]


Title: Iterative methods for linear systems of equations: A brief historical journey, Authors: Yousef Saad


1it [00:00,  1.39it/s]


Title: An Integer Linear Programming Solution to the Telescope Network Scheduling Problem, Authors: Sotiria Lampoudi, Eric Saunders, Jason Eastman


1it [00:00,  1.56it/s]


Title: Probabilistic analysis of Wiedemann's algorithm for minimal polynomial computation, Authors: Gavin Harrison, Jeremy Johnson, B. David Saunders


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.63s/it]


Title: On the wellposedness for periodic nonlinear Schrödinger equations with white noise dispersion, Authors: Gavin Stewart


1it [00:00,  1.46it/s]


Title: Structure in scientific networks: towards predictions of research dynamism, Authors: Benjamin W. Stewart, Andy Rivas, Luat T. Vuong


1it [00:01,  1.67s/it]


Title: Long time decay and asymptotics for the complex mKdV equation, Authors: Gavin Stewart


1it [00:00,  1.73it/s]


Title: A block symmetric Gauss-Seidel decomposition theorem for convex composite quadratic programming and its applications, Authors: Xudong Li, Defeng Sun, Kim-Chuan Toh


1it [00:00,  1.77it/s]


Title: Numerical conformal mapping with rational functions, Authors: Lloyd N. Trefethen


1it [00:01,  1.41s/it]

Title: Efficient Algorithm for Solving Hyperbolic Programs, Authors: Yichuan Deng, Zhao Song, Lichen Zhang, Ruizhe Zhang
pdf file: ./iteration_2/1106.3708v4.Information_Geometric_Optimization_Algorithms__A_Unifying_Picture_via_Invariance_Principles.pdf



  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1106.3708v4.Information_Geometric_Optimization_Algorithms__A_Unifying_Picture_via_Invariance_Principles.bib
check anystyle bib ./bib_files/1106.3708v4.Information_Geometric_Optimization_Algorithms__A_Unifying_Picture_via_Invariance_Principles.bib


1it [00:06,  6.03s/it]


Title: A continuously tunable modulation scheme for precision control of optical cavities with variable detuning, Authors: William Yam, Emily Davis, Sarah Ackley, Matthew Evans, Nergis Mavalvala


1it [00:01,  1.72s/it]


Title: Nonadiabatic charge pumping in a one-dimensional system of noninteracting electrons by an oscillating potential, Authors: Amit Agarwal, Diptiman Sen


0it [00:00, ?it/s]

Title: Objective Improvement in Information-Geometric Optimization, Authors: Youhei Akimoto, Yann Ollivier


0it [00:03, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.57it/s]


Title: Global Linear Convergence of Evolution Strategies on More Than Smooth Strongly Convex Functions, Authors: Youhei Akimoto, Anne Auger, Tobias Glasmachers, Daiki Morinaga


1it [00:00,  1.77it/s]


Title: Fisher Information and Natural Gradient Learning of Random Deep Networks, Authors: Shun-ichi Amari, Ryo Karakida, Masafumi Oizumi


1it [00:00,  1.70it/s]


Title: Online Duet between Metric Embeddings and Minimum-Weight Perfect Matchings, Authors: Sujoy Bhore, Arnold Filtser, Csaba D. Tóth


0it [00:00, ?it/s]
1it [00:00,  1.71it/s]


Title: Correlation-Polarization Effects in Electron/Positron Scattering from Acetylene: A Comparison of Computational Models, Authors: J. Franz, F. A. Gianturco, K. L. Baluja, J. Tennyson, R. Carey, R. Montuoro, R. R. Lucchese, T. Stoecklin


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.72it/s]


Title: Generalizable Features From Unsupervised Learning, Authors: Mehdi Mirza, Aaron Courville, Yoshua Bengio


0it [00:00, ?it/s]

Title: Representation Learning: A Review and New Perspectives, Authors: Yoshua Bengio, Aaron Courville, Pascal Vincent


0it [00:04, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.74it/s]


Title: An information-theoretic evolutionary algorithm, Authors: Arnaud Berny


0it [00:00, ?it/s]
1it [00:01,  1.46s/it]


Title: Model Analysis of Time Reversal Symmetry Test in the Caltech Fe-57 Gamma-Transition Experiment, Authors: Michael Beyer


1it [00:00,  1.51it/s]


Title: A spectral solver for evolution problems with spatial S3-topology, Authors: Florian Beyer


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.76s/it]


Title: On a possible node in the Sivers and Qiu-Sterman functions, Authors: Daniel Boer


1it [00:00,  1.75it/s]


Title: Adaptive Parallel Tempering for Stochastic Maximum Likelihood Learning of RBMs, Authors: Guillaume Desjardins, Aaron Courville, Yoshua Bengio


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Benchmarking Optimization Software with Performance Profiles, Authors: Elizabeth D. Dolan, Jorge J. Moré


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.77it/s]


Title: Some counterexamples on the behaviour of real-valued functions and their derivatives, Authors: Juergen Grahl, Shahar Nevo


1it [00:01,  1.97s/it]


Title: An extended analysis of the viscosity kernel for monatomic and diatomic fluids, Authors: R. M. Puscasu, B. D. Todd, P. J. Daivis, J. S. Hansen


0it [00:00, ?it/s]

Title: The CMA Evolution Strategy: A Tutorial, Authors: Nikolaus Hansen


0it [00:03, ?it/s]
1it [00:00,  1.56it/s]


Title: Benchmarking Fast-to-Alfven Mode Conversion in a Cold MHD Plasma, Authors: Paul S. Cally, Shelley C. Hansen


1it [00:00,  1.48it/s]


Title: Evaluating Polynomials Over the Unit Disk and the Unit Ball, Authors: Kendall Atkinson, Olaf Hansen, David Chien


1it [00:00,  1.28it/s]


Title: Diagonal Acceleration for Covariance Matrix Adaptation Evolution Strategies, Authors: Youhei Akimoto, Nikolaus Hansen


1it [00:01,  1.25s/it]


Title: The CMA Evolution Strategy: A Tutorial, Authors: Nikolaus Hansen


1it [00:00,  1.38it/s]


Title: Perspectives and completely positive maps, Authors: Frank Hansen


0it [00:00, ?it/s]
1it [00:00,  1.76it/s]


Title: Time-Series Classification in Smart Manufacturing Systems: An Experimental Evaluation of State-of-the-Art Machine Learning Algorithms, Authors: Mojtaba A. Farahani, M. R. McCormick, Ramy Harik, Thorsten Wuest


1it [00:00,  1.80it/s]


Title: Large scale distributed neural network training through online distillation, Authors: Rohan Anil, Gabriel Pereyra, Alexandre Passos, Robert Ormandi, George E. Dahl, Geoffrey E. Hinton


1it [00:02,  2.35s/it]


Title: How to represent part-whole hierarchies in a neural network, Authors: Geoffrey Hinton


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.62it/s]


Title: Semi-global Output Feedback Stabilization of Non-Minimum Phase Nonlinear Systems, Authors: Almuatazbellah M. Boker, Hassan K. Khalil


1it [00:00,  1.63it/s]


Title: Convergence of numerical methods for stochastic differential equations in mathematical finance, Authors: Peter Kloeden, Andreas Neuenkirch


0it [00:00, ?it/s]
1it [00:00,  1.71it/s]


Title: Heterogeneous Doppler Spread-based CSI Estimation Planning for TDD Massive MIMO, Authors: Salah Eddine Hajri, Maialen Larrañaga, Mohamad Assaad


1it [00:00,  1.16it/s]


Title: Stringy Generalization of the First Law of Thermodynamics for Rotating BTZ Black Hole with a Cosmological Constant as State Parameter, Authors: Alexis Larranaga


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.50it/s]


Title: Learning Feature Hierarchies with Centered Deep Boltzmann Machines, Authors: Grégoire Montavon, Klaus-Robert Müller


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.58s/it]


Title: The Extended Kalman Filter is a Natural Gradient Descent in Trajectory Space, Authors: Yann Ollivier


1it [00:00,  1.71it/s]


Title: Parallel Mixed Bayesian Optimization Algorithm: A Scaleup Analysis, Authors: Jiri Ocenasek, Martin Pelikan


0it [00:00, ?it/s]
1it [00:00,  1.17it/s]


Title: Musical Information Extraction from the Singing Voice, Authors: Preeti Rao


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:02,  2.34s/it]


Title: How to design, and tune, a computed torque controller: An introduction and a Matlab example, Authors: Lluís Ros


1it [00:01,  1.68s/it]


Title: Explicit and Efficient Construction of (nearly) Optimal Rate Codes for Binary Deletion Channel and the Poisson Repeat Channel, Authors: Ittai Rubinstein


1it [00:01,  1.08s/it]


Title: Learning Markov Chain in Unordered Dataset, Authors: Yao-Hung Hubert Tsai, Han Zhao, Ruslan Salakhutdinov, Nebojsa Jojic


1it [00:00,  1.43it/s]


Title: On the Quantitative Analysis of Decoder-Based Generative Models, Authors: Yuhuai Wu, Yuri Burda, Ruslan Salakhutdinov, Roger Grosse


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: World Trade Center responders in their own words: Predicting PTSD symptom trajectories with AI-based language analyses of interviews, Authors: Youngseo Son, Sean A. P. Clouston, Roman Kotov, Johannes C. Eichstaedt, Evelyn J. Bromet, Benjamin J. Luft, H Andrew Schwartz


0it [00:03, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.24s/it]


Title: Prospects for all-optical ultrafast muon acceleration, Authors: F. Peano, J. Vieira, R. Mulas, G. Coppa, R. Bingham, L. O. Silva


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Notes on information geometry and evolutionary processes, Authors: Marc Toussaint


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.67s/it]


Title: A Generalization of the Exponential-Poisson Distribution, Authors: Wagner Barreto-Souza, Francisco Cribari-Neto


0it [00:00, ?it/s]

Title: Efficient Natural Evolution Strategies, Authors: Yi Sun, Daan Wierstra, Tom Schaul, Juergen Schmidhuber


0it [00:02, ?it/s]
0it [00:00, ?it/s]

Title: Efficient Natural Evolution Strategies, Authors: Yi Sun, Daan Wierstra, Tom Schaul, Juergen Schmidhuber


0it [00:02, ?it/s]


pdf file: ./iteration_2/1107.2490v2.Towards_Optimal_One_Pass_Large_Scale_Learning_with_Averaged_Stochastic_Gradient_Descent.pdf


  for result in tqdm(search_results.results()):


check the path:  ./bib_files/1107.2490v2.Towards_Optimal_One_Pass_Large_Scale_Learning_with_Averaged_Stochastic_Gradient_Descent.bib
check anystyle bib ./bib_files/1107.2490v2.Towards_Optimal_One_Pass_Large_Scale_Learning_with_Averaged_Stochastic_Gradient_Descent.bib


1it [00:00,  1.78it/s]


Title: Enumerative Sphere Shaping for Rate Adaptation and Reach Increase in WDM Transmission Systems, Authors: Abdelkerim Amari, Sebastiaan Goossens, Yunus Can Gultekin, Olga Vassilieva, Inwoong Kim, Tadashi Ikeuchi, Chigo Okonkwo, Frans M. J. Willems, Alex Alvarado


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.58it/s]


Title: Beyond Folklore: A Scaling Calculus for the Design and Initialization of ReLU Networks, Authors: Aaron Defazio, Léon Bottou


0it [00:00, ?it/s]
1it [00:01,  1.32s/it]


Title: On U-Statistics and Compressed Sensing II: Non-Asymptotic Worst-Case Analysis, Authors: Fabian Lim, Vladimir Stojanovic


1it [00:00,  1.33it/s]


Title: On U-Statistics and Compressed Sensing II: Non-Asymptotic Worst-Case Analysis, Authors: Fabian Lim, Vladimir Stojanovic


0it [00:00, ?it/s]
1it [00:01,  1.45s/it]


Title: A classification of permutation polynomials of degree $7$ over finite fields, Authors: Xiang Fan


1it [00:00,  1.75it/s]


Title: Logarithmic Regret for Online Control, Authors: Naman Agarwal, Elad Hazan, Karan Singh


0it [00:00, ?it/s]

Title: Sparse Online Learning via Truncated Gradient, Authors: John Langford, Lihong Li, Tong Zhang


0it [00:02, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.39s/it]


Title: A Fast Algorithm for Calculation of Thêo1, Authors: Ben Lewis


1it [00:00,  1.75it/s]


Title: Convex optimization for finite horizon robust covariance control of linear stochastic systems, Authors: Georgios Kotsalis, Guanghui Lan, Arkadi Nemirovski


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.76it/s]


Title: A Quasi-Newton Approach to Nonsmooth Convex Optimization Problems in Machine Learning, Authors: Jin Yu, S. V. N. Vishwanathan, Simon Guenter, Nicol N. Schraudolph


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.28s/it]

Title: A General Algorithm for Solving Rank-one Matrix Sensing, Authors: Lianke Qin, Zhao Song, Ruizhe Zhang





splt with ray


In [65]:
weaviate_client = weaviate.Client(url="http://localhost:8080")

In [66]:
weaviate_client.data_object.get(class_name='new_class_ray')

{'deprecations': [],
 'objects': [{'class': 'New_class_ray',
   'creationTimeUnix': 1700671887430,
   'id': '0053d892-29fa-5c3a-ae92-b0cd51967ad6',
   'lastUpdateTimeUnix': 1700671887430,
   'properties': {'document_title': '1206.5538v3.Representation_Learning__A_Review_and_New_Perspectives.pdf',
    'page_content': '29\nmodel for visual area V2. In NIPS’07 .\nLee, H., Grosse, R., Ranganath, R., and Ng, A. Y . (2009a). Convolu-\ntional deep belief networks for scalable unsupervised learning of\nhierarchical representations. In ICML’2009 .\nLee, H., Pham, P., Largman, Y ., and Ng, A. (2009b). Unsupervised\nfeature learning for audio classiﬁcation using convolutional deep\nbelief networks. In NIPS’2009 .\nLin, Y ., Tong, Z., Zhu, S., and Yu, K. (2010). Deep coding network.\nInNIPS’2010 .\nLowe, D. (1999). Object recognition from local scale invariant\nfeatures. In ICCV’99 .\nMallat, S. (2012). Group invariant scattering. Communications on\nPure and Applied Mathematics .\nMarlin, B. and d

In [82]:
weaviate_client.query.aggregate("New_class_ray").with_meta_count().do()

{'data': {'Aggregate': {'New_class_ray': [{'meta': {'count': 976}}]}}}

In [68]:
ish = weaviate_client.query.get(class_name='new_class_ray', properties='document_title')

In [73]:
ish.do()

{'data': {'Get': {'New_class_ray': [{'document_title': '1206.5538v3.Representation_Learning__A_Review_and_New_Perspectives.pdf'},
    {'document_title': '1206.1106v2.No_More_Pesky_Learning_Rates_2.pdf'},
    {'document_title': '1107.2490v2.Towards_Optimal_One_Pass_Large_Scale_Learning_with_Averaged_Stochastic_Gradient_Descent.pdf'},
    {'document_title': '1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.pdf'},
    {'document_title': '1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks.pdf'},
    {'document_title': '1412.2306v2.Deep_Visual_Semantic_Alignments_for_Generating_Image_Descriptions.pdf'},
    {'document_title': '1108.3298v1.A_Machine_Learning_Perspective_on_Predictive_Coding_with_PAQ.pdf'},
    {'document_title': '1801.07736v3.MaskGAN__Better_Text_Generation_via_Filling_in_the______.pdf'},
    {'document_title': '1308.0850v5.Generating_Sequences_With_Recurrent_Neural_Networks_1.pdf'},
    {'document_title': '1111.4259v1.Krylov_Subspace_Descent_for_Deep_L

In [78]:
test = weaviate_client.query.aggregate(class_name='New_class_ray')

In [79]:
test.do()

{'errors': [{'locations': [{'column': 25, 'line': 1}],
   'message': 'Syntax Error GraphQL request (1:25) Unexpected empty IN {}\n\n1: {Aggregate{New_class_ray{}}}\n                           ^\n',
   'path': None}]}

In [None]:


"""

            for i in range(iteration):
                for filename in os.listdir('./pdfs/'):
                        file_path = os.path.join('./pdfs', filename)
                        if os.path.isfile(file_path) and filename.endswith(".pdf"):
                            run_anystyle(file_path)
                            parsed_data = process_bib_files(anystyle_output)
                            for ref in parsed_data:
                                arxiv_search(ref['title'], ref['authors'])
                            print('success search')

                            parsed_text = parse_pdf()
                            serialized_text = weaviate_split_multiple_pdf(parsed_text)
                            os.remove(file_path)

                if ray == False:
                        print('success split with no ray')
                    # calling the weaviate embedder
                        embedder = WeaviateEmbedder()
                        embedder.adding_weaviate_document(serialized_text, cls)

                elif ray is True:
                        print('splt with ray')
                        actor_workload = divide_workload(4, serialized_text)
                        actors = [WeaviateRayEmbedder.remote() for _ in range(4)]
                        [actor.adding_weaviate_document.remote(doc_part, cls) for actor, doc_part in zip(actors, actor_workload)]
                        if recursive:
                            print('recursive with ray')
                            return arxiv_pipeline(input_pdf, cls, ray=True)

                    for filename in os.listdir('./pdfs/'):
                        file_path = os.path.join('./pdfs', filename)
                        if os.path.isfile(file_path) and filename.endswith(".pdf"):
                            run_anystyle(file_path)
                            os.remove(file_path)
                            print(f"Removed {filename}")
    
    """

In [44]:
weaviate_client.data_object.get(class_name='test_no_rec')

{'deprecations': [],
 'objects': [{'class': 'Test_no_rec',
   'creationTimeUnix': 1700659853728,
   'id': '0124446f-5348-5c80-895d-ea31f24ee1e7',
   'lastUpdateTimeUnix': 1700659853728,
   'properties': {'document_title': '1301.3584v7.Revisiting_Natural_Gradient_for_Deep_Networks.pdf',
    'page_content': 'Erhan, D., Courville, A., Bengio, Y ., and Vincent, P. (2010). Why does unsupervised pre-training help deep\nlearning? In JMLR W&CP: Proc. AISTATS’2010 , volume 9, pages 201–208.\nGonzalez, A. and Dorronsoro, J. (2006). Natural conjugate gradient training of multilayer perceptrons. Artiﬁcial\nNeural Networks ICANN 2006 , pages 169–177.\nHeskes, T. (2000). On natural learning and pruning in multilayered perceptrons. Neural Computation ,12,\n1037–1057.\nHonkela, A., Tornio, M., Raiko, T., and Karhunen, J. (2008). Natural conjugate gradient in variational inference.\nInNeural Information Processing , pages 305–314.\nHonkela, A., Raiko, T., Kuusela, M., Tornio, M., and Karhunen, J. (2010

## Storing data in Weaviate

In [None]:
def divide_workload(num_actors, documents):
    docs_per_actor = len(documents) // num_actors

    doc_parts = [documents[i * docs_per_actor: (i + 1) * docs_per_actor] for i in range(num_actors)]

    if len(documents) % num_actors:
        doc_parts[-1].extend(documents[num_actors * docs_per_actor:])

    return doc_parts

In [16]:
arxiv_pipeline('./1412.6980v9.Adam__A_Method_for_Stochastic_Optimization.pdf')

  for result in tqdm(search_results.results()):
0it [00:00, ?it/s]
1it [00:00,  1.79it/s]


Title: Fisher Information and Natural Gradient Learning of Random Deep Networks, Authors: Shun-ichi Amari, Ryo Karakida, Masafumi Oizumi


1it [00:00,  1.34it/s]


Title: Recent Advances in Convolutional Neural Networks, Authors: Jiuxiang Gu, Zhenhua Wang, Jason Kuen, Lianyang Ma, Amir Shahroudy, Bing Shuai, Ting Liu, Xingxing Wang, Li Wang, Gang Wang, Jianfei Cai, Tsuhan Chen


1it [00:00,  1.73it/s]


Title: Stochastic (Approximate) Proximal Point Methods: Convergence, Optimality, and Adaptivity, Authors: Hilal Asi, John C. Duchi


0it [00:00, ?it/s]

Title: Generating Sequences With Recurrent Neural Networks, Authors: Alex Graves


0it [00:05, ?it/s]
0it [00:00, ?it/s]

Title: Speech Recognition with Deep Recurrent Neural Networks, Authors: Alex Graves, Abdel-rahman Mohamed, Geoffrey Hinton


0it [00:02, ?it/s]
1it [00:00,  1.74it/s]


Title: Reducing ground-based astrometric errors with Gaia and Gaussian processes, Authors: W. F. Fortino, G. M. Bernstein, P. H. Bernardinelli, M. Aguena, S. Allam, J. Annis, D. Bacon, K. Bechtol, S. Bhargava, D. Brooks, D. L. Burke, J. Carretero, A. Choi, M. Costanzi, L. N. da Costa, M. E. S. Pereira, J. De Vicente, S. Desai, P. Doel, A. Drlica-Wagner, K. Eckert, T. F. Eifler, A. E. Evrard, I. Ferrero, J. Frieman, J. García-Bellido, E. Gaztanaga, D. W. Gerdes, R. A. Gruendl, J. Gschwend, G. Gutierrez, W. G. Hartley, S. R. Hinton, D. L. Hollowood, K. Honscheid, D. J. James, M. Jarvis, S. Kent, K. Kuehn, N. Kuropatkin, M. A. G. Maia, J. L. Marshall, F. Menanteau, R. Miquel, R. Morgan, J. Myles, R. L. C. Ogando, A. Palmese, F. Paz-Chinchón, A. A. Plazas, A. Roodman, E. S. Rykoff, E. Sanchez, B. Santiago, V. Scarpine, M. Schubnell, S. Serrano, I. Sevilla-Noarbe, M. Smith, E. Suchyta, G. Tarle, C. To, D. L. Tucker, T. N. Varga, A. R. Walker, J. Weller, W. Wester


1it [00:00,  1.19it/s]


Title: Speech Recognition with Deep Recurrent Neural Networks, Authors: Alex Graves, Abdel-rahman Mohamed, Geoffrey Hinton


1it [00:01,  1.31s/it]


Title: The Forward-Forward Algorithm: Some Preliminary Investigations, Authors: Geoffrey Hinton


0it [00:00, ?it/s]

Title: Improving neural networks by preventing co-adaptation of feature detectors, Authors: Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky, Ilya Sutskever, Ruslan R. Salakhutdinov


0it [00:03, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.60it/s]


Title: Search Intelligence: Deep Learning For Dominant Category Prediction, Authors: Zeeshan Khawar Malik, Mo Kobrosli, Peter Maas


0it [00:00, ?it/s]
0it [00:00, ?it/s]

Title: Revisiting Natural Gradient for Deep Networks, Authors: Razvan Pascanu, Yoshua Bengio


0it [00:02, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.44s/it]


Title: A comparative study of divisive hierarchical clustering algorithms, Authors: Maurice Roux


1it [00:00,  1.67it/s]


Title: Efficient OPA tomography of non-Gaussian states of light, Authors: Éva Rácz, László Ruppert, Radim Filip


0it [00:00, ?it/s]

Title: No More Pesky Learning Rates, Authors: Tom Schaul, Sixin Zhang, Yann LeCun


0it [00:04, ?it/s]
0it [00:00, ?it/s]
1it [00:00,  1.64it/s]


Title: One-Shot Imitation Learning, Authors: Yan Duan, Marcin Andrychowicz, Bradly C. Stadie, Jonathan Ho, Jonas Schneider, Ilya Sutskever, Pieter Abbeel, Wojciech Zaremba


0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:01,  1.59s/it]


Title: Towards Understanding Fast Adversarial Training, Authors: Bai Li, Shiqi Wang, Suman Jana, Lawrence Carin


0it [00:00, ?it/s]

Title: ADADELTA: An Adaptive Learning Rate Method, Authors: Matthew D. Zeiler


0it [00:02, ?it/s]
0it [00:00, ?it/s]
