In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/publications/LEE.pdf
/kaggle/input/publications/Dawson.pdf
/kaggle/input/publications/Qiu.pdf
/kaggle/input/publications/1_Ramrez-Duque_.pdf
/kaggle/input/publications/Abbas_2020.pdf
/kaggle/input/publications/22_Ouss_ASD.pdf
/kaggle/input/publications/Asd_Cry_patterns.pdf
/kaggle/input/publications/zhao2020.pdf
/kaggle/input/publications/Abbas_2018.pdf
/kaggle/input/publications/carpenter2020 (1).pdf
/kaggle/input/publications/Young_Behavior.pdf
/kaggle/input/publications/Tariq2018.pdf
/kaggle/input/publications/Patten_Audio.pdf


**Installing all the dependencies**

In [2]:
!pip install -q unstructured
!pip install -q sentence-transformers

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
keras-cv 0.8.2 requires keras-core, which is not installed.
keras-nlp 0.9.3 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
google-cloud-bigquery 2.34.4 requires packaging<22.0dev,>=14.3, but you have packaging 24.0 which is incompatible.
jupyterlab 4.1.6 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.
jupyterlab-lsp 5.1.0 requires jupyter-lsp>=2.0.0, but you have jupyter-lsp 1.5.1 which is incompatible.
libpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.
momepy 0.7.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.
osmnx 1.9.2 requires shapely>=2.0, but you have shapely 1.8.5.post1 which is incompatible.
spopt 0.6.0 requires shap

**Importing all the dependencies**

In [3]:
import warnings
warnings.filterwarnings('ignore')
import os
import json
from unstructured_client import UnstructuredClient
from unstructured_client.models import shared
from unstructured_client.models.errors import SDKError
from unstructured.staging.base import dict_to_elements, elements_to_json
from IPython.display import JSON

**Setting up the user secrets and unstructured client**

In [4]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("uns_api")
secret_value_1 = user_secrets.get_secret("uns_server_url")
s = UnstructuredClient(
    api_key_auth=secret_value_0,
    server_url=secret_value_1,
)

**Getting elements from a single pdf using the Unstructured client**

In [5]:
def get_elements_from_pdf(filename):
    with open(filename, "rb") as f:
        files=shared.Files(
            content=f.read(), 
            file_name=filename,
        )

    req = shared.PartitionParameters(
        files=files,
        strategy='hi_res',
        pdf_infer_table_structure=True,
        languages=["eng"],
    )
    try:
        resp = s.general.partition(req)
    except SDKError as e:
        print(e)
    return resp
filename = "/kaggle/input/publications/1_Ramrez-Duque_.pdf"
response = get_elements_from_pdf(filename)
print(json.dumps(response.elements[:3], indent=2))

[
  {
    "type": "Header",
    "element_id": "124519724f8942a945cd489488e97294",
    "text": "Journal of Intelligent & Robotic Systems (2019) 96:267-281 https://doi.org/10.1007/510846-018-00975-y",
    "metadata": {
      "filetype": "application/pdf",
      "languages": [
        "eng"
      ],
      "page_number": 1,
      "filename": "1_Ramrez-Duque_.pdf"
    }
  },
  {
    "type": "Title",
    "element_id": "0606ba94dc82b8248e73d33d5dce1c37",
    "text": "Robot-Assisted Autism Spectrum Disorder Diagnostic Based on Artificial Reasoning",
    "metadata": {
      "filetype": "application/pdf",
      "languages": [
        "eng"
      ],
      "page_number": 1,
      "parent_id": "124519724f8942a945cd489488e97294",
      "filename": "1_Ramrez-Duque_.pdf"
    }
  },
  {
    "type": "Title",
    "element_id": "165abf064c3a95bbee57e8aa4016140c",
    "text": "Andr\u00e9s A. Ramirez-Duque\u2019 @ . Anselmo Frizera-Neto' - Teodiano Freire Bastos\u2019",
    "metadata": {
      "filetype": "

In [6]:
JSON(json.dumps(response.elements, indent=2))

<IPython.core.display.JSON object>

In [7]:
for element in response.elements:
    if element['type'] == "Title" and element['text'] == "References":
        print('element id: ',element['element_id'])
        print('parent id: ',element['metadata']['parent_id'])

element id:  69824d3b0e70ca6aaa0da1613b65fd91
parent id:  efd96aedf377e20afd95285a7c751a86


In [8]:
#Extracting text out of this pdf without the references, headers and footers
def cleaned_text_without_references(response):
    cleaned_elements=[]
    for element in response.elements:
        if element['text'] == 'REFERENCES' or element['text']=='References':
            break
        elif element['type'] != "Header" and element['type'] != "Footer":
            cleaned_elements.append({'element_id': element['element_id'],
                                    'page_number': element['metadata']['page_number'],
                                    'text': element['text']})
    return cleaned_elements
cleaned_text = cleaned_text_without_references(response)
cleaned_text[:5]

[{'element_id': '0606ba94dc82b8248e73d33d5dce1c37',
  'page_number': 1,
  'text': 'Robot-Assisted Autism Spectrum Disorder Diagnostic Based on Artificial Reasoning'},
 {'element_id': '165abf064c3a95bbee57e8aa4016140c',
  'page_number': 1,
  'text': "Andrés A. Ramirez-Duque’ @ . Anselmo Frizera-Neto' - Teodiano Freire Bastos’"},
 {'element_id': 'b2181c6eefd507a2a68fcd65e3af62e0',
  'page_number': 1,
  'text': 'l.) Check for updates'},
 {'element_id': 'dbc91cf0825c7c18192c20fc0182d322',
  'page_number': 1,
  'text': 'Received: 25 April 2018 / Accepted: 20 December 2018 / Published online: 29 March 2019 © Springer Nature B.V. 2019'},
 {'element_id': 'd21b4a64a2d8656a0fdf7ab2e89a4916',
  'page_number': 1,
  'text': 'Abstract'}]

In [12]:
#grouping all the text on same page
def group_by_page(cleaned_text):
    grouped_texts=[]
    #finding the max page number
    max_page_number = 0
    for item in cleaned_text:
        page_number = item['page_number']
        if page_number > max_page_number:
            max_page_number = page_number
    #grouping all the text on same page
    for i in range(max_page_number):
        item_text = ''
        item_page_number = i+1
        for item in cleaned_text:
            if item['page_number'] == i+1:
                item_text = item_text + item['text']
        grouped_texts.append({'page_number': item_page_number, 
                              'text': item_text})
    return grouped_texts

grouped_text = group_by_page(cleaned_text)
grouped_text[:2]

[{'page_number': 1,
  'text': "Robot-Assisted Autism Spectrum Disorder Diagnostic Based on Artificial ReasoningAndrés A. Ramirez-Duque’ @ . Anselmo Frizera-Neto' - Teodiano Freire Bastos’l.) Check for updatesReceived: 25 April 2018 / Accepted: 20 December 2018 / Published online: 29 March 2019 © Springer Nature B.V. 2019AbstractAutism spectrum disorder (ASD) is a neurodevelopmental disorder that affects people from birth, whose symptoms are found in the early developmental period. The ASD diagnosis is usually performed through several sessions of behavioral observation, exhaustive screening, and manual coding behavior. The early detection of ASD signs in naturalistic behavioral observation may be improved through Child-Robot Interaction (CRI) and technological-based tools for automated behavior assessment. Robot-assisted tools using CRI theories have been of interest in intervention for children with Autism Spectrum Disorder (CwASD), elucidating faster and more significant gains from t

**Extracting text out of each pdf without the references, headers and footers**

In [13]:
pdf_and_text=[]
for filename in os.listdir("/kaggle/input/publications"):
  #check if the file is a pdf
  if filename.endswith('.pdf'):
    # Construct the full path to the PDF file
    pdf_path = os.path.join("/kaggle/input/publications", filename)
    pdf_response = get_elements_from_pdf(pdf_path)
    pdf_cleaned_text = cleaned_text_without_references(pdf_response)
    pdf_grouped_text = group_by_page(pdf_cleaned_text)
    pdf_and_text.append({'filename': filename.replace("/kaggle/input/publications/", ""), 
                         'text': pdf_grouped_text})
pdf_and_text[0]

{'filename': 'LEE.pdf',
 'text': [{'page_number': 1,
   'text': '. sensorsbyLetterLetter Deep-Learning-Based Detection of Infants with Autism Spectrum Disorder Using Auto-Encoder Feature RepresentationJung Hyuk Lee 1, Geon Woo Lee 1, Guiyoung Bong 2, Hee Jeong Yoo 2,3 and Hong Kook Kim 1,*1School of Electrical Engineering and Computer Science, Gwangju Institute of Science and Technology, Gwangju 61005, Korea; ljh0412@gist.ac.kr (J.H.L.); geonwoo0801@gist.ac.kr (G.W.L.) 2 Department of Psychiatry, Seoul National University Bundang Hospital, Seongnam-si,Gyeonggi-do 13620, Korea; 20409@snubh.org (G.B.); hjyoo@snu.ac.kr (H.J.Y.)3 Department of Psychiatry, College of Medicine, Seoul National University, Seoul 03980, Korea * Correspondence: hongkook@gist.ac.krReceived: 29 October 2020; Accepted: 24 November 2020; Published: 26 November 2020check for v updatesAbstract: Autism spectrum disorder (ASD) is a developmental disorder with a life-span disability. While diagnostic instruments have bee

In [14]:
# The dictionary is large and takes a long time to compute so we will save it locally
import pickle

# Specify the folder path
folder_path = '/kaggle/working/'

# Save the list of dictionaries to a file in the specified folder
file_path = os.path.join(folder_path, 'pdf_and_text.pkl')
with open(file_path, 'wb') as f:
    pickle.dump(pdf_and_text, f)

**Loading the saved list of dictionaries**

In [15]:
with open('/kaggle/working/pdf_and_text.pkl', 'rb') as f:
    loaded_pdf_and_text = pickle.load(f)

### **Further text processing (splitting pages into sentences)**

In [16]:
from spacy.lang.en import English

nlp = English()

# Add a sentencizer pipeline, see https://spacy.io/api/sentencizer/ 
nlp.add_pipe("sentencizer")

# Create a document instance as an example 
doc = nlp("Hello, my name is Amit. This is my notebook related to RAG.") 
assert len(list(doc.sents)) == 2

# Access the sentences of the document 
list(doc.sents)

[Hello, my name is Amit., This is my notebook related to RAG.]

In [17]:
for pdf in loaded_pdf_and_text:
    for page in pdf['text']:
        page["sentences"] = list(nlp(page["text"]).sents)
        # Make sure all sentences are strings
        page["sentences"] = [str(sentence) for sentence in page["sentences"]]
        # Count the sentences
        page["page_sentence_count_spacy"] = len(page["sentences"])

In [19]:
loaded_pdf_and_text[5]

{'filename': '22_Ouss_ASD.pdf',
 'text': [{'page_number': 1,
   'text': 'A R T I C L EO p e n A c c e s sBehavior and interaction imaging at 9 months of age predict autism/intellectual disability in high-risk infants with West syndrome Lisa Ouss1, Giuseppe Palestra 2, Catherine Saint-Georges2,3, Marluce Leitgel Gille1, Mohamed Afshar4, Hugues Pellerin2, Kevin Bailly2, Mohamed Chetouani2, Laurence Robel1, Bernard Golse1, Rima Nabbout5, Isabelle Desguerre5, Mariana Guergova-Kuras4 and David Cohen 2,3; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1Abstract Automated behavior analysis are promising tools to overcome current assessment limitations in psychiatry. At 9 months of age, we recorded 32 infants with West syndrome (WS) and 19 typically developing (TD) controls during a standardized mother–infant interaction. We computed infant hand movements (HM), speech turn taking of both partners (vocalization, pause, silences, 

### **Chunking our sentences together**

In [20]:
# Define split size to turn groups of sentences into chunks
num_sentence_chunk_size = 10

# Create a function that recursively splits a list into desired sizes
def split_list(input_list: list,
               slice_size: int)-> list[list[str]]:
    return [input_list[i:i + slice_size] for i in range(0, len(input_list),slice_size)]


In [21]:
# Loop through pages and texts and split sentences into chunks
for pdf in loaded_pdf_and_text:
    for page in pdf['text']:
        page["sentence_chunks"] = split_list(input_list=page["sentences"],
                                             slice_size=num_sentence_chunk_size)
        page["num_chunks"] = len(page["sentence_chunks"])

In [22]:
loaded_pdf_and_text[5]

{'filename': '22_Ouss_ASD.pdf',
 'text': [{'page_number': 1,
   'text': 'A R T I C L EO p e n A c c e s sBehavior and interaction imaging at 9 months of age predict autism/intellectual disability in high-risk infants with West syndrome Lisa Ouss1, Giuseppe Palestra 2, Catherine Saint-Georges2,3, Marluce Leitgel Gille1, Mohamed Afshar4, Hugues Pellerin2, Kevin Bailly2, Mohamed Chetouani2, Laurence Robel1, Bernard Golse1, Rima Nabbout5, Isabelle Desguerre5, Mariana Guergova-Kuras4 and David Cohen 2,3; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1; , : ) ( 0 9 8 7 6 5 4 3 2 1Abstract Automated behavior analysis are promising tools to overcome current assessment limitations in psychiatry. At 9 months of age, we recorded 32 infants with West syndrome (WS) and 19 typically developing (TD) controls during a standardized mother–infant interaction. We computed infant hand movements (HM), speech turn taking of both partners (vocalization, pause, silences, 

### **Splitting each chunk into its own item**

In [23]:
import re
# Split each chunk into its own item
pages_and_chunks = []
for pdf in loaded_pdf_and_text:
    for page in pdf['text']:
      for sentence_chunk in page["sentence_chunks"]:
        chunk_dict = {}
        chunk_dict["pdf_name"] = pdf['filename']
        chunk_dict["page_number"] = page["page_number"]

        # Join the sentences together into a paragraph-like structure, aka a chunk (so they are a single string)
        joined_sentence_chunk = "".join(sentence_chunk).replace(" ", " "). strip()
        joined_sentence_chunk = re.sub(r'\.([A-Z])', r'. \1',joined_sentence_chunk)
        chunk_dict["sentence_chunk"] = joined_sentence_chunk
        # Get stats about the chunk
        chunk_dict["chunk_char_count"] = len(joined_sentence_chunk)
        chunk_dict["chunk_word_count"] = len([word for word in joined_sentence_chunk.split(" ")])
        chunk_dict["chunk_token_count"] = len(joined_sentence_chunk) / 4 # 1 token = ~4 characters
        pages_and_chunks.append(chunk_dict)

# How many chunks do we have?
len(pages_and_chunks)

517

In [24]:
#Let's print some random sample
import random
random.sample(pages_and_chunks, k=1)

[{'pdf_name': 'Patten_Audio.pdf',
  'page_number': 1,
  'sentence_chunk': '", NAUo% 3 P HENPublished in final edited form as:J Autism Dev Disord.2014 October ; 44(10): 2413–2428.doi:10.1007/s10803-014-2047-4. Vocal patterns in infants with Autism Spectrum Disorder: Canonical babbling status and vocalization frequencyElena Patten, Ph. D.1, Katie Belardi, M. S.2, Grace T. Baranek, Ph. D.2, Linda R. Watson, Ed. D. 2, Jeffrey D. Labban, Ph. D.1, and D. Kimbrough Oller, Ph. D.3 1Univ.of North Carolina, Greensboro2Univ.of North Carolina, Chapel Hill3Univ.',
  'chunk_char_count': 480,
  'chunk_word_count': 68,
  'chunk_token_count': 120.0}]

In [25]:
# Get stats about our chunks
import pandas as pd
df = pd.DataFrame(pages_and_chunks)
df.describe().round(2)

Unnamed: 0,page_number,chunk_char_count,chunk_word_count,chunk_token_count
count,517.0,517.0,517.0,517.0
mean,9.48,1187.22,182.33,296.8
std,7.16,710.37,114.55,177.59
min,1.0,5.0,1.0,1.25
25%,4.0,546.0,77.0,136.5
50%,7.0,1197.0,184.0,299.25
75%,16.0,1666.0,256.0,416.5
max,35.0,4075.0,647.0,1018.75


In [26]:
# Show random chunks with under 30 tokens in length
min_token_length = 30
for row in df[df["chunk_token_count"] <= min_token_length].sample(5).iterrows():
  print(f'Chunk token count: {row[1]["chunk_token_count"]} | Text:{row[1]["sentence_chunk"]}')

Chunk token count: 3.0 | Text:Through this
Chunk token count: 13.0 | Text:Author manuscript; available in PMC 2014 October 01.
Chunk token count: 28.75 | Text:What did the researchers do and find?PLOS Medicine | https://doi.org/10.1371/journal.pmed.1002705 November 27, 2018
Chunk token count: 7.5 | Text:Automatic diagnostic procedure
Chunk token count: 29.25 | Text:For the above, despite the novelty@ SpringerContent courtesy of Springer Nature, terms of use apply. Rights reserved.


### **Let’s filter our DataFrame/list of dictionaries to only include chunks with over 30 tokens in length.**

In [27]:
pages_and_chunks_over_min_token_len = df[df["chunk_token_count"] >= min_token_length].to_dict(orient="records")
pages_and_chunks_over_min_token_len[:2]

[{'pdf_name': 'LEE.pdf',
  'page_number': 1,
  'sentence_chunk': '.sensorsbyLetterLetter Deep-Learning-Based Detection of Infants with Autism Spectrum Disorder Using Auto-Encoder Feature RepresentationJung Hyuk Lee 1, Geon Woo Lee 1, Guiyoung Bong 2, Hee Jeong Yoo 2,3 and Hong Kook Kim 1,*1School of Electrical Engineering and Computer Science, Gwangju Institute of Science and Technology, Gwangju 61005, Korea; ljh0412@gist.ac.kr (J. H. L.); geonwoo0801@gist.ac.kr (G. W. L.) 2 Department of Psychiatry, Seoul National University Bundang Hospital, Seongnam-si,Gyeonggi-do 13620, Korea; 20409@snubh.org (G. B.); hjyoo@snu.ac.kr (H. J. Y.)3 Department of Psychiatry, College of Medicine, Seoul National University, Seoul 03980, Korea * Correspondence: hongkook@gist.ac.krReceived: 29 October 2020; Accepted: 24 November 2020; Published: 26 November 2020check for v updatesAbstract: Autism spectrum disorder (ASD) is a developmental disorder with a life-span disability. While diagnostic instruments h

# **Embedding our text chunks**

In [28]:
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [29]:
import time

In [30]:
# Create embeddings one by one
for item in pages_and_chunks_over_min_token_len:
  item["embedding"] = embedding_model.encode(item["sentence_chunk"])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

# **We can perform batched operations by turning our target text samples into a single list and then passing that list to our embedding model.**

In [31]:
# Turn text chunks into a single list
text_chunks = [item["sentence_chunk"] for item in pages_and_chunks_over_min_token_len]

In [32]:
%%time
# Embed all texts in batches
text_chunk_embeddings = embedding_model.encode(text_chunks, batch_size=32,convert_to_tensor=True)
text_chunk_embeddings

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

CPU times: user 9min 24s, sys: 2min 49s, total: 12min 14s
Wall time: 6min 10s


tensor([[-0.0206,  0.0329, -0.0465,  ..., -0.0200, -0.0548, -0.0453],
        [-0.0653, -0.0180, -0.0537,  ..., -0.0223, -0.0255, -0.0312],
        [-0.0456,  0.0325, -0.0454,  ...,  0.0114, -0.0225, -0.0467],
        ...,
        [ 0.0057, -0.0318, -0.0421,  ..., -0.0134, -0.0218, -0.0245],
        [ 0.0036, -0.0277, -0.0399,  ..., -0.0138, -0.0211, -0.0223],
        [-0.0039, -0.0518, -0.0254,  ..., -0.0199, -0.0702, -0.0446]])

# **Save embeddings to file**

### Since creating embeddings can be a timely process, let’s turn our pages_and_chunks_over_min_token_len list of dictionaries into a DataFrame and save it.

In [33]:
# Save embeddings to file
text_chunks_and_embeddings_df = pd.DataFrame(pages_and_chunks_over_min_token_len)
embeddings_df_save_path = "/kaggle/working/text_chunks_and_embeddings_df.csv"
text_chunks_and_embeddings_df.to_csv(embeddings_df_save_path, index=False)

In [35]:
# Import saved file and view
import pandas as pd
embeddings_df_save_path = "/kaggle/working/text_chunks_and_embeddings_df.csv"
text_chunks_and_embedding_df_load = pd.read_csv(embeddings_df_save_path)
text_chunks_and_embedding_df_load.head()

Unnamed: 0,pdf_name,page_number,sentence_chunk,chunk_char_count,chunk_word_count,chunk_token_count,embedding
0,LEE.pdf,1,.sensorsbyLetterLetter Deep-Learning-Based Det...,3028,414,757.0,[-2.05749348e-02 3.28779072e-02 -4.64697927e-...
1,LEE.pdf,1,As there is wide variation in terms of the typ...,529,83,132.25,[-6.53321519e-02 -1.79831404e-02 -5.37393503e-...
2,LEE.pdf,2,social abilities of people with ASD increases ...,3061,450,765.25,[-4.55831960e-02 3.25354822e-02 -4.53654565e-...
3,LEE.pdf,2,For examples of machine learning classiﬁcation...,1790,255,447.5,[-4.83638458e-02 3.05312667e-02 -3.35919224e-...
4,LEE.pdf,3,diﬃculties pertaining to the need to secure th...,1718,264,429.5,[-3.53161991e-02 3.77684757e-02 -4.86659035e-...


# **Similarity search**

### **Let’s import our embeddings we created earlier (tk-link to embedding file) and prepare them for use by turning them into a tensor.**

In [36]:
import random
import torch
import numpy as np
import pandas as pd

# Import texts and embedding df
text_chunks_and_embedding_df = pd.read_csv("/kaggle/working/text_chunks_and_embeddings_df.csv")
# Convert embedding column back to np.array (it got converted to string when it got saved to CSV)
text_chunks_and_embedding_df["embedding"] = text_chunks_and_embedding_df["embedding"].apply(lambda x: np.fromstring(x.strip("[]"), sep=" "))
# Convert texts and embedding df to list of dicts
pages_and_chunks = text_chunks_and_embedding_df.to_dict(orient="records")
# # Convert embeddings to torch tensor and send to device (note: NumPy arrays are float64, torch tensors are float32 by default)
embeddings = torch.tensor(np.array(text_chunks_and_embedding_df["embedding"].tolist()), dtype=torch.float32)
embeddings.shape

torch.Size([503, 768])

In [37]:
text_chunks_and_embedding_df.head()

Unnamed: 0,pdf_name,page_number,sentence_chunk,chunk_char_count,chunk_word_count,chunk_token_count,embedding
0,LEE.pdf,1,.sensorsbyLetterLetter Deep-Learning-Based Det...,3028,414,757.0,"[-0.0205749348, 0.0328779072, -0.0464697927, -..."
1,LEE.pdf,1,As there is wide variation in terms of the typ...,529,83,132.25,"[-0.0653321519, -0.0179831404, -0.0537393503, ..."
2,LEE.pdf,2,social abilities of people with ASD increases ...,3061,450,765.25,"[-0.045583196, 0.0325354822, -0.0453654565, 0...."
3,LEE.pdf,2,For examples of machine learning classiﬁcation...,1790,255,447.5,"[-0.0483638458, 0.0305312667, -0.0335919224, 0..."
4,LEE.pdf,3,diﬃculties pertaining to the need to secure th...,1718,264,429.5,"[-0.0353161991, 0.0377684757, -0.0486659035, 0..."


In [38]:
embeddings[0]

tensor([-2.0575e-02,  3.2878e-02, -4.6470e-02, -9.3290e-03,  2.2779e-02,
         2.9414e-02,  4.6374e-02, -3.1967e-02, -6.6183e-02, -1.5472e-02,
         6.5602e-03, -5.9365e-02,  6.3316e-02,  6.2994e-02, -5.6251e-02,
        -2.7083e-02, -8.2688e-03,  3.3644e-03,  3.7265e-02, -8.6292e-03,
         3.4178e-02,  1.0568e-02, -6.3263e-02,  7.3406e-02, -4.4935e-02,
         3.0851e-02, -3.3910e-03, -6.0136e-02,  7.7574e-03, -6.9380e-02,
        -1.0476e-03,  8.3198e-03,  2.3188e-02,  6.4961e-02,  2.1899e-06,
        -2.7092e-02,  1.1488e-02, -2.1731e-02, -3.3694e-02, -6.7440e-02,
        -3.4016e-02, -2.0057e-02,  2.5766e-03,  3.3616e-02, -3.1582e-02,
        -8.3078e-02,  3.9643e-02,  2.4284e-02,  2.3357e-03,  2.6866e-02,
        -1.3909e-02,  1.7111e-02,  6.4790e-02,  1.7834e-02,  1.4242e-02,
        -1.0971e-02,  7.7212e-03, -4.0157e-02,  1.0227e-02, -4.5842e-02,
        -4.0227e-02,  3.4358e-02, -9.8436e-03,  1.1578e-02,  2.8985e-02,
        -1.0710e-02, -5.1550e-03, -7.0833e-02, -7.9

## **Now let’s prepare another instance of our embedding model. Not because we have to but because we’d like to make it so you can start the notebook from the cell above.**

In [39]:
from sentence_transformers import util, SentenceTransformer
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")

In [40]:
# 1. Define the query
# Note: This could be anything. But since we're working with a specific set of pdfs, we'll stick with queries related to those pdfs.
query = "Autism spectrum disorder"
print(f"Query: {query}")
# 2. Embed the query to the same numerical space as the text examples
# Note: It's important to embed your query with the same model you embedded your examples with.
query_embedding = embedding_model.encode(query, convert_to_tensor=True)
# 3. Get similarity scores with the dot product (we'll time this for fun)
from time import perf_counter as timer
start_time = timer()
dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]
end_time = timer()

print(f"Time take to get scores on {len(embeddings)} embeddings: {end_time-start_time:.5f} seconds.")

# 4. Get the top-k results (we'll keep this to 5)
top_results_dot_product = torch.topk(dot_scores, k=5)
top_results_dot_product

Query: Autism spectrum disorder


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Time take to get scores on 503 embeddings: 0.00051 seconds.


torch.return_types.topk(
values=tensor([0.6785, 0.6635, 0.6534, 0.6270, 0.6262]),
indices=tensor([394, 497, 390,  55, 232]))

## **Let’s check the results of our similarity search.**
#### First, we’ll define a small helper function to print out wrapped text (so it doesn’t print a whole text chunk as a single line).

In [41]:
# Define helper function to print wrapped text
import textwrap
def print_wrapped(text, wrap_length=80):
  wrapped_text = textwrap.fill(text, wrap_length)
  print(wrapped_text)

In [42]:
print(f"Query: '{query}'\n")
print("Results:")
# Loop through zipped together scores and indicies from torch.topk
for score, idx in zip(top_results_dot_product[0], top_results_dot_product[1]):
  print(f"Score: {score:.4f}")
  # Print relevant sentence chunk (since the scores are in descending order,the most relevant chunk will be first)
  print("Text:")
  print_wrapped(pages_and_chunks[idx]["sentence_chunk"])
  # Print the page number too so we can reference the textbook further (and check the results)
  print(f"Page number: {pages_and_chunks[idx]['page_number']}")
  print("\n")

Query: 'Autism spectrum disorder'

Results:
Score: 0.6785
Text:
PMCID: PMC3424065. Lord C, Rutter M, Goode S, Heemsbergen J, Jordan H, Mawhood
L, et al. Austism diagnostic observa- tion schedule: A standardized observation
of communicative and social behavior. Journal of autism and developmental
disorders.1989; 19(2):185–212. PMID: 2745388. Lord C, Rutter M, Le Couteur A.
Autism Diagnostic Interview-Revised: a revised version of a diagnostic interview
for caregivers of individuals with possible pervasive developmental disorders.
Journal of autism and developmental disorders.1994; 24(5):659–85. PMID:
7814313.10. Association AP. Diagnostic and statistical manual of mental
disorders (DSM-5®).
Page number: 18


Score: 0.6635
Text:
Patten et al. Page 30Table 1Participant DemographicsASD; n=23 TD; n=14 Age at
9–12months; mean (SD) 10.89 (1.39) 10.63 (.53) Age at 15–18 months; mean (SD)
16.33 (.83) 16.28 (.70) Sex 19 males, 4 females 11 males, 3 females Race 23
White, 1 Black 13 White, 1 Asia

# **Functionizing our semantic search pipeline**

#### Let’s put all of the steps from above for semantic search into a function or two so we can repeat the workflow.

In [43]:
def retrieve_relevant_resources(query: str,
                                embeddings: torch.tensor,
                                model: SentenceTransformer=embedding_model,
                                n_resources_to_return: int=5,
                                print_time: bool=True):
  """ Embeds a query with model and returns top k scores and indices from embeddings. """
  # Embed the query
  query_embedding = model.encode(query, convert_to_tensor=True)
  # Get dot product scores on embeddings
  start_time = timer()
  dot_scores = util.dot_score(query_embedding, embeddings)[0]
  end_time = timer()

  if print_time:
    print(f"[INFO] Time taken to get scores on {len(embeddings)} embeddings: {end_time-start_time:.5f} seconds.")

  scores, indices = torch.topk(input=dot_scores, k=n_resources_to_return)
  return scores, indices

In [44]:
def print_top_results_and_scores(query: str,
                                 embeddings: torch.tensor,
                                 pages_and_chunks: list[dict]=pages_and_chunks,
                                 n_resources_to_return: int=5):
  """ Takes a query, retrieves most relevant resources and prints them out in descending order.
  Note: Requires pages_and_chunks to be formatted in a specific way (se above for reference). """
  scores, indices = retrieve_relevant_resources(query=query, embeddings=embeddings,
                                                n_resources_to_return=n_resources_to_return)

  print(f"Query: {query}\n")
  print("Results:")
  # Loop through zipped together scores and indicies

  for score, index in zip(scores, indices):
    print(f"Score: {score:.4f}")
    # Print relevant sentence chunk (since the scores are in descending order, the most relevant chunk will be first)
    print_wrapped(pages_and_chunks[index]["sentence_chunk"])
    # Print the page number too so we can reference the textbook further and check the results
    print(f"Page number: {pages_and_chunks[index]['page_number']}")
    print(f"PDF name: {pages_and_chunks[index]['pdf_name']}")
    print("\n")

# **Now let’s test our functions out.**

In [45]:
query = "Proposed method for detection of Infants with Autism Spectrum Disorder Using Auto-Encoder Feature Representation"
# Get just the scores and indices of top related results
scores, indices = retrieve_relevant_resources(query=query, embeddings=embeddings)
scores, indices

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Time taken to get scores on 503 embeddings: 0.00033 seconds.


(tensor([0.7837, 0.6978, 0.6846, 0.6817, 0.6805]),
 tensor([  0,  17, 164,  47, 158]))

In [46]:
# Print out the texts of the top scores
print_top_results_and_scores(query=query, embeddings=embeddings)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Time taken to get scores on 503 embeddings: 0.00029 seconds.
Query: Proposed method for detection of Infants with Autism Spectrum Disorder Using Auto-Encoder Feature Representation

Results:
Score: 0.7837
.sensorsbyLetterLetter Deep-Learning-Based Detection of Infants with Autism
Spectrum Disorder Using Auto-Encoder Feature RepresentationJung Hyuk Lee 1, Geon
Woo Lee 1, Guiyoung Bong 2, Hee Jeong Yoo 2,3 and Hong Kook Kim 1,*1School of
Electrical Engineering and Computer Science, Gwangju Institute of Science and
Technology, Gwangju 61005, Korea; ljh0412@gist.ac.kr (J. H. L.);
geonwoo0801@gist.ac.kr (G. W. L.) 2 Department of Psychiatry, Seoul National
University Bundang Hospital, Seongnam-si,Gyeonggi-do 13620, Korea;
20409@snubh.org (G. B.); hjyoo@snu.ac.kr (H. J. Y.)3 Department of Psychiatry,
College of Medicine, Seoul National University, Seoul 03980, Korea *
Correspondence: hongkook@gist.ac.krReceived: 29 October 2020; Accepted: 24
November 2020; Published: 26 November 2020c

# **Finally let's set up some query and get the most relevant results.**

In [47]:
questions = ['''How does the system architecture in the developed work utilize 
ROS for robot-centered systems, and what are the key features of this framework?''',
             '''How effective is the Video-referenced Infant Rating System for 
             Autism (VIRSA) in identifying autism spectrum disorder (ASD) risk in infancy?''',
             '''What are the key psychometric properties of the Video-referenced Infant Rating System 
             for Autism (VIRSA) that were examined in the study, and what were the findings?''']

In [48]:
for question in questions:
  print_top_results_and_scores(query=question, embeddings=embeddings)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Time taken to get scores on 503 embeddings: 0.00089 seconds.
Query: How does the system architecture in the developed work utilize 
ROS for robot-centered systems, and what are the key features of this framework?

Results:
Score: 0.5763
Fig.1 Node graph architecture of the proposed ROS-based system. The system is
composed of two interconnected modules, an artificial reasoning module and a
CRI-channel module. The ONO web server has two way of bidirectional
communication: a websocket and a standard ROS SubscriberROS Manager /ROS_core
/Kinect2_bridge g /Fusion_Node /kinect2_n/hd/ image_color_rect ry Sound
/ROS_bridge Controller /tf_transform websocket /Detect_Recognize X /nav_msgs/
Head_Odomets ONO Web Server g /NFOA_Node /known_face n /std_msgs/header/
/ONO_Node Servo g vfoa_msgs Controller /CLNF_node 8 Artificial Reasoning Module
Child-Robot Interaction Modul T Topic Wire Topic Wireless - Local leo & oI
Connection Connection 5, ",developed in the server-side application to direct

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Time taken to get scores on 503 embeddings: 0.00024 seconds.
Query: How effective is the Video-referenced Infant Rating System for 
             Autism (VIRSA) in identifying autism spectrum disorder (ASD) risk in infancy?

Results:
Score: 0.8611
HHS Public Access Author manuscript J Child Psychol Psychiatry. Author
manuscript; available in PMC 2021 January 01. Published in final edited form
as:J Child Psychol Psychiatry.2020 January ; 61(1):
88–94.doi:10.1111/jcpp.13105. A Video-Based Measure to Identify Autism Risk in
InfancyGregory S. Young, PhDa, John N. Constantino, MDb, Simon Dvorak, BSc,
Ashleigh Belding, MPHa, Devon Gangi, PhDa, Alesha Hill, BAa, Monique Hill, MAa,
Meghan Miller, PhDa, Chandni Parikh, PhDa, AJ Schwichtenberg, PhDd, Erika Solis,
BSa, Sally Ozonoff, PhDa aDepartment of Psychiatry & Behavioral Sciences, MIND
Institute, University of California-DavisbDepartment of Psychiatry, Washington
University-St. Louis School of MedicinecInformation and Educational Tech

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Time taken to get scores on 503 embeddings: 0.00022 seconds.
Query: What are the key psychometric properties of the Video-referenced Infant Rating System 
             for Autism (VIRSA) that were examined in the study, and what were the findings?

Results:
Score: 0.8379
HHS Public Access Author manuscript J Child Psychol Psychiatry. Author
manuscript; available in PMC 2021 January 01. Published in final edited form
as:J Child Psychol Psychiatry.2020 January ; 61(1):
88–94.doi:10.1111/jcpp.13105. A Video-Based Measure to Identify Autism Risk in
InfancyGregory S. Young, PhDa, John N. Constantino, MDb, Simon Dvorak, BSc,
Ashleigh Belding, MPHa, Devon Gangi, PhDa, Alesha Hill, BAa, Monique Hill, MAa,
Meghan Miller, PhDa, Chandni Parikh, PhDa, AJ Schwichtenberg, PhDd, Erika Solis,
BSa, Sally Ozonoff, PhDa aDepartment of Psychiatry & Behavioral Sciences, MIND
Institute, University of California-DavisbDepartment of Psychiatry, Washington
University-St. Louis School of MedicinecInforma

## Great!! 

## Now you can choose your desired LLM and use it to generate your text.

## Also try the notebook on your own set of PDFs and see how it works!