In [10]:
from llama_index.core import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.openai import OpenAI
from llama_index.core import ServiceContext
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.core import get_response_synthesizer
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.core.query_engine.retriever_query_engine import (
    RetrieverQueryEngine,
)
from llama_index.core.llama_dataset import LabelledRagDataset
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.response_synthesizers import ResponseMode
import chromadb
import os
import shutil

from dotenv import load_dotenv
load_dotenv()

import nest_asyncio
nest_asyncio.apply()

In [3]:
from llama_index.readers.file import UnstructuredReader
from pathlib import Path

In [12]:
# https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file/unstructured
dir_reader = SimpleDirectoryReader(
    "./documents",
    file_extractor={
        ".pdf": UnstructuredReader(),
        ".html": UnstructuredReader(),
        ".eml": UnstructuredReader(),
        ".csv": UnstructuredReader(),
        ".png": UnstructuredReader(),
        ".pptx": UnstructuredReader(),
    },
)
documents = dir_reader.load_data()

This function will be deprecated in a future release and `unstructured` will simply use the DEFAULT_MODEL from `unstructured_inference.model.base` to set default model name
This function will be deprecated in a future release and `unstructured` will simply use the DEFAULT_MODEL from `unstructured_inference.model.base` to set default model name


In [14]:
transformations=[
            SentenceSplitter(chunk_size=512, chunk_overlap=50),
        ]
pipeline = IngestionPipeline(
    transformations=transformations  
)
documents = pipeline.run(documents=documents)

In [15]:
index = VectorStoreIndex(documents)
index.storage_context.persist(persist_dir='./data/unstructured_index')

In [32]:
query_engine = index.as_query_engine()
response = query_engine.query("Who wrote the article about El Nino?")

In [33]:
print('Response: ', response.response)

Response:  Mary Gilbert


In [42]:
import pprint

for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

CNN

1/30/2024

A potent pair of atmospheric rivers will drench California as El Niño makes its first mark on winter

By Mary Gilbert, CNN Meteorologist

Updated: 3:49 PM EST, Tue January 30, 2024

So 


Score: 0.8289936828183206
--------------------------------------------------
A wet, cooler pattern is likely to extend well after the second atmospheric river wraps up.

Below average temperatures and above average precipitation amounts are likely to persist into mid-February, 


Score: 0.8049290459384449
--------------------------------------------------


In [43]:
response = query_engine.query("Which years of trade data are included?")
print('Response: ', response.response)

Response:  2018, 2019, 2020, 2021, 2022, and 2023


In [44]:
for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

China Australia 12 Coffee Company C 3 4 2023 Italy Spain 17 Oil Company E 5 1 2023 China India 13 Wheat Company C 1 4 2020 Germany Japan 1 Corn Company C 2 9 2021 USA Germany 10 Copper Company D 2 12  


Score: 0.8060611706373232
--------------------------------------------------
USA Germany 15 Corn Company A 1 6 2021 Spain UK 9 Soybeans Company A 2 1 2018 Italy Japan 11 Coffee Company B 5 4 2023 Italy France 11 Corn Company A 5 5 2021 UK Japan 5 Wheat Company D 1 3 2021 USA U 


Score: 0.8048798831075077
--------------------------------------------------


In [46]:
response = query_engine.query("Which companies is the trade data about?")
print('Response: ', response.response)

Response:  The trade data is about various companies such as Company A, Company B, Company C, Company D, and Company E involved in trading commodities like Corn, Soybeans, Coffee, Wheat, Copper, Oil, Gas, Cotton, Iron Ore, Coal, and Gas.


In [47]:
response = query_engine.query("What is the table in the pdf about?")
print('Response: ', response.response)

Response:  The table in the PDF appears to contain a series of calculations and results related to different models or systems. It includes various numerical values, some of which are followed by additions or changes in parentheses. The table also mentions different models such as GPT-3 175B, Codex (code-davinci-002), and PaLM 540B, along with their corresponding calculations and outcomes.


In [48]:
for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

calc 6.5 17.8 29.5 37.5 (+8.0) 46.6 (+6.5) 20.6 (-4.9) 42.1 40.1 25.5 53.4 20.6 43.2 57.9 (+14.7) 69.3 GPT-3 175B (text-davinci-002) Chain of thought 46.9 (+31.3) 68.9 (+3.2) 71.3 (+1.0) 35.8 (+11.0)  


Score: 0.7748590543065557
--------------------------------------------------
This key is your passport to the powerful features of the Unstructured API.

2. Explore the Pre-Built Data Connectors

The ability to connect with various data sources is crucial in data processing. U 


Score: 0.7488118810214556
--------------------------------------------------


In [54]:
# response not accurate
response = query_engine.query("What is being compared and on which metrics in the table in the pdf?")
print('Response: ', response.response)

Response:  Different AI models are being compared based on their performance improvements on various tasks. The metrics being compared include the increase in performance percentages for different models on a specific task.


In [55]:
for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

calc 6.5 17.8 29.5 37.5 (+8.0) 46.6 (+6.5) 20.6 (-4.9) 42.1 40.1 25.5 53.4 20.6 43.2 57.9 (+14.7) 69.3 GPT-3 175B (text-davinci-002) Chain of thought 46.9 (+31.3) 68.9 (+3.2) 71.3 (+1.0) 35.8 (+11.0)  


Score: 0.7885114158992066
--------------------------------------------------
India Spain 20 Coffee Company D 2 2 2021 Germany Australia 4 Oil Company E 1 10 2020 Australia India 0 Copper Company A 3 3 2018 Spain China 5 Iron Ore Company E 5 2 2019 Australia Germany 7 Cotton Co 


Score: 0.7582670661308508
--------------------------------------------------


In [56]:
response = query_engine.query("How is chain of prompting evaluated?")
print('Response: ', response.response)

Response:  Chain of thought prompting is evaluated based on experimental results for varying models and model sizes on all benchmarks. The evaluation involves comparing the performance of chain-of-thought prompting against standard prompting for various large language models on arithmetic reasoning benchmarks. The evaluation metrics used are accuracy percentages, and the results show that adding an external calculator significantly boosts the performance of chain-of-thought prompting on most tasks.


In [57]:
for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

B All Experimental Results

This section contains tables for experimental results for varying models and model sizes, on all benchmarks, for standard prompting vs. chain-of-thought prompting.

For the 


Score: 0.8263621981825032
--------------------------------------------------
calc 6.5 17.8 29.5 37.5 (+8.0) 46.6 (+6.5) 20.6 (-4.9) 42.1 40.1 25.5 53.4 20.6 43.2 57.9 (+14.7) 69.3 GPT-3 175B (text-davinci-002) Chain of thought 46.9 (+31.3) 68.9 (+3.2) 71.3 (+1.0) 35.8 (+11.0)  


Score: 0.7525997777553156
--------------------------------------------------


In [58]:
response = query_engine.query("What is the image about?")
print('Response: ', response.response)

Response:  The image is related to a document or file named "transformer.png" located in the directory "/Users/davideliu/Desktop/projects/llm_testing/documents/".


In [59]:
for i in range(len(response.source_nodes)):
    node = response.source_nodes[i].text.strip()
    print(node[:200], f'\n\n\nScore:', response.source_nodes[i].score)
    print('-' * 50)

Nx

Add & Norm | Gada. Norm Add & Norm Multi- Head Attention SE a, @ ©

Positional Encoding

Input Embedding

Inputs

Output

Probabilities Add & Norm Feed Forward Add & Norm Multi- Head Attention Add 


Score: 0.743558686519107
--------------------------------------------------
A wet, cooler pattern is likely to extend well after the second atmospheric river wraps up.

Below average temperatures and above average precipitation amounts are likely to persist into mid-February, 


Score: 0.7309814958878271
--------------------------------------------------


In [60]:
response = query_engine.query("Which model in depicted in the picture?")
print('Response: ', response.response)

Response:  The model depicted in the picture is the Transformer model.


## Index analysis

In [66]:
nodes = index.docstore.docs.values()
print(len(nodes))

347


In [72]:
for n in nodes:
    print(n)

Node ID: ecf91828-f20d-41a7-af0d-6c29f3feda9e
Text: B All Experimental Results  This section contains tables for
experimental results for varying models and model sizes, on all
benchmarks, for standard prompting vs. chain-of-thought prompting.
For the arithmetic reasoning benchmarks, some chains of thought (along
with the equations produced) were correct, except the model performed
an arithmetic...
Node ID: d4215138-a244-4468-bcc0-1e96df042ace
Text: calc 6.5 17.8 29.5 37.5 (+8.0) 46.6 (+6.5) 20.6 (-4.9) 42.1 40.1
25.5 53.4 20.6 43.2 57.9 (+14.7) 69.3 GPT-3 175B (text-davinci-002)
Chain of thought 46.9 (+31.3) 68.9 (+3.2) 71.3 (+1.0) 35.8 (+11.0)
87.1 (+14.4) Standard 15.6 65.7 70.3 24.8 72.7 + ext. calc 49.6 70.3
71.1 35.8 87.5 Codex (code-davinci-002) Chain of thought 63.1 (+43.4)
76.4 (+6...
Node ID: 4048b767-e459-4a31-aef5-31b6baed2982
Text: CNN  1/30/2024  A potent pair of atmospheric rivers will drench
California as El Niño makes its first mark on winter  By Mary Gilbert,
CNN Mete