In [64]:
from langchain.agents import AgentExecutor, create_openai_tools_agent, create_react_agent
from langchain_openai import ChatOpenAI
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool
from langchain_community.agent_toolkits import JsonToolkit
from langchain_community.tools.json.tool import JsonSpec
from langchain.agents import create_json_agent
from langchain_community.utilities.gitlab import GitLabAPIWrapper
from langchain.callbacks import get_openai_callback
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import Tool
from langchain_experimental.utilities import PythonREPL
from langchain import hub
from typing import Optional, Type
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun
from typing import List
from langchain_community.agent_toolkits.base import BaseToolkit
import dateparser
import operator
import datetime
import json
import re


In [65]:
# Deterministic (temperature=0) chat model used by the cells below.
llm = ChatOpenAI(model="gpt-4-0613", temperature=0)

# Load both metadata files. The context managers close the file handles as
# soon as parsing finishes instead of leaving them open until garbage collection.
with open('metadata.json') as metadata_file:
    data = json.load(metadata_file)
with open('metadata1.json') as metadata1_file:
    data1 = json.load(metadata1_file)

In [99]:
relevant_keys = ['title', 'date', 'source', 'type', 'register_num']

# Values the `type` field may take; bare occurrences become string literals.
_DOCUMENT_TYPES = ['lecture', 'article', 'patent']

# Expression that turns an entry's date string into a datetime for comparisons.
_DATE_EXPRESSION = "datetime.datetime.strptime(e['date'], '%Y-%m-%d')"

# One token per match: a quoted literal (left untouched), or a whole-word
# field name / document type / four-digit number.
_CONDITION_TOKEN = re.compile(
    r"""'[^']*'|"[^"]*"|\b(?:%s|\d{4})\b"""
    % "|".join(re.escape(word) for word in relevant_keys + _DOCUMENT_TYPES)
)


def format_string(s):
    """Translate a search condition into a Python expression over an entry ``e``.

    The rewrite is done in a single pass so that text produced by one
    substitution is never rewritten by a later one:

    * field names (``relevant_keys``) become ``e['<field>']``; ``date`` is
      additionally wrapped in ``datetime.datetime.strptime(..., '%Y-%m-%d')``;
    * bare document types (lecture, article, patent) become quoted strings;
    * standalone four-digit numbers become ``dateparser.parse('<number>')``;
    * anything already inside single or double quotes is left as is, and only
      whole words are matched, so ``type == 'article'`` or a source name that
      merely contains "date" is no longer corrupted.

    Args:
        s: Condition string, e.g. ``type == lecture and date < 1905``.

    Returns:
        The rewritten expression as a string. It refers to the names ``e``,
        ``datetime`` and ``dateparser``, which must exist where it is evaluated.
    """
    def rewrite(match):
        token = match.group(0)
        if token[0] in "'\"":
            # Quoted literal supplied by the caller: keep verbatim.
            return token
        if token == 'date':
            return _DATE_EXPRESSION
        if token in relevant_keys:
            return f"e['{token}']"
        if token in _DOCUMENT_TYPES:
            return f"'{token}'"
        # Remaining alternative of the pattern: a standalone four-digit number.
        # NOTE(review): how dateparser fills the missing month/day of a bare
        # year depends on its settings — confirm the resulting boundary is the
        # intended one for year-range queries.
        return f"dateparser.parse('{token}')"

    return _CONDITION_TOKEN.sub(rewrite, s)

class TeslaToolGetDocuments(BaseTool):
    """Tool that filters the in-memory document entries by a condition string.

    The condition is rewritten by ``format_string`` into a Python expression
    over a single entry ``e`` and evaluated once per entry in ``data``.
    """

    name = "tesla_tool_get_documents"
    description = """
    Can be used to list all relevant Nikola Tesla document entries from a dictionary in memory based on a condition.
    All entries are about documents written by Nikola Tesla, so don't consider him a source, sources are only considered for article publishers.
    Fields that are available for search are title(str), date(str), source(str), type(str, possible values lecture, article, patent), register_num(str)
    Operations that are available for search are "or, and, ==, <=, >=, <, >, !="
    The tool takes as input a string representing conditions for search delimited with a single whitespace.
    Example tool input:
        type == lecture and date < 1905 and date > 1900
    """
    return_direct: bool = True

    # Entries to search; each one is indexed by field name in the condition.
    data: List

    def _run(self, tool_input: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> List:
        """Return the entries of ``self.data`` that satisfy ``tool_input``.

        Args:
            tool_input: Search condition in the syntax given in ``description``.
            run_manager: Accepted for interface compatibility; not used.

        Returns:
            The matching entries, in their original order.

        Raises:
            SyntaxError: If the rewritten condition is not a valid expression.
        """
        parsed_condition = format_string(tool_input)
        # SECURITY: eval() executes arbitrary Python, and tool_input is
        # generated by the LLM from user text. Only use this with trusted
        # input, or replace it with a restricted expression evaluator.
        # Compile once so the condition is not re-parsed for every entry.
        condition = compile(parsed_condition, "<tool_input>", "eval")
        # Module globals supply `datetime` and `dateparser`; the entry is
        # passed explicitly as `e`.
        return [e for e in self.data if eval(condition, globals(), {"e": e})]

    async def _arun(
        self, tool_input: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> List:
        """Asynchronous entry point; delegates to ``_run`` and returns its result."""
        # The filtering is in-memory work, so the synchronous path is reused.
        return self._run(tool_input)

In [100]:
# Wrap the loaded metadata entries in the custom search tool.
custom_tool = TeslaToolGetDocuments(data=data)
# NOTE(review): this rebinds `llm` to a different model and, unlike the other
# ChatOpenAI instances in this notebook, does not set temperature — confirm
# that is intended.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")
tools = [custom_tool]
# ReAct prompt template, fetched by name from the LangChain hub.
prompt = hub.pull("hwchase17/react")

In [101]:
# Build a ReAct agent over the single custom tool and wrap it in an executor;
# verbose=True makes the executor print its intermediate steps.
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [102]:
# Ask a question that combines two document types with a year range; the
# executor's return value is kept in `res`.
res = agent_executor.invoke({"input": "can you give me the list of articles and patents of Nikola Tesla between 1901 and 1905?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the tesla_tool_get_documents to search for articles and patents written by Nikola Tesla between 1901 and 1905.
Action: tesla_tool_get_documents
Action Input: type == article or type == patent and date >= 1901 and date <= 1905[0me['type'] == 'article' or e['type'] == 'patent' and datetime.datetime.strptime(e['date'], '%Y-%m-%d') >= dateparser.parse('1901') and datetime.datetime.strptime(e['date'], '%Y-%m-%d') <= dateparser.parse('1905')
[36;1m[1;3m['Signals to Mars Based on Hope of Life on Planet', "Nikola Tesla Tells of Country's War Problems", "Tesla's New Discovery", 'Tesla and Marconi', "Mr. Tesla's Vision", 'My Apparatus, Says Tesla', 'Sleep From Electricity', 'Can Bridge the Gap to Mars', 'Mr. Tesla on the Wireless Transmission of Power', 'Nikola Tesla Objects', 'Wireless on Railroads', "Tesla's Wireless Torpedo", 'The Future of the Wireless Art', 'Tesla on the Peary North Pole Expedition ', 'Reference to

In [None]:
# Inspect the entries held by the first tool registered on the executor.
agent_executor.tools[0].data

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.prompt_selector import ConditionalPromptSelector


In [None]:
# from langchain.llms import OpenAI
# from langchain.chains.prompt_selector import ConditionalPromptSelector
# from langchain.prompts import PromptTemplate

# # Set up the four LLMs
# initial_llm = OpenAI(model_name="text-ada-001")
# llm_1 = OpenAI(model_name="text-curie-001")
# llm_2 = OpenAI(model_name="text-babbage-001")
# llm_3 = OpenAI(model_name="text-davinci-002")

# # Define prompts for the three subsequent LLMs
# prompt_1 = PromptTemplate(input_variables=["input"], template="Prompt 1: {input}")
# prompt_2 = PromptTemplate(input_variables=["input"], template="Prompt 2: {input}") 
# prompt_3 = PromptTemplate(input_variables=["input"], template="Prompt 3: {input}")

# # Set up conditional logic to select prompt based on initial LLM output
# prompt_selector = ConditionalPromptSelector(
#     default_prompt=prompt_1,
#     conditionals=[
#         (lambda output: "condition_1" in output, prompt_2),
#         (lambda output: "condition_2" in output, prompt_3)
#     ]
# )

# # Take user input and generate initial LLM output
# user_input = "Enter your prompt here"
# initial_output = initial_llm(user_input)

# # Select prompt based on initial output and generate final output
# selected_prompt = prompt_selector.get_prompt(initial_output)
# final_output = selected_prompt.format_prompt(initial_output).to_string()

In [1]:
import pandas as pd
import sqlite3

# Load the JSON file into a pandas DataFrame
df = pd.read_json('metadata.json')

# Create a connection to the SQLite database
conn = sqlite3.connect('metadata.db')
try:
    # Write the frame to the 'metadata' table. if_exists='replace' keeps this
    # cell re-runnable: the default ('fail') raises ValueError as soon as the
    # table exists from a previous run.
    df.to_sql('metadata', conn, if_exists='replace')
finally:
    # Close the connection even if the write fails
    conn.close()

In [3]:
# Reopen the database for the query cells below; the cell that created the
# table closed its own connection.
conn = sqlite3.connect('metadata.db')


In [33]:
# Execute a SQL query to select all rows from the 'metadata' table
query = "SELECT * FROM metadata"

# Create a cursor object from the connection
cursor = conn.cursor()

# Execute the query and fetch all rows
cursor.execute(query)
result = cursor.fetchall()

# Convert the result to a pandas DataFrame for easier viewing
df_result = pd.DataFrame(result, columns=[column[0] for column in cursor.description])

# Display the DataFrame
print(df_result)


Number of tables: 1


In [34]:
# List the columns that came back from the 'metadata' table.
column_names = df_result.columns
print(column_names)


Index(['index', 'id', 'title', 'date', 'type', 'source', 'register_num',
       'file_url', 'file_OAI_id', 'assistant_OAI_id', 'fle_url'],
      dtype='object')


In [38]:
# Rows whose document type is 'lecture'
df_result.loc[df_result['type'] == 'lecture']

Unnamed: 0,index,id,title,date,type,source,register_num,file_url,file_OAI_id,assistant_OAI_id,fle_url
176,176,208,On Mechanical and Electrical Oscillators,1893-08-25 00:00:00,lecture,Archives of the Nikola Tesla Museum,,6 On Mechanical and Electrical Oscillators (le...,file-HTiMgHMAfXh4yohdff4PAM6D,asst_X4gAhH13Qlg0qQPCPXf0fevQ,
177,177,209,On Electricity,1897-01-27 00:00:00,lecture,Archives of the Nikola Tesla Museum,,7 On Electricity (lecture).docx,file-qOu3STM95zdEy1iUmh2EheeK,asst_NYy5NG7ohncfI0d4VY3q4MeY,
178,178,210,High Frequency Oscillators for Electro-Therape...,1898-11-17 00:00:00,lecture,Archives of the Nikola Tesla Museum,,8 High Frequency Oscillators for Electro-Thera...,file-ynJinCOm0FTEGTHZiWyi4Loh,asst_awfY3mRnimRmSFb21HuHH15q,
179,179,211,A New System of Alternate Current Motors and T...,1888-05-16 00:00:00,lecture,Archives of the Nikola Tesla Museum,,A New System of Alternate Current Motors and T...,file-hmMlI3olGO9FZMDmEm99fIP6,asst_F3wI4D4KQrVPwQ9Q4HxLUBxC,
180,180,212,Experiments with Alternating Currents of Very ...,1891-05-20 00:00:00,lecture,Archives of the Nikola Tesla Museum,,Experiments With Alternating Currents of Very ...,file-KGzzEcOBy1uki7B9f7c02gHW,asst_4VK1RTjCHcO24XG716AKqJ2z,
181,181,213,Experiments with Alternate Currents of High Po...,1892-02-01 00:00:00,lecture,Archives of the Nikola Tesla Museum,,Experiments With Alternate Currents of High Po...,file-Z9CUj8oKDENLB8EhV9vHdPGa,asst_nHsymJxwj7dNnIfuDCY0Z02c,
313,313,376,On Light and Other High Frequency Phenomena,1893-02-24 00:00:00,lecture,Archives of the Nikola Tesla Museum,,On Light and Other High Frequency Phenomena.docx,file-Er0S9O1PzPvAb34OfEnB8Nji,asst_WbcviPJ0kesx6Heq9tw1pDTx,
314,314,377,Lecture before the New York Academy of Science,1897-04-06 00:00:00,lecture,Archives of the Nikola Tesla Museum,,Lecture Before the New York Academy of Science...,file-hu7VYPQHfloGhsz9XxOwPf3Z,asst_STIVPrTFqPHMhrg6tvqox740,


In [1]:
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
from langchain_openai import ChatOpenAI

# Point LangChain's SQL wrapper at the SQLite file created earlier.
db = SQLDatabase.from_uri("sqlite:///metadata.db")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
# NOTE(review): this reuses the name `agent_executor` from the ReAct agent
# above, so that executor is no longer reachable after this cell runs.
# top_k=200 presumably caps the rows the agent requests per query — confirm
# against the create_sql_agent documentation.
agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True, top_k=200)

In [44]:
# Prompt template describing the table to the SQL agent; {0} is replaced by
# the user's question through str.format below.
sql_prompt = """
You have a single table called metadata that contains entries of files by Nikola Tesla, columns of interest are: 'index', 'id', 'title', 'date', 'type', 'source', 'register_num'.
Source is available only for articles, and is the publisher of the article.
Register_num is available only for patents.

Based on that answer the following question: {0}
"""
question = "can you list Nikola Tesla lectures between 1895 and 1903 chronologically?"
# The fully formatted prompt is passed to the agent as a single string.
r = agent_executor.invoke(sql_prompt.format(question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``


[0m[38;5;200m[1;3mmetadata[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'metadata'}`


[0m[33;1m[1;3m
CREATE TABLE metadata (
	"index" INTEGER, 
	id INTEGER, 
	title TEXT, 
	date TIMESTAMP, 
	type TEXT, 
	source TEXT, 
	register_num TEXT, 
	file_url TEXT, 
	"file_OAI_id" TEXT, 
	"assistant_OAI_id" TEXT, 
	fle_url TEXT
)

/*
3 rows from metadata table:
index	id	title	date	type	source	register_num	file_url	file_OAI_id	assistant_OAI_id	fle_url
0	13	Signals to Mars Based on Hope of Life on Planet	1919-10-12 00:00:00	article	New York Tribune	None	Signals to Mars Based on Hope of Life on Planet UNI10.docx	file-dQMFNEoWLJgarLzEw4L5ii3j	asst_vcbJYNSJmCOtUvE6xz0FqQpt	None
1	15	Nikola Tesla Tells of Country's War Problems	1917-04-15 00:00:00	article	New York Herald	None	Nikola Tesla Tells of Country's War Problems.docx	file-KQqGw7z1mRcVUdTnSkf3Gmvh	asst_X93wCywCMN

In [8]:
import json
import sqlite3
import pandas as pd

# Execute a SQL query to select all rows from the table
query = "SELECT * FROM metadata"

# Open the SQLite database once and make sure it is closed even if the query
# fails
conn = sqlite3.connect('metadata.db')
try:
    cursor = conn.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
    # Read the column names while the connection is still open
    columns = [column[0] for column in cursor.description]
finally:
    conn.close()

# Convert the result to a pandas DataFrame for easier viewing
df_result = pd.DataFrame(result, columns=columns)

# Load the assistant summaries from the JSON file
with open('assistant_summary.json') as f:
    json_data = json.load(f)

# Create a DataFrame from the JSON data
df_json = pd.DataFrame(json_data)

# Attach each summary to its metadata row by matching the assistant ids
# (default inner join: rows without a counterpart on either side are dropped)
df_merged = pd.merge(df_result, df_json, left_on='assistant_OAI_id', right_on='assistant_id')

# Display the merged DataFrame
print(df_merged)
# print(df_result)


     index   id                                            title  \
0        0   13  Signals to Mars Based on Hope of Life on Planet   
1        1   15     Nikola Tesla Tells of Country's War Problems   
2        2   16                            Tesla's New Discovery   
3        3   17                                Tesla and Marconi   
4        4   18                               Mr. Tesla's Vision   
..     ...  ...                                              ...   
354    354  419               Wireless Messages Beyond the Ocean   
355    355  420                           Wonders in Electricity   
356    356  421                            Zmai Iovan Iovanovich   
357    357  422           The Problem of Increasing Human Energy   
358    358  423   On Light and Other High Frequency Phenomena II   

                    date     type                                    source  \
0    1919-10-12 00:00:00  article                          New York Tribune   
1    1917-04-15 00:00:00 

In [15]:
# Pull the title and summary columns out as plain Python lists
titles = df_merged['title'].tolist()
summaries = df_merged['summary'].tolist()

# Spot-check one merged row: title on the first line, its summary on the next
print(titles[10], summaries[10], sep="\n")


DYNAMO-ELECTRIC MACHINE
The document is titled "TESLA PATENT 359748 DYNAMO-ELECTRIC MACHINE" and describes Nikola Tesla's invention of an improvement in Dynamo-Electric Machines. The main objectives of the invention are to increase the efficiency of the machine and to facilitate and cheapen its construction. The invention is related to the magnetic frame, armature, and other construction features.


In [6]:
import json

# Load the JSON file
# NOTE(review): `data` is also the name used for the metadata.json contents
# earlier in this notebook; running this cell rebinds it to the assistant
# summaries — confirm no other cell relies on the earlier value.
with open('assistant_summary.json') as f:
    data = json.load(f)

# Access the loaded data
# Example: print the content of the JSON file
print(data)


[{'assistant_id': 'asst_WVPUq16wJsIAv9HDC6VLLb6O', 'summary': 'The document is a patent by Nikola Tesla for improvements in Line Conductors for Systems of Electrical Distribution, dated February 6, 1894. It describes a conductor for electrical circuits composed of a wire for carrying the current, an insulated coating or covering, and a surrounding conducting sheath or screen divided into insulated sections. Additionally, it mentions the combination of a connection between the sheathing and the ground containing a condenser of very small capacity or its equivalent.'}, {'assistant_id': 'asst_ADkBrXbFW53urIozxanJfDDN', 'summary': 'The document is a patent specification by Nikola Tesla dated May 1, 1888, describing his invention of certain new and useful improvements in electro-magnetic motors. It details the construction and operation of the motor, including the use of coils, alternating-current generator, continuous current, and the method for effecting a progressive shifting of the pole