## Setting up OpenAI Model

In [1]:
# Install llama-index libraries
%pip install llama-index-llms-openai llama-index-experimental



In [2]:
import openai
import os

os.environ["OPENAI_API_KEY"] = "ADD_KEY_HERE"

In [3]:
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    Link,
    InputComponent,
)
from llama_index.experimental.query_engine.pandas import (
    PandasInstructionParser,
)
from llama_index.llms.openai import OpenAI
from llama_index.core import PromptTemplate

## Reading Pandas Dataframe

In [4]:
# Reading pandas dataframe containing aluminum design allowables

import pandas as pd

df = pd.read_csv("/content/2024_Aluminum_Design_Properties.csv")
df.head()

Unnamed: 0,Material,Specification,Form,Basis,Min Thickness,Max Thickness,F_tu ksi (L),F_tu ksi (LT),F_tu ksi (ST),F_ty ksi (L),...,F_cy ksi (L),F_cy ksi (LT),F_cy ksi (ST),F_su ksi (L & LT),F_bru ksi (L & LT) e/D=1.5,F_bru ksi (L & LT) e/D=2.0,F_bry ksi (L & LT) e/D=1.5,F_bry ksi (L & LT) e/D=2.0,e_ percent (S-Basis),Elastic_Modulus ksi
0,2024 Aluminum,Plate (T351),AMS 4037 and AMS-QQ-A-250/4,A,0.25,0.499,64,64,64,48,...,39,45,45,38,97,119,72,86,12,10.7
1,2024 Aluminum,Plate (T351),AMS 4037 and AMS-QQ-A-250/4,B,0.25,0.499,66,66,66,50,...,41,47,47,39,100,122,76,90,12,10.7
2,2024 Aluminum,Plate (T351),AMS 4037 and AMS-QQ-A-250/4,A,0.5,1.0,63,63,63,48,...,39,45,45,37,95,117,72,86,8,10.7
3,2024 Aluminum,Plate (T351),AMS 4037 and AMS-QQ-A-250/4,B,0.5,1.0,65,65,65,50,...,41,47,47,38,98,120,76,90,8,10.7
4,2024 Aluminum,Plate (T351),AMS 4037 and AMS-QQ-A-250/4,A,1.0,1.5,62,62,62,47,...,39,44,44,37,94,115,72,86,7,10.7


## Setting up the prompt

In [5]:
instruction_str = (
    "1. Convert the query to executable Python code using Pandas.\n"
    "2. The final line of code should be a Python expression that can be called with the `eval()` function.\n"
    "3. The code should represent a solution to the query.\n"
    "4. PRINT ONLY THE EXPRESSION.\n"
    "5. Do not quote the expression.\n"
)

pandas_prompt_str = (
    "You are working with a pandas dataframe in Python.\n"
    "The name of the dataframe is `df`.\n"
    "This is the result of `print(df.head())`:\n"
    "{df_str}\n\n"
    "Follow these instructions:\n"
    "{instruction_str}\n"
    "Query: {query_str}\n\n"
    "Expression:"
)
response_synthesis_prompt_str = (
    "Given an input question, synthesize a response from the query results.\n"
    "Query: {query_str}\n\n"
    "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
    "Pandas Output: {pandas_output}\n\n"
    "Response: "
)

pandas_prompt = PromptTemplate(pandas_prompt_str).partial_format(
    instruction_str=instruction_str, df_str=df.head(5)
)
pandas_output_parser = PandasInstructionParser(df)
response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str)
llm = OpenAI(model="gpt-3.5-turbo")

In [6]:
qp = QP(
    modules={
        "input": InputComponent(),
        "pandas_prompt": pandas_prompt,
        "llm1": llm,
        "pandas_output_parser": pandas_output_parser,
        "response_synthesis_prompt": response_synthesis_prompt,
        "llm2": llm,
    },
    verbose=True,
)
qp.add_chain(["input", "pandas_prompt", "llm1", "pandas_output_parser"])
qp.add_links(
    [
        Link("input", "response_synthesis_prompt", dest_key="query_str"),
        Link(
            "llm1", "response_synthesis_prompt", dest_key="pandas_instructions"
        ),
        Link(
            "pandas_output_parser",
            "response_synthesis_prompt",
            dest_key="pandas_output",
        ),
    ]
)
# add link from response synthesis prompt to llm2
qp.add_link("response_synthesis_prompt", "llm2")

In [7]:
response = qp.run(
    query_str="Tell me about aluminum and its lowest ftu number at stock thickness 0.25 - 0.49",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: Tell me about aluminum and its lowest ftu number at stock thickness 0.25 - 0.49

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: Tell me about aluminum and its lowest ftu number at stock thickness 0.25 - 0.49

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
        Material Specification                         Form Basis  \
0  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df[(df['Material Specification'].str.contains('Aluminum')) & 
   (df['Min Thickness'] >= 0.25) & 
   (df['Max Thickness'] <= 0.49)]['F_tu ksi (L)'].min()

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: Tell me about aluminum and its lowest ftu number at stock thickn

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py", line 3791, in get_loc
    return self._engine.get_loc(casted_key)
  File "index.pyx", line 152, in pandas._libs.index.IndexEngine.get_loc
  File "index.pyx", line 181, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Material Specification'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/query_engine/pandas/output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars, local_vars))
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/exec_utils.py

In [8]:
print(response.message.content)

The lowest Ftu number for aluminum at a stock thickness of 0.25 - 0.49 is not available due to an error in running the Python code.


In [9]:
response2 = qp.run(
    query_str="What is the lowest ftu allowable for 2024 aluminum alloy plate?",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the lowest ftu allowable for 2024 aluminum alloy plate?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: What is the lowest ftu allowable for 2024 aluminum alloy plate?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
        Material Specification                         Form Basis  \
0  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df['F_tu ksi (L)'].min()

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the lowest ftu allowable for 2024 aluminum alloy plate?
pandas_instructions: assistant: df['F_tu ksi (L)'].min()
pandas_output: 62

[0m[1;3;38;2;155;135;227m> Running module llm2 with input: 
messages: Given an inpu

In [10]:
print(response2.message.content)

The lowest allowable ultimate tensile strength (Ftu) for 2024 aluminum alloy plate is 62 ksi.


In [11]:
response3 = qp.run(
    query_str="what is lowest ftu B basis allowable for 2024 aluminum alloy plate at stock thickness between 0.25 and 0.49",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: what is lowest ftu B basis allowable for 2024 aluminum alloy plate at stock thickness between 0.25 and 0.49

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: what is lowest ftu B basis allowable for 2024 aluminum alloy plate at stock thickness between 0.25 and 0.49

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
        Material Specification                         Form Basis  \
0  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df[(df['Material Specification'].str.contains('2024 Aluminum')) & 
   (df['Form Basis'] == 'B') & 
   (df['Min Thickness'] >= 0.25) & 
   (df['Max Thickness'] <= 0.49)]['F_tu ksi (LT)'].min...

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py", line 3791, in get_loc
    return self._engine.get_loc(casted_key)
  File "index.pyx", line 152, in pandas._libs.index.IndexEngine.get_loc
  File "index.pyx", line 181, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Material Specification'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/query_engine/pandas/output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars, local_vars))
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/exec_utils.py

In [12]:
print(response3.message.content)

The lowest allowable Ftu (ultimate tensile strength) for 2024 aluminum alloy plate with a B basis at a stock thickness between 0.25 and 0.49 is not available due to an error in running the Python code.


In [13]:
response4 = qp.run(
    query_str="What is the average of F_tu mechanical property allowables for 2024 aluminum?",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the average of F_tu mechanical property allowables for 2024 aluminum?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: What is the average of F_tu mechanical property allowables for 2024 aluminum?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
        Material Specification                         Form Basis  \
0  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df['F_tu ksi (L)'].mean()

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the average of F_tu mechanical property allowables for 2024 aluminum?
pandas_instructions: assistant: df['F_tu ksi (L)'].mean()
pandas_output: 64.0

[0m[1;3;38;2;155;135;227m> Running mo

In [14]:
print(response4.message.content)

The average F_tu mechanical property allowable for 2024 aluminum is 64.0 ksi.


In [18]:
response5 = qp.run(
    query_str="For 2024 aluminum plate T351 Basis A and thickness equal to 0.40, what is the F_tu mechanical property allowable?",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: For 2024 aluminum plate T351 Basis A and thickness equal to 0.40, what is the F_tu mechanical property allowable?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: For 2024 aluminum plate T351 Basis A and thickness equal to 0.40, what is the F_tu mechanical property allowable?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
        Material Specification                         Form Basis  \
0  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df[(df['Material Specification'] == '2024 Aluminum  Plate (T351)') & (df['Basis'] == 'A') & (df['Min Thickness'] == 0.40)]['F_tu ksi (L)'].values[0]

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: F

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py", line 3791, in get_loc
    return self._engine.get_loc(casted_key)
  File "index.pyx", line 152, in pandas._libs.index.IndexEngine.get_loc
  File "index.pyx", line 181, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Material Specification'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/query_engine/pandas/output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars, local_vars))
  File "/usr/local/lib/python3.10/dist-packages/llama_index/experimental/exec_utils.py