In [None]:
%pip install langchain mysql-connector-python tiktoken

In [2]:
# import required libraries
from langchain.chat_models import ChatOpenAI
from langchain.chains import create_tagging_chain, create_tagging_chain_pydantic
from langchain.prompts import ChatPromptTemplate
import openai
from dotenv import load_dotenv
import os
import tiktoken
import time

In [3]:
# Setting up OpenAI API Key
# Load variables from a local .env file (if one exists) into the process
# environment first; load_dotenv was imported but never called, so a key kept
# in .env was never visible to os.getenv. Existing environment variables are
# not overridden by default.
load_dotenv()

# NOTE(review): '<OPENAI_API_KEY>' looks like a redacted placeholder. os.getenv
# expects the *name* of the environment variable, not the key itself - confirm.
openai.api_key = os.getenv('<OPENAI_API_KEY>')

In [4]:
# Connecting with MySQL database
import mysql.connector

# The password can be supplied through the MYSQL_PASSWORD environment variable
# so it does not have to live in the notebook; the original literal is kept
# only as a backward-compatible fallback.
myconn = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd=os.getenv("MYSQL_PASSWORD", "<password>"),
    database="llm",
)
cur = myconn.cursor()

# connect() raises on failure, and testing the object itself says nothing about
# the link, so ask the connection whether the server is actually reachable.
if myconn.is_connected():
    print("success")

success


In [5]:
# Initializing ChatOpenAI instance with required specifications
#
# Active configuration (used for the temperature variation runs) is below.
# Alternative kept for the top_p variation runs:
#   llm = ChatOpenAI(model="gpt-4-0613",model_kwargs={"top_p":1.0})
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0613",
    temperature=0.0,
)

In [6]:
# Defining the structure for extracted information.
# Every field is a free-text string except "Investment Amount", which is an
# integer. Key order is preserved because later cells iterate over it.
schema = {
    "properties": {
        **{
            field_name: {"type": "string"}
            for field_name in (
                "Company Name",
                "Company URL",
                "Company Description",
                "Company HQ Country",
                "Funding amount",
                "Funding round",
                "Founder Name",
                "Founder Designation",
                "Investors Name",
            )
        },
        "Investment Amount": {"type": "integer"},
    }
}

In [7]:
# Creating a tagging chain that fills the fields of `schema` using `llm`
chain = create_tagging_chain(schema=schema, llm=llm)

In [8]:
# Configuration parameters used for model: the keyword arguments the chain
# passes on each LLM call. As the output below shows, these are the
# function-calling definition generated from `schema` ('functions') and the
# forced call to that function ('function_call').
print(chain.llm_kwargs)

{'functions': [{'name': 'information_extraction', 'description': 'Extracts the relevant information from the passage.', 'parameters': {'type': 'object', 'properties': {'Company Name': {'title': 'Company Name', 'type': 'string'}, 'Company URL': {'title': 'Company URL', 'type': 'string'}, 'Company Description': {'title': 'Company Description', 'type': 'string'}, 'Company HQ Country': {'title': 'Company HQ Country', 'type': 'string'}, 'Funding amount': {'title': 'Funding amount', 'type': 'string'}, 'Funding round': {'title': 'Funding round', 'type': 'string'}, 'Founder Name': {'title': 'Founder Name', 'type': 'string'}, 'Founder Designation': {'title': 'Founder Designation', 'type': 'string'}, 'Investors Name': {'title': 'Investors Name', 'type': 'string'}, 'Investment Amount': {'title': 'Investment Amount', 'type': 'integer'}}, 'required': []}}], 'function_call': {'name': 'information_extraction'}}


In [9]:
# Inspect the output parser attached to the chain (displayed as the cell result)
chain.output_parser

JsonOutputFunctionsParser()

In [12]:
# Sample news article used as the input text: it is tokenized for the token
# count and passed to the tagging chain for extraction.
article="""Battery recycling startup BatX raises $5 million in funding from Zephyr Peacock, Lets Venture
Synopsis
The startup said the funds will be used for market expansion and to scale up recycling operations across the country.

Lithium battery recycling startup BatX Energies said on Tuesday it has raised $5 million in funding from Zephyr Peacock and Lets Venture. Existing investors JITO Angel Network and the family offices of Mankind Pharma, Excel Industries and BluSmart also participated in the round.

The startup said the funds will be used for market expansion and to scale up recycling operations across the country.

“We are focused on reducing critical materials imports for India and producing them by recycling. With this growth capital, we will showcase developments in setting up industry standards. This investment signifies our unwavering commitment to reshaping the energy sector and circular economy,” said Utkarsh Singh, cofounder & CEO of BatX Energies.

The startup aims to invest in the scaling up of the Hydrogen electrode-based critical material extraction and refining capacities, and commercial R&D for PCAM development from recycled battery minerals. Made through precipitation, precursor cathode active materials (PCAM) are a mixed-metal hydroxide of nickel, cobalt, and other chemical elements.

This will enable a full-stack recycling and refurbishing solution for original equipment manufacturers (OEMs) and their customers.
"""

In [13]:
# Counting number of Input Tokens
# Pick the tokenizer from the model the chain actually runs (llm.model_name)
# instead of a hard-coded model name, so the count stays correct when the
# `llm` cell is switched to a different model.
encoding = tiktoken.encoding_for_model(llm.model_name)
token_count = len(encoding.encode(article))
print(f"The text contains {token_count} tokens.")

The text contains 278 tokens.


In [11]:
# Execute the chain and record time taken
# `time` is already imported in the imports cell at the top of the notebook.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
results = chain.run(article)
end_time = time.perf_counter()
time_taken = end_time - start_time  # elapsed seconds
print(results)
print("Time Taken: ",time_taken)

In [30]:
# Structure the output of the model: for every field declared in the schema,
# keep the returned value only when it is present and has the declared type;
# otherwise store None.

# Maps the schema type names used in `schema` to the Python types accepted.
_SCHEMA_TYPE_TO_PYTHON = {"string": str, "integer": int}


def _matches_schema_type(value, expected_type):
    """Return True when `value` is an instance of the Python type for `expected_type`.

    Unknown schema type names never match. Booleans are rejected explicitly:
    bool is a subclass of int, so a JSON true/false would otherwise be accepted
    as an "integer".
    """
    python_type = _SCHEMA_TYPE_TO_PYTHON.get(expected_type)
    if python_type is None or isinstance(value, bool):
        return False
    return isinstance(value, python_type)


extracted_info = {
    field: (
        results[field]
        if field in results and _matches_schema_type(results[field], field_schema["type"])
        else None
    )
    for field, field_schema in schema["properties"].items()
}

print(extracted_info)

{'Company Name': 'BatX Energies', 'Company URL': None, 'Company Description': None, 'Company HQ Country': None, 'Funding amount': '$5 million', 'Funding round': 'Not specified', 'Founder Name': 'Utkarsh Singh', 'Founder Designation': 'cofounder & CEO', 'Investors Name': 'Zephyr Peacock, Lets Venture, JITO Angel Network, the family offices of Mankind Pharma, Excel Industries and BluSmart', 'Investment Amount': None}


In [14]:
# Export and store extracted information to database
def _execute_and_commit(query, params, success_message):
    """Run one statement on the shared cursor `cur` and commit it on `myconn`.

    Best-effort: on any exception the transaction is rolled back and the error
    is printed (with its message) instead of being raised.

    Returns True when the statement was committed, False otherwise.
    """
    try:
        cur.execute(query, params)
        myconn.commit()
    except Exception as exc:  # not a bare except: Ctrl-C / SystemExit still propagate
        myconn.rollback()
        print("error:", exc)
        return False
    print(success_message)
    return True


def insert_into_db(output,article,model):
    """Store one extraction result in the `extraction_results` table.

    Two statements are issued, each committed separately:
      1. INSERT a row holding only (article_id, model_id).
      2. UPDATE the row matching (article_id, model_id) with the extracted fields.
    The UPDATE is attempted even when the INSERT fails, matching the original
    behaviour.

    Args:
        output: mapping of schema field name -> extracted value; missing keys
            are written as NULL (None).
        article: value for the article_id column.
        model: value for the model_id column.

    Returns:
        None. Progress and errors are reported with print().
    """
    insert_query="insert into extraction_results(article_id,model_id) values(%s,%s);"
    _execute_and_commit(insert_query, (article, model), "New record created!")

    update_query = """
        UPDATE extraction_results 
        SET 
            Company_Name = %s,
            Company_url = %s,
            Company_description = %s,
            HQ_country = %s,
            Funding_amount = %s,
            funding_round = %s,
            Founder_name = %s,
            Founder_designation = %s,
            Investor_name = %s,
            Investment_amount = %s
        WHERE article_id = %s AND model_id = %s;
    """

    # Keys of `output`, listed in the same order as the SET placeholders above.
    field_keys = (
        "Company Name",
        "Company URL",
        "Company Description",
        "Company HQ Country",
        "Funding amount",
        "Funding round",
        "Founder Name",
        "Founder Designation",
        "Investors Name",
        "Investment Amount",
    )
    values = tuple(output.get(key) for key in field_keys) + (article, model)

    _execute_and_commit(update_query, values, "Information Updated!")

In [31]:
# Store the schema-validated result (extracted_info) rather than the raw model
# output, so values with an unexpected type are written as NULL.
insert_into_db(extracted_info, article=2, model=8)

New record created!
Information Updated!
