In [84]:
import openai
from openai import OpenAI
import os
import requests
import logging
from datetime import datetime

In [85]:
# Credentials and service configuration.
# Secrets should be exported in the environment before the kernel starts rather
# than pasted into the notebook. setdefault() keeps an already-exported
# OPENAI_API_KEY instead of overwriting it with the placeholder below.
os.environ.setdefault('OPENAI_API_KEY', '<OPENAI_API_KEY>')  # Placeholder fallback only
client = OpenAI()  # Reads OPENAI_API_KEY from the environment

# Endpoint of the academic-literature search service used for retrieval.
# NOTE(review): plain http, so the bearer token below is sent unencrypted --
# confirm this is acceptable for the deployment.
url = 'http://47.253.94.63:8900/search_academic_db'

# Request headers; the bearer token may be supplied through the
# ACADEMIC_DB_API_KEY environment variable, falling back to the placeholder.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer ' + os.environ.get('ACADEMIC_DB_API_KEY', '<Authorization_key>'),
    'Content-Type': 'application/json'
}

In [86]:
def generate_embedding(prompt_processes, top_k, timeout=60):
    """Fetch reference passages for a process from the academic search service.

    Posts ``{"query": "output of <prompt_processes>", "top_k": top_k}`` as JSON
    to the module-level ``url`` using the module-level ``headers``.

    Args:
        prompt_processes: Name or description of the production process.
        top_k: Number of results requested from the service.
        timeout: Seconds to wait for the service before ``requests`` raises a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The ``"result"`` field of the JSON response when the service answers
        with HTTP 200 (in the outputs recorded in this notebook, a list of
        dicts with ``content`` and ``source`` keys). An empty list for any
        other status code, so callers can continue without references.

    Raises:
        requests.RequestException: On connection problems or timeout.
        KeyError: If a 200 response has no ``"result"`` field.
    """
    payload_processes = {
        "query": f"output of {prompt_processes}",
        "top_k": top_k,
    }
    response_processes = requests.post(
        url, headers=headers, json=payload_processes, timeout=timeout
    )

    if response_processes.status_code != 200:
        # Previously this path fell through to an unbound local variable and
        # crashed with UnboundLocalError; report the failure and degrade to
        # "no references available" instead.
        logging.warning(
            "Request failed with status code: %s", response_processes.status_code
        )
        return []

    return response_processes.json()["result"]

In [87]:
def ask_chatgpt_embedding(prompt_processes, top_k, model,
                          temperature=0.8, max_tokens=2000, top_p=1):
    """Ask a chat model for the inputs and outputs of a production process.

    Retrieves reference passages with ``generate_embedding`` and embeds them,
    together with the process name, in the user prompt sent through the
    module-level OpenAI ``client``.

    Args:
        prompt_processes: Name or description of the production process.
        top_k: Number of reference passages to request from the search service.
        model: Chat model name passed to ``client.chat.completions.create``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The model's reply followed by a footer with the total token usage.
        If anything fails (retrieval or API call), the exception is logged and
        its message is returned as the string instead of being raised.
    """
    try:
        prompt_processes_embedding = generate_embedding(prompt_processes, top_k)

        # NOTE(review): the prompt text contains typos ("reseacrcher",
        # "Decript"); it is left unchanged here so the model input stays
        # exactly as before.
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": "You are an academic reseacrcher learning processes and data for life cycle assessment."
                },
                {
                    "role": "user",
                    "content": f"""
Must Follow:
- List the input and output of production process named "{prompt_processes}" in particular substance matter name, in the same language.
- Consider emissions as output
- Using information from "{prompt_processes_embedding}" (if available) and your own knowledge to provide a logical, clear, and critically analyzed reply.
- Employ bullet points selectively, where they add clarity or organization.
- Cite sources in main text using the Author-Date citation style where applicable.
- Provide a list of references in markdown format of [title.journal.authors.date.](hyperlinks) at the end (or just the source file name), only for the references mentioned in the generated text.
- Use LaTeX quoted by '$' or '$$' within markdown to render mathematical formulas.

Must Avoid:
- Decript anything other than input and output
- List ambiguous broad category names
- Using "e.g." or using "such as"
- Repeat the human's query.
- Translate cited references into the query's language.
- Preface responses with any designation such as "AI:"."""
                }
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p
        )
        # message.content may be None; fall back to an empty reply so the
        # token footer can still be appended.
        answer = response.choices[0].message.content or ""
        return answer + "\n\nThe Used Token \n" + str(response.usage.total_tokens)
    except Exception as e:
        # Best-effort contract kept: callers still receive the error text, but
        # the failure and its traceback are now logged as well.
        logging.exception("ask_chatgpt_embedding failed for process %r", prompt_processes)
        return str(e)

In [88]:
def log_and_generate_markdown(prompt, model, top_k, response):
    """Write one prompt/response exchange to a timestamped markdown file.

    The file is created as ``out_md/<YYYY-MM-DD>_<HH:MM:SS>.md`` relative to
    the current working directory; the ``out_md`` directory is created if it
    does not exist yet.

    Args:
        prompt: The process description that was sent to the model.
        model: Name of the model that produced the response.
        top_k: Number of reference passages requested for the prompt.
        response: Text to record as the model's response.

    Returns:
        None.
    """
    now = datetime.now()
    date_today = now.strftime('%Y-%m-%d')
    time_now = now.strftime('%H:%M:%S')

    # Two trailing spaces force a markdown line break after each field.
    markdown_content = "\n".join([
        "",
        f"**Date:** {date_today}  ",
        f"**Time:** {time_now}  ",
        f"**Model:** {model}  ",
        f"**Num of Embeddings:** {top_k}  ",
        f"**Prompt:** {prompt}  ",
        "**Response:**  ",
        f"{response}",
        "",
    ])

    # Create the output folder on first use instead of failing with
    # FileNotFoundError.
    os.makedirs('out_md', exist_ok=True)

    # NOTE(review): the ':' characters in the file name are not valid on
    # Windows file systems; the name format is kept for compatibility.
    # utf-8 is explicit so non-ASCII prompts/responses are written reliably
    # regardless of the platform's default encoding.
    with open(f'out_md/{date_today}_{time_now}.md', 'w', encoding='utf-8') as file:
        file.write(markdown_content)

In [90]:
# Example usage: query the model for one cement-production process and store
# the exchange as a markdown log entry.
prompt = "modern dry process, five-stage kiln external decomposition, 4000 t/d"
response = ask_chatgpt_embedding(
    prompt_processes=prompt,
    top_k=10,
    model="gpt-4o",
)
log_and_generate_markdown(
    prompt=prompt,
    model="gpt-4o",
    top_k=10,
    response=response,
)


In [91]:
# Inspect the three retrieved reference passages for this process description.
print(
    generate_embedding(
        prompt_processes="new dry process, 2000-4000 t/d",
        top_k=3,
    )
)

[{'content': 'II. GENERAL PLANT DESCRIPTION 1) Location: The new 2700 tons per day (t/d) nominal capacity line, expected to produce 3000 t/d, is located alongside the new dock, at a distance of approximately 1 km from the original 2200 t/d capacity wet process plant.\n\n2) Rawt? Mix: The raw mix consists of three main com- ponents, which are limestone, black and white shale, plus sand and iron ore additives.\n\nInitially, w<e had planned to install a centralized manual control system and add automation at a later date. How- ever, after thorough reflection it was concluded that in- stalling computer control in an existing plant would even-', 'source': '[The New Integrated and Automated Production Line at Pertigalete. IEEE TRANSACTIONS ON INDUSTRY APPLICATIONS. Carlos Meneses, Juan Cayo De Azcarate, Pompeyo Rios, Alfredo Mujica, Pablo A. Useche, Herbert Egger, Carlos Tumang. 1975-09.](https://doi.org/10.1109/tia.1975.349368)'}, {'content': 'Cases 5 and 6: In Cases 5 and 6 the existing we

In [92]:
# Inspect the three retrieved reference passages for this process description.
print(
    generate_embedding(
        prompt_processes="new dry process, <2000 t/d",
        top_k=3,
    )
)

[{'content': 'II. GENERAL PLANT DESCRIPTION 1) Location: The new 2700 tons per day (t/d) nominal capacity line, expected to produce 3000 t/d, is located alongside the new dock, at a distance of approximately 1 km from the original 2200 t/d capacity wet process plant.\n\n2) Rawt? Mix: The raw mix consists of three main com- ponents, which are limestone, black and white shale, plus sand and iron ore additives.\n\nInitially, w<e had planned to install a centralized manual control system and add automation at a later date. How- ever, after thorough reflection it was concluded that in- stalling computer control in an existing plant would even-', 'source': '[The New Integrated and Automated Production Line at Pertigalete. IEEE TRANSACTIONS ON INDUSTRY APPLICATIONS. Carlos Meneses, Juan Cayo De Azcarate, Pompeyo Rios, Alfredo Mujica, Pablo A. Useche, Herbert Egger, Carlos Tumang. 1975-09.](https://doi.org/10.1109/tia.1975.349368)'}, {'content': 'Cases 5 and 6: In Cases 5 and 6 the existing we