In [None]:
import os
import pandas as pd
from dotenv import load_dotenv

# When the kernel starts inside the sandbox/ folder, move one level up so the
# relative paths and package imports used further down resolve.
# os.path.basename honours the platform's path separator, whereas splitting
# the path on "/" only works on POSIX systems.
if os.path.basename(os.getcwd()) == "sandbox":
    os.chdir("../")

# Load environment variables from a .env file (done after the directory change).
load_dotenv()

In [77]:
# Both language versions of the emission table live in the same workbook.
EMISSION_DB_PATH = "food_co2_estimator/data/DBv2.xlsx"

# Danish sheet.
df_dk = pd.read_excel(EMISSION_DB_PATH, sheet_name="DK")
# English sheet: must be read from "GB", otherwise df_gb is just a copy of df_dk.
# NOTE(review): assumes the workbook contains a sheet named "GB" -- confirm.
df_gb = pd.read_excel(EMISSION_DB_PATH, sheet_name="GB")

In [None]:
# Display the frame loaded from the "DK" sheet for a quick visual check.
df_dk

In [19]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Settings for the local Chroma collection. persist_directory is where the
# data is saved locally; remove it if persistence is not necessary.
chroma_options = {
    "collection_name": "test_db",
    "embedding_function": embeddings,
    "persist_directory": "./sandbox/test_db",
}
vector_store = Chroma(**chroma_options)

In [None]:
from typing import Any, Dict, List
from uuid import uuid4

from langchain_core.documents import Document

# One dict per spreadsheet row, keyed by column name.
emission_records_dk: List[Dict[str, Any]] = df_dk.to_dict(orient="records")
emission_records_gb: List[Dict[str, Any]] = df_gb.to_dict(orient="records")

# Rows are paired by position, so this assumes both frames list the same
# items in the same order -- TODO confirm against the workbook.
documents: List[Document] = []
document_ids: List[str] = []
for row_number, (emission_record_dk, emission_record_gb) in enumerate(
    zip(emission_records_dk, emission_records_gb), 1
):
    name = emission_record_gb.get("Name")
    # Skip rows without a usable name: an empty cell is read by pandas as NaN
    # (a float), which cannot be used as page content.
    if not isinstance(name, str) or not name.strip():
        continue

    # Document ids are strings. The 1-based row number keeps them stable
    # between runs, and avoids shadowing the builtin `id`.
    document_id = str(row_number)
    documents.append(
        Document(
            page_content=name,
            metadata=emission_record_dk,
            id=document_id,
        )
    )
    document_ids.append(document_id)

# Pass the ids explicitly so the store uses exactly the ids built above.
vector_store.add_documents(documents, ids=document_ids)

In [None]:

# Fetch the five closest entries for a sample query and list each hit's
# name next to the emission value stored in its metadata.
results = vector_store.similarity_search(query="lasagna sheets", k=5)
for res in results:
    hit_name = res.page_content
    hit_emission = res.metadata['Total kg CO2e/kg']
    print(f"* {hit_name} =  {hit_emission}")

In [None]:
# The number of hits has to be passed through search_kwargs; a bare `k=5`
# keyword is not a retriever setting and leaves the default result count in place.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Quick check: run two queries through the retriever as one batch.
retriever.batch(["tomat","kartoffel"])
    
     

In [None]:
from langchain_core.runnables import RunnablePassthrough


def parse_retriever_output(documents: List[Document]):
    """Summarise retrieved documents as ``{name: emission text}``.

    Args:
        documents: Retrieved documents; each one's ``page_content`` is used as
            the key and its ``metadata`` is searched for the emission value.

    Returns:
        A dict mapping ``page_content`` to a string such as
        ``"1.2 kg CO2e / kg"`` (value converted to float and rounded to one
        decimal). Documents whose metadata lacks the ``"Total kg CO2e/kg"``
        key are left out; if two documents share the same ``page_content``
        the later one wins.
    """
    emission_key = "Total kg CO2e/kg"
    return {
        doc.page_content: f"{round(float(doc.metadata[emission_key]), 1)} kg CO2e / kg"
        for doc in documents
        if emission_key in doc.metadata
    }


# Pipe the retriever's documents into parse_retriever_output, so the chain
# yields the parsed {name: emission text} dict instead of raw documents.
retriever_chain = retriever | parse_retriever_output


def batch_retriever(inputs: List[str]):
    """Look up emission options for several queries in one call.

    Runs ``retriever_chain`` over ``inputs`` as a batch and returns a dict
    that maps each input string to its result, paired by position.
    """
    per_input_results = retriever_chain.batch(inputs)
    return {query: found for query, found in zip(inputs, per_input_results)}



# Try the batched lookup on two sample queries; the cell displays the returned dict.
batch_retriever(["tomat","agurk"])




In [88]:
from food_co2_estimator.chains.recipe_extractor import get_recipe_extractor_chain
from food_co2_estimator.utils.url import get_url_text, get_full_url_text

In [90]:
# Build the recipe extractor chain, passing verbose=True.
recipe_extractor = get_recipe_extractor_chain(verbose=True)

In [91]:
# Sample recipe page used for the extraction experiments below.
url = "https://www.valdemarsro.dk/frikadeller/"
# Fetch the page content with the "full" text helper.
text = get_full_url_text(url)

In [None]:
# Inspect the fetched page text.
text

In [None]:
# Alternative input for comparison: text = get_full_url_text(url)
# NOTE(review): this rebinds `text`, which an earlier cell set from
# get_full_url_text(url); re-running that earlier display cell afterwards
# shows this value instead.
text = get_url_text(url)
# Run the extractor chain on the page text.
output = recipe_extractor.invoke(input=text)

In [None]:
# Show the "text" entry of the extractor result.
# NOTE(review): `output` is rebound further down to the RAG chain's reply,
# which is read through `.content`; this cell only works before that one runs.
output["text"]

In [48]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# Prompt template: {ingredients} receives the raw ingredient list and
# {context} the candidate emission options produced by batch_retriever.
# The unit is written "kg CO2e / kg" so it matches the values in the context.
message = """
Given this list of ingredients, return the emissions in kg CO2e / kg provided the best matching emission option. 
{ingredients}

Ingredients emission options:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

# temperature=0 for answers that are as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0,verbose=True)


# The leading dict fans the input list out to both prompt variables:
# "context" is looked up via batch_retriever, "ingredients" is passed through as-is.
rag_chain = {"context": batch_retriever, "ingredients": RunnablePassthrough()} | prompt | llm

In [57]:
# Run the RAG chain on two sample ingredient lines.
# NOTE(review): this reuses the name `output` from the recipe-extractor cell.
output = rag_chain.invoke(["500 g kartofler", "300 gram svinekød"])

In [None]:
# Print the text content of the model's reply.
print(output.content)

In [None]:
from deep_translator import GoogleTranslator


# Danish is "da" (ISO 639-1); "dk" is the country code for Denmark, not a
# language code, and is not a valid source language here.
GoogleTranslator(source="da",target="en")

In [None]:
from translate import Translator

# Danish -> English translator from the `translate` package, tried on a
# single ingredient line.
translator = Translator(from_lang="da", to_lang="en")
translator.translate("500 g svinekød ")