In [None]:
import os
import pandas as pd
from dotenv import load_dotenv

# When the kernel is started from inside the "sandbox" folder, step up one
# level so the relative paths used below resolve. os.path.basename is used
# instead of splitting on "/" so the check also works with other path separators.
if os.path.basename(os.getcwd()) == "sandbox":
    os.chdir("../")

# Load variables from a .env file into the process environment.
load_dotenv()

In [77]:
# Read both sheets of the emission workbook in one pass; with a list of
# sheet names, read_excel returns a dict keyed by sheet name.
# NOTE(review): df_gb has to come from its own sheet rather than "DK"; the
# sheet name "GB" is assumed from the variable name — confirm it exists.
_emission_sheets = pd.read_excel(
    "food_co2_estimator/data/DBv2.xlsx", sheet_name=["DK", "GB"]
)
df_dk = _emission_sheets["DK"]
df_gb = _emission_sheets["GB"]

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df_dk.head()

In [19]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to the vector store below.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Chroma collection "test_db", persisted under ./sandbox/test_db.
vector_store = Chroma(
    persist_directory="./sandbox/test_db",  # local storage location; remove if persistence is not necessary
    embedding_function=embeddings,
    collection_name="test_db",
)

In [None]:
from typing import Any, Dict, List
from uuid import uuid4

from langchain_core.documents import Document

# One dict per spreadsheet row (column name -> cell value).
emission_records_dk: List[Dict[str, Any]] = df_dk.to_dict(orient="records")
emission_records_gb: List[Dict[str, Any]] = df_gb.to_dict(orient="records")

documents = []
# NOTE(review): these UUIDs are generated but never passed to the store and
# are not used by any visible cell — confirm whether they are still needed.
uuids = []

# Rows are paired by position. The loop variable is deliberately not called
# `id`: at notebook top level it would shadow the builtin for every later cell.
for record_id, (emission_record_dk, emission_record_gb) in enumerate(
    zip(emission_records_dk, emission_records_gb), 1
):
    # Rows without a "Name" column cannot be embedded; skip them.
    if "Name" not in emission_record_gb:
        continue

    documents.append(
        Document(
            # The GB row's name is the text that gets embedded ...
            page_content=emission_record_gb["Name"],
            # ... while the full DK row travels along as metadata.
            metadata=emission_record_dk,
            # Pass the 1-based row number as a string rather than an int.
            id=str(record_id),
        )
    )
    uuids.append(str(uuid4()))

vector_store.add_documents(documents)

In [None]:

# Smoke test: fetch the five closest entries for one query and print each
# entry's text next to its "Total kg CO2e/kg" metadata value.
results = vector_store.similarity_search(k=5, query="lasagna sheets")
for hit in results:
    emission_value = hit.metadata['Total kg CO2e/kg']
    print(f"* {hit.page_content} =  {emission_value}")

In [None]:
# The number of results has to be passed through search_kwargs; a bare `k=`
# keyword on as_retriever is not a search option and does not set it.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Batch lookup for two queries; the cell displays the retrieved documents.
retriever.batch(["tomat", "kartoffel"])
    
     

In [None]:
from langchain_core.runnables import RunnablePassthrough


def parse_retriever_output(documents: List[Document]):
    """Summarise retrieved documents as ``{page_content: emission label}``.

    Documents whose metadata has no ``"Total kg CO2e/kg"`` entry are skipped.
    For the rest, the value is converted to ``float``, rounded to one decimal
    and rendered as ``"<value> kg CO2e / kg"``. If several documents share the
    same ``page_content``, the last one wins.
    """
    emission_key = "Total kg CO2e/kg"
    return {
        doc.page_content: f"{round(float(doc.metadata[emission_key]), 1)} kg CO2e / kg"
        for doc in documents
        if emission_key in doc.metadata
    }


# Pipe the retriever's documents straight into parse_retriever_output, so the
# chain yields the parsed {text: emission label} dict for a query.
retriever_chain = retriever | parse_retriever_output


def batch_retriever(inputs: List[str]):
    """Run ``retriever_chain`` over all inputs at once, keyed by input.

    Returns a dict mapping each input string to the chain's result for it.
    Repeated inputs collapse into a single key holding the last result.
    """
    parsed_per_input = retriever_chain.batch(inputs)
    return {query: parsed for query, parsed in zip(inputs, parsed_per_input)}



# Example: look up two inputs in one call; the cell displays the resulting dict.
batch_retriever(["tomat","agurk"])




In [88]:
from food_co2_estimator.chains.recipe_extractor import get_recipe_extractor_chain
from food_co2_estimator.utils.url import get_url_text, get_full_url_text

In [127]:
# Build the recipe-extraction chain with verbose=False.
recipe_extractor = get_recipe_extractor_chain(verbose=False)

In [128]:
# Example recipe page used by the following cells.
url = "https://www.valdemarsro.dk/frikadeller/"
# NOTE(review): `text` is reassigned from get_url_text(url) in the next cell,
# so this result appears unused — confirm whether this call is still needed.
text = get_full_url_text(url)

In [129]:
# Fetch the text for `url` and run it through the recipe extractor.
text = get_url_text(url)
output = recipe_extractor.invoke(input=text)

In [135]:
def parse_recipe_output(output: Dict[str,Any]) -> List[str]:
    """Return the ingredients held by the extractor result.

    Reads the object stored under ``output["text"]`` and returns its
    ``ingredients`` attribute unchanged.
    """
    extracted_recipe = output["text"]
    return extracted_recipe.ingredients

In [139]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
# URL -> page text -> recipe extraction -> ingredients, composed as one chain.
extractor_chain = get_url_text | recipe_extractor | parse_recipe_output

In [None]:
# Run the extraction chain on the example URL; the cell displays the result.
extractor_chain.invoke(url)

In [141]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_openai import ChatOpenAI

from food_co2_estimator.output_parsers.sql_co2_estimator import CO2Emissions

# Prompt template: {ingredients} receives the extracted ingredient list and
# {context} the candidate emission options found by batch_retriever. The unit
# is spelled "kg CO2e / kg" to match the labels supplied in the context.
message = """
Given this list of ingredients, return the emissions in kg CO2e / kg provided the best matching emission option.
{ingredients}

Ingredients emission options:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

# temperature=0 is set to keep the answers as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0,verbose=True)

# Ask the model to answer in the CO2Emissions schema.
structured_llm = llm.with_structured_output(CO2Emissions)

# URL -> ingredients -> {retrieved options, ingredients} -> prompt -> structured answer.
rag_chain = (
    extractor_chain
    | {"context": batch_retriever, "ingredients": RunnablePassthrough()}
    | prompt
    | structured_llm
)

In [143]:
# Run the full chain on the example URL and keep the structured result.
output = rag_chain.invoke(url)

In [None]:
# Display the result of the previous chain invocation.
output

In [None]:
from translate import Translator
from operator import itemgetter


# Translator configured from Danish ("da") to English ("en").
translator = Translator(to_lang="en", from_lang="da")

def _translate_if_danish(inputs:List[str],language: str):
    if language == "en":
        return inputs
    
    inputs_str = ", ".join(inputs)
    translations = translator.translate(inputs_str)
    return translations.split(", ")

def translate_if_danish(_dict: Dict[str,str]):
    """Dict-based adapter around ``_translate_if_danish``.

    Pulls the ``"inputs"`` and ``"language"`` entries out of ``_dict`` and
    forwards them as keyword arguments, returning whatever the helper returns.
    """
    inputs = _dict["inputs"]
    language = _dict["language"]
    return _translate_if_danish(inputs=inputs, language=language)

 
# Quick manual check: translate two inputs flagged as Danish ("da").
_translate_if_danish(["tomat","kartoffel"],language="da")


In [146]:
from langchain_core.runnables import RunnableLambda

# Translate the given ingredient names, then look up and estimate emissions.
# The translated list feeds the retrieval/prompt/LLM stages directly rather
# than rag_chain, because rag_chain begins with get_url_text, which is called
# with a URL elsewhere in this notebook, not a list of ingredient names.
# NOTE(review): assumes the intent is to skip URL extraction here — confirm.
translation_chain = (
    {"inputs": itemgetter("inputs"), "language": itemgetter("language")}
    | RunnableLambda(translate_if_danish)
    | {"context": batch_retriever, "ingredients": RunnablePassthrough()}
    | prompt
    | structured_llm
)

In [None]:
# Invoke the translation chain with two inputs and language "da".
output = translation_chain.invoke({"inputs":["tomat","kartoffel"],"language":"da"})

In [None]:
# Show the first entry of the result's `emissions`.
print(output.emissions[0])