In [13]:
from langchain_community.llms import Ollama
llm = Ollama(model="llama3:70b")
llm.base_url = 'http://172.28.105.30/backend'
eln_schema = llm.invoke(
    """
here is a snippet from cheat sheat to write data schemas in yaml for NOMAD (www.nomad-lab.eu):
# Description: This is a cheat sheet for the NOMAD custom schema.
#   It is a minimal example of how to define a custom schema.
#   It is not intended to be used for real data.
#   It is intended to be used as a starting point for defining a custom schema.
#   It is intended to be used as a reference for the syntax of the custom schema.

# defining the schema:
definitions:
  name: "Cheatsheet"
  sections:
    CheatSheet: # sections are in pascal case (not mandatory but best practice)
      base_sections:
      - nomad.datamodel.data.EntryData

      # adding quantities:
      quantities: # quantities are in snake case (not mandatory but best practice)

        # myquantity: test
        string:
          type: string
          description:
          m_annotations:
            eln:
              component: StringEditQuantity

        # reference: www.example.com
        url_link:
          type: string
          description:
          m_annotations:
            eln:
              component: URLEditQuantity

        # myfloatquantity: 1.0
        float:
          type: np.float64
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myfloatquantitywithunit: 1.0 fs
        float_unit:
          type: np.float64
          unit: second
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "fs"

        # myfloatquantitywithderivedunit: 1.0 eV
        float_derived_unit:
          type: np.float64
          unit: joule
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "eV"

        # myfloatquantitycomplexunit: 1.0 mA/ms^2*cm
        float_complex_unit:
          type: np.float64
          unit: ampere / second^2 * meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "milliampere / ms^2 * cm"

        # myfloatquantitywithbounds: 0.0 - 10.0 m
        float_with_bounds:
          type: np.float64
          unit: meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myintquantity: 1
        int:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myintquantitywithbounds: 0 - 10
        int_with_bounds:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myboolquantity: true
        bool:
          type: bool
          description:
          m_annotations:
            eln:
              component: BoolEditQuantity

please use this input to generate a yaml schema for the following input :
MySinteringSchema:
  temperature: 34.5°
  process_finished: True
  user: Sebastian

""")

yaml_file = "eln.archive.yaml"

In [14]:
eln_schema

'Here is the generated YAML schema based on the provided input:\n\n```\ndefinitions:\n  name: "MySinteringSchema"\n  sections:\n    MySinteringSchema: \n      base_sections:\n       - nomad.datamodel.data.EntryData\n\n      quantities: \n        temperature:\n          type: np.float64\n          unit: degree Celsius\n          description: Temperature during sintering process\n          m_annotations:\n            eln:\n              component: NumberEditQuantity\n\n        process_finished:\n          type: bool\n          description: Indicates whether the process is finished\n          m_annotations:\n            eln:\n              component: BoolEditQuantity\n\n        user:\n          type: string\n          description: User who performed the sintering process\n          m_annotations:\n            eln:\n              component: StringEditQuantity\n```\n\nLet me know if this meets your requirements or if you need any further modifications!'

In [15]:
eln_schema_clean = eln_schema.split('```')[1]
eln_schema_clean

'\ndefinitions:\n  name: "MySinteringSchema"\n  sections:\n    MySinteringSchema: \n      base_sections:\n       - nomad.datamodel.data.EntryData\n\n      quantities: \n        temperature:\n          type: np.float64\n          unit: degree Celsius\n          description: Temperature during sintering process\n          m_annotations:\n            eln:\n              component: NumberEditQuantity\n\n        process_finished:\n          type: bool\n          description: Indicates whether the process is finished\n          m_annotations:\n            eln:\n              component: BoolEditQuantity\n\n        user:\n          type: string\n          description: User who performed the sintering process\n          m_annotations:\n            eln:\n              component: StringEditQuantity\n'

In [19]:
import yaml

# Assuming 'eln_schema' contains your YAML data as a string
yaml_file = 'output.archive.yaml'

with open(yaml_file, 'w') as file:
    data = yaml.load(eln_schema_clean, Loader=yaml.FullLoader)  # Use FullLoader to parse the YAML
    yaml.dump(data, file, default_flow_style=False)


In [18]:
!nomad parse output.archive.yaml --show-archive

Schema is deprecated, use plugins. ()
parsing was not successful (parser=parsers/archive, exc_info='Celsius' is not defined in the unit registry)
Traceback (most recent call last):
  File "/home/brueckner/llm_hackathon/.llmpyenv/bin/nomad", line 8, in <module>
    sys.exit(run_cli())
  File "/home/brueckner/llm_hackathon/.llmpyenv/lib/python3.9/site-packages/nomad/cli/cli.py", line 76, in run_cli
    return cli(obj=POPO())  # pylint: disable=E1120,E1123
  File "/home/brueckner/llm_hackathon/.llmpyenv/lib/python3.9/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/home/brueckner/llm_hackathon/.llmpyenv/lib/python3.9/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/home/brueckner/llm_hackathon/.llmpyenv/lib/python3.9/site-packages/click/core.py", line 1688, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/home/brueckner/llm_hackathon/.llmpyenv/lib/python3.9/site-packages/cli

In [20]:
documents= """
here is a snippet from cheat sheat to write data schemas in yaml for NOMAD (www.nomad-lab.eu). Use this to generate data schemas from the user prompts:
# Description: This is a cheat sheet for the NOMAD custom schema.
#   It is a minimal example of how to define a custom schema.
#   It is not intended to be used for real data.
#   It is intended to be used as a starting point for defining a custom schema.
#   It is intended to be used as a reference for the syntax of the custom schema.

# defining the schema:
definitions:
  name: "Cheatsheet"
  sections:
    CheatSheet: # sections are in pascal case (not mandatory but best practice)
      base_sections:
      - nomad.datamodel.data.EntryData

      # adding quantities:
      quantities: # quantities are in snake case (not mandatory but best practice)

        # myquantity: test
        string:
          type: string
          description:
          m_annotations:
            eln:
              component: StringEditQuantity

        # reference: www.example.com
        url_link:
          type: string
          description:
          m_annotations:
            eln:
              component: URLEditQuantity

        # myfloatquantity: 1.0
        float:
          type: np.float64
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myfloatquantitywithunit: 1.0 fs
        float_unit:
          type: np.float64
          unit: second
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "fs"

        # myfloatquantitywithderivedunit: 1.0 eV
        float_derived_unit:
          type: np.float64
          unit: joule
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "eV"

        # myfloatquantitycomplexunit: 1.0 mA/ms^2*cm
        float_complex_unit:
          type: np.float64
          unit: ampere / second^2 * meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "milliampere / ms^2 * cm"

        # myfloatquantitywithbounds: 0.0 - 10.0 m
        float_with_bounds:
          type: np.float64
          unit: meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myintquantity: 1
        int:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myintquantitywithbounds: 0 - 10
        int_with_bounds:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myboolquantity: true
        bool:
          type: bool
          description:
          m_annotations:
            eln:
              component: BoolEditQuantity
"""

In [21]:
documents


'\n# here is a snippet from cheat sheat to write data schemas in yaml for NOMAD (www.nomad-lab.eu):\n# Description: This is a cheat sheet for the NOMAD custom schema.\n#   It is a minimal example of how to define a custom schema.\n#   It is not intended to be used for real data.\n#   It is intended to be used as a starting point for defining a custom schema.\n#   It is intended to be used as a reference for the syntax of the custom schema.\n\n# defining the schema:\ndefinitions:\n  name: "Cheatsheet"\n  sections:\n    CheatSheet: # sections are in pascal case (not mandatory but best practice)\n      base_sections:\n      - nomad.datamodel.data.EntryData\n\n      # adding quantities:\n      quantities: # quantities are in snake case (not mandatory but best practice)\n\n        # myquantity: test\n        string:\n          type: string\n          description:\n          m_annotations:\n            eln:\n              component: StringEditQuantity\n\n        # reference: www.example.com\

In [25]:
#import dotenv
#from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama

from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

llm = Ollama(model="llama3:70b")
llm.base_url = 'http://172.28.105.30/backend'

review_template_str = """here is a snippet from cheat sheat to write data schemas in yaml for NOMAD (www.nomad-lab.eu). Use this to generate data schemas from the user prompts:
# Description: This is a cheat sheet for the NOMAD custom schema.
#   It is a minimal example of how to define a custom schema.
#   It is not intended to be used for real data.
#   It is intended to be used as a starting point for defining a custom schema.
#   It is intended to be used as a reference for the syntax of the custom schema.

# defining the schema:
definitions:
  name: "Cheatsheet"
  sections:
    CheatSheet: # sections are in pascal case (not mandatory but best practice)
      base_sections:
      - nomad.datamodel.data.EntryData

      # adding quantities:
      quantities: # quantities are in snake case (not mandatory but best practice)

        # myquantity: test
        string:
          type: string
          description:
          m_annotations:
            eln:
              component: StringEditQuantity

        # reference: www.example.com
        url_link:
          type: string
          description:
          m_annotations:
            eln:
              component: URLEditQuantity

        # myfloatquantity: 1.0
        float:
          type: np.float64
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myfloatquantitywithunit: 1.0 fs
        float_unit:
          type: np.float64
          unit: second
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "fs"

        # myfloatquantitywithderivedunit: 1.0 eV
        float_derived_unit:
          type: np.float64
          unit: joule
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "eV"

        # myfloatquantitycomplexunit: 1.0 mA/ms^2*cm
        float_complex_unit:
          type: np.float64
          unit: ampere / second^2 * meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              defaultDisplayUnit: "milliampere / ms^2 * cm"

        # myfloatquantitywithbounds: 0.0 - 10.0 m
        float_with_bounds:
          type: np.float64
          unit: meter
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myintquantity: 1
        int:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity

        # myintquantitywithbounds: 0 - 10
        int_with_bounds:
          type: int
          description:
          m_annotations:
            eln:
              component: NumberEditQuantity
              minValue: 0
              maxValue: 10

        # myboolquantity: true
        bool:
          type: bool
          description:
          m_annotations:
            eln:
              component: BoolEditQuantity

{context}
"""

review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"],
        template=review_template_str,
    )
)

review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["question"],
        template="{question}",
    )
)
messages = [review_system_prompt, review_human_prompt]

review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"],
    messages=messages,
)

#chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

review_chain = review_prompt_template | llm

ModuleNotFoundError: No module named 'langchain'

In [23]:
from langchain_community.llms import Ollama
from langchain.retrieval import SimpleSearchRetriever
from langchain.chains import RetrievalAugmentedChain
llm = Ollama(model="llama3:70b")
llm.base_url = 'http://172.28.105.30/backend'
retriever = SimpleSearchRetriever(documents)
rag_chain = RetrievalAugmentedChain(lm, retriever)
#eln_schema = llm.invoke(

ModuleNotFoundError: No module named 'langchain'

In [None]:
from langchain.llms import HuggingFaceLM
from langchain.retrieval import SimpleSearchRetriever
from langchain.chains import RetrievalAugmentedChain

# Initialize the language model
lm = HuggingFaceLM("gpt-2")

# Setup the retriever
documents = [
    {"text": "Document 1 text", "metadata": {"source": "Source A"}},
    {"text": "Document 2 text", "metadata": {"source": "Source B"}},
    # Add more documents as needed
]
retriever = SimpleSearchRetriever(documents)

# Create the RAG chain
rag_chain = RetrievalAugmentedChain(lm, retriever)

# Use the RAG chain
response = rag_chain.run("What is the explanation for RAG?")
print(response)
