# Large Language Models

In [11]:
from langchain_dartmouth.llms import DartmouthLLM

## Baseline Completion Models

In [12]:
# Baseline (non-chat) completion model: it is given raw text to continue.
# The model identifier is passed by keyword, the same way the ChatDartmouth
# constructor is called later in this notebook.
# NOTE(review): the rendered output echoes the prompt before the generated
# code, which is consistent with return_full_text=True -- confirm against the
# library documentation.
llm = DartmouthLLM(model_name="codellama-13b-python-hf", return_full_text=True)

# The prompt is the opening of a Python module (an import plus a function
# signature); the model is asked to produce what follows.
response = llm.invoke("import socket\n\ndef ping_exponential_backoff(host: str):")

In [13]:
from IPython.display import display, Markdown

# Wrap the completion in a fenced ```python block so the notebook renders it
# as code instead of as ordinary markdown text.
display(Markdown("\n".join(["```python", response, "```"])))

```python
import socket

def ping_exponential_backoff(host: str):
    """
    Returns a tuple containing:
    - the ping status (success/fail)
    - the ping latency (if successful, else None)
    """
    timeout = 1000
    while True:
        try:
            # send a ping packet to the host
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(timeout / 1000)
            sock.connect((host, 80))
            return (True, timeout)
        except socket.error:
            # if the host is unreachable, try again
            timeout *= 2
        finally:
            sock.close()

def ping_exponential_backoff_iter(host: str):
    """
    Yields the ping status of the host (fail/success)
    """
    while True:
        yield ping_exponential_backoff(host)

```

## Instruction-Tuned Chat Models

In [14]:
from langchain_dartmouth.llms import ChatDartmouth

# Switch to an instruction-tuned chat model.
# NOTE: this rebinds `llm`, replacing the completion model created in the
# "Baseline Completion Models" section; re-run that section's cell to get the
# completion model back.
llm = ChatDartmouth(model_name="llama-3-1-8b-instruct")

In [15]:
# Send a single user message to the chat model.
# NOTE: this rebinds `response`. Per the output shown below it is now an
# AIMessage object, not the text that the earlier Markdown display cell
# concatenates with strings -- re-run the completion cell before re-running
# that display cell.
response = llm.invoke("Hi there! Who are you?")

In [16]:
# Bare last expression: the notebook shows the repr of the whole message
# object (content plus response/usage metadata), not just the reply text.
response

AIMessage(content='I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 42, 'total_tokens': 65}, 'model_name': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'system_fingerprint': '2.2.0-sha-db7e043', 'finish_reason': 'eos_token', 'logprobs': None}, id='run-ed094057-1173-4df9-81bb-5726f89c4f41-0', usage_metadata={'input_tokens': 42, 'output_tokens': 23, 'total_tokens': 65})