# Local Mixtral

In [24]:
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough, RunnableLambda
from operator import itemgetter
from langchain.chat_models import ChatOllama
from langchain.schema import get_buffer_string, OutputParserException, format_document
from langchain.callbacks.tracers import ConsoleCallbackHandler
from langchain.schema import StrOutputParser
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from alhazen.utils.output_parsers import JsonEnclosedByTextOutputParser

def _sanitize_output(text: str):
    _, after = text.split("```python")
    return after.split("```")[0]

# System message text: used as the "system" entry of the chat prompt built below.
sys_p = '''You are an expert python programmer. 
You write concise, well-documented, high-quality source code.'''

# Human message template: used as the "human" entry of the chat prompt.
# `{html}` is the single template variable; it is filled in at invoke time
# and sits between literal triple backticks so the model sees the HTML as a
# fenced snippet.
hum_p_1 = '''The following snippet of HTML enclosed in triple backticks is a table about 
large language models. Only using the HTML data, write a python function that plots 
a horizontal bar chart with the name of each large language model on the vertical axis and 
the number of parameters used in each model on the horizontal. Use the seaborn library.
Only generate valid python code in your answer.   

```{html}```
'''
# Two-message chat prompt: the system persona followed by the human task template.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", sys_p),
        ("human", hum_p_1),
    ]
)

# Chat model accessed through Ollama, using the codellama:34b-python model tag.
model = ChatOllama(model="codellama:34b-python")

# Pipeline: render the prompt, send it to the model, parse the reply with StrOutputParser.
codegen_chain = prompt | model | StrOutputParser()

In [25]:
import requests
from bs4 import BeautifulSoup

# Replace this URL with the one you want to scrape
url = "https://en.wikipedia.org/wiki/Large_language_model#List"

# Send a request to the URL and get the HTML content.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() prevents an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Find the table you want to extract (you may need to adjust the selector based on the page)
table = soup.find("table", {"class": "wikitable sortable"})
if table is None:
    # find() returns None when nothing matches; fail here with a clear message
    # instead of letting later cells trip over a None table.
    raise ValueError(f"No table with class 'wikitable sortable' found at {url}")

print(table)

<table class="wikitable sortable">
<tbody><tr>
<th>Name</th>
<th>Release date<sup class="reference" id="cite_ref-120"><a href="#cite_note-120">[a]</a></sup></th>
<th>Developer</th>
<th>Number of parameters<sup class="reference" id="cite_ref-121"><a href="#cite_note-121">[b]</a></sup></th>
<th>Corpus size
</th>
<th>Training cost (petaFLOP-day)</th>
<th>License<sup class="reference" id="cite_ref-122"><a href="#cite_note-122">[c]</a></sup></th>
<th>Notes
</th></tr>
<tr>
<td><a href="/wiki/GPT-1" title="GPT-1">GPT-1</a></td>
<td><span data-sort-value="000000002018-06-01-0000" style="white-space:nowrap">June 2018</span></td>
<td><a href="/wiki/OpenAI" title="OpenAI">OpenAI</a></td>
<td><span data-sort-value="117000000 !">117 million</span></td>
<td>
</td>
<td></td>
<td class="table-yes" style="background:#9EFF9E;vertical-align:middle;text-align:center;">MIT<sup class="reference" id="cite_ref-gpt1_123-0"><a href="#cite_note-gpt1-123">[120]</a></sup>
</td>
<td>First GPT model, decoder-only tr

In [26]:
# Run the code-generation chain on the scraped table's HTML, with a console
# callback handler attached so the run is traced to the cell output.
psrc = codegen_chain.invoke(
    {"html": str(table)},
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print(psrc)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "html": "<table class=\"wikitable sortable\">\n<tbody><tr>\n<th>Name</th>\n<th>Release date<sup class=\"reference\" id=\"cite_ref-120\"><a href=\"#cite_note-120\">[a]</a></sup></th>\n<th>Developer</th>\n<th>Number of parameters<sup class=\"reference\" id=\"cite_ref-121\"><a href=\"#cite_note-121\">[b]</a></sup></th>\n<th>Corpus size\n</th>\n<th>Training cost (petaFLOP-day)</th>\n<th>License<sup class=\"reference\" id=\"cite_ref-122\"><a href=\"#cite_note-122\">[c]</a></sup></th>\n<th>Notes\n</th></tr>\n<tr>\n<td><a href=\"/wiki/GPT-1\" title=\"GPT-1\">GPT-1</a></td>\n<td><span data-sort-value=\"000000002018-06-01-0000\" style=\"white-space:nowrap\">June 2018</span></td>\n<td><a href=\"/wiki/OpenAI\" title=\"OpenAI\">OpenAI</a></td>\n<td><span data-sort-value=\"117000000 !\">117 million</span></td>\n<td>\n</td>\n<td></td>\n<td class=\"table-yes\" style=\"background:#9EFF9E;vertical-align

In [None]:

# Extract the header of the table.
# Rows are looked up from <table> itself rather than through table.tbody:
# html.parser only produces a <tbody> node when the markup contains one, so
# table.tbody can be None for otherwise valid tables.
first_row = table.find("tr")
# .strip() removes the newlines that sit inside cells such as "<th>Corpus size\n</th>".
header = [th.text.strip() for th in first_row.find_all("th")] if first_row else []

# Extract the rows of the table (excluding the header).
# Rows made up only of <th> cells yield an empty list and are skipped.
rows = []
for tr in table.find_all("tr"):
    cells = [td.text.strip() for td in tr.find_all("td")]
    if cells:
        rows.append(cells)

# Print the extracted data
print("Header:", header)
print("Rows:", rows)