In [1]:
from langchain_community.llms import Ollama

llm = Ollama(model="mistral")

In [2]:
llm.invoke("tell me a short quote from someone great")

' "Be the change you wish to see in the world." - Mahatma Gandhi'

In [3]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_path = "https://www.langchain.com/langsmith"
)

docs = loader.load()

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200,
    add_start_index = True
)


In [5]:
all_splits = text_splitter.split_documents(docs)

In [11]:
from langchain_community import embeddings

embedding = embeddings.OllamaEmbeddings(
    model = "nomic-embed-text"
)

In [12]:
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(
    documents = all_splits,
    embedding = embedding
)

In [13]:
retriever = vectorstore.as_retriever(
    search_type = "similarity",
    search_kwargs = {"k":10}
)

In [15]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


contextualize_q_system_prompt = """Given a Chat History and the latest user question \ 
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question \ 
just reformulate it if needed and otherwise return it as it is."""

In [17]:
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

In [18]:
from langchain_core.output_parsers import StrOutputParser

contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [19]:
from langchain_core.messages import AIMessage, HumanMessage

contextualize_q_chain.invoke(
    {
        "chat_history":[
            HumanMessage(content = "What Does LLM Stand For?"),
            AIMessage(content="Large language model"),
        ],
        "question" : "What ius meant by large ?"
    }
)

' What do you mean by "large" in the context of a large language model?'

In [1]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

AIMessage(content="This is content")

AIMessage(content='This is content')

In [2]:
chat = []
chat.append(AIMessage(content="this is content"))

In [3]:
chat

[AIMessage(content='this is content')]

In [17]:
HumanMessage(content="content").dict()['type']

'human'

In [24]:
from langchain_community.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_community.llms import OpenAI
from dotenv import load_dotenv, find_dotenv
import os
import streamlit as st
import io
from langchain import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community import embeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_community.llms import Ollama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

loader = CSVLoader("test.csv")
text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, add_start_index=True
        )
docs = loader.load()
# all_splits = text_splitter.split_documents(docs)
embedding = embeddings.OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)

In [25]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

In [26]:
retriever.get_relevant_documents("Hi how are you")

[Document(page_content='Number; Incident State; Active; Reassignment Count; Reopen Count; Sys Mod Count; Made SLA; Caller ID; Opened By; Opened At; Sys Created By; Sys Created At; Sys Updated By; Sys Updated At; Contact Type; Location; Category; Subcategory; U Symptom; CMDB CI; Impact; Urgency; Priority; Assignment Group; Assigned To; Knowledge; U Priority Confirmation; Notify; Problem ID; RFC; Vendor; Caused By; Closed Code; Resolved By; Resolved At; Incident Description; Notes: INC0000064; Closed; false;0;0;6; true; Caller 1034; Opened by 180; 29/2/2016 07:14; Created by 81; 29/2/2016 07:18; Updated by 908; 7/3/2016 15:00; Phone; Location 150; Category 10; Subcategory 77; Symptom 471; ?; 2 - Medium; 2 - Medium; 3 - Moderate; Group 60; Resolver 98; true; false; Do Not Notify; ?; ?; ?; code 6; Resolved by 61; 1/3/2016 07:21; 7/3/2016 15:00; Caller 1034 reported slow network performance. The issue was identified as a bandwidth bottleneck by Resolver 98. After optimizing network traffic 

In [39]:
import pandas as pd
import numpy as np
from langchain.docstore.document import Document

columns_to_embed = ["Number", " Incident Description", " Notes"]
columns_to_metadata = [
    "Number",
    " Incident State",
    " Active",
    " Reassignment Count",
    " Reopen Count",
    " Sys Mod Count",
    " Made SLA",
    " Caller ID",
    " Opened By",
    " Opened At",
    " Sys Created By",
    " Sys Created At",
    " Sys Updated By",
    " Sys Updated At",
    " Contact Type",
    " Location",
    " Category",
    " Subcategory",
    " U Symptom",
    " CMDB CI",
    " Impact",
    " Urgency",
    " Priority",
    " Assignment Group",
    " Assigned To",
    " Knowledge",
    " U Priority Confirmation",
    " Notify",
    " Problem ID",
    " RFC",
    " Vendor",
    " Caused By",
    " Closed Code",
    " Resolved By",
    " Resolved At",
]

docs = []
df = pd.read_csv("test.csv", sep=";")
for index, row in df.iterrows():
    to_metadata = {col: row[col] for col in columns_to_metadata if col in row}
    values_to_embed = {k: row[k] for k in columns_to_embed if k in row}
    to_embed = "\n".join(
        f"{k.strip()}: {v.strip()}" for k, v in values_to_embed.items()
    )
    newDoc = Document(page_content=to_embed, metadata=to_metadata)
    docs.append(newDoc)

In [40]:
docs

[Document(page_content='Number: INC0000045\nIncident Description: The incident was initially reported as a network connectivity issue by Caller 2403. After investigation and troubleshooting by the assigned team (Resolved by 149)\nNotes: it was determined that the issue stemmed from a misconfiguration in the router settings. The misconfiguration was corrected   restoring normal network connectivity.', metadata={'Number': 'INC0000045', ' Incident State': ' Closed', ' Active': ' false', ' Reassignment Count': 0, ' Reopen Count': 0, ' Sys Mod Count': 4, ' Made SLA': ' true', ' Caller ID': ' Caller 2403', ' Opened By': ' Opened by 8', ' Opened At': ' 29/2/2016 01:16', ' Sys Created By': ' Created by 6', ' Sys Created At': ' 29/2/2016 01:23', ' Sys Updated By': ' Updated by 908', ' Sys Updated At': ' 5/3/2016 12:00', ' Contact Type': ' Phone', ' Location': ' Location 143', ' Category': ' Category 55', ' Subcategory': ' Subcategory 170', ' U Symptom': ' Symptom 72', ' CMDB CI': ' ?', ' Impact

In [37]:
for i in columns_to_metadata:
    if i in df.columns.to_list():
        print(f"{i} is true")
    else:
        print(f"{i} is false")

Number is true
Incident State is false
Active is false
Reassignment Count is false
Reopen Count is false
Sys Mod Count is false
Made SLA is false
Caller ID is false
Opened By is false
Opened At is false
Sys Created By is false
Sys Created At is false
Sys Updated By is false
Sys Updated At is false
Contact Type is false
Location is false
Category is false
Subcategory is false
U Symptom is false
CMDB CI is false
Impact is false
Urgency is false
Priority is false
Assignment Group is false
Assigned To is false
Knowledge is false
U Priority Confirmation is false
Notify is false
Problem ID is false
RFC is false
Vendor is false
Caused By is false
Closed Code is false
Resolved By is false
Resolved At is false


In [38]:
df.columns.tolist()

['Number',
 ' Incident State',
 ' Active',
 ' Reassignment Count',
 ' Reopen Count',
 ' Sys Mod Count',
 ' Made SLA',
 ' Caller ID',
 ' Opened By',
 ' Opened At',
 ' Sys Created By',
 ' Sys Created At',
 ' Sys Updated By',
 ' Sys Updated At',
 ' Contact Type',
 ' Location',
 ' Category',
 ' Subcategory',
 ' U Symptom',
 ' CMDB CI',
 ' Impact',
 ' Urgency',
 ' Priority',
 ' Assignment Group',
 ' Assigned To',
 ' Knowledge',
 ' U Priority Confirmation',
 ' Notify',
 ' Problem ID',
 ' RFC',
 ' Vendor',
 ' Caused By',
 ' Closed Code',
 ' Resolved By',
 ' Resolved At',
 ' Incident Description',
 ' Notes']

In [9]:

!curl -X POST https://polite-papayas-bathe.loca.lt/v1/completions \
-H "Content-Type: application/json" \
-d '{"model": "facebook/opt-125m","prompt": "Abidjan is located in", "max_tokens": 50, "temperature": 0.8}'


{"object":"error","message":"[{'type': 'json_invalid', 'loc': ('body', 0), 'msg': 'JSON decode error', 'input': {}, 'ctx': {'error': 'Expecting value'}}]","type":"BadRequestError","param":null,"code":400}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   212  100   204  100     8    262     10 --:--:-- --:--:-- --:--:--   274
100   212  100   204  100     8    262     10 --:--:-- --:--:-- --:--:--   274
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0curl: (6) Could not resolve host: facebook
curl: (3) URL rejected: Malformed input to a URL function
curl: (3) URL rejected: Port number was not a decimal number between 0 and 65535
curl: (3) URL rejected: Bad

In [28]:
from openai import OpenAI
from pprint import pprint

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "https://green-waves-glow.loca.lt/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
completion = client.completions.create(model="gpt2",prompt="Tell me about sweden")
print("Completion result:", completion.choices[0].text)

Completion result: , all you pedophiles. Who don't appreciate my moral teachings? (cur


"\nEven Sweden lops submarine lights don't really register"