In [None]:
import getpass
import os

# Turn on LangChain tracing for this kernel via LANGCHAIN_TRACING_V2.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Ask for the API key only when it is not already exported, so that
# "Restart Kernel -> Run All" does not block on input in a pre-configured
# environment. The key is read with getpass so it is never echoed or stored
# in the notebook output.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY: ")


# commands run:

## for langchain & langflow environment
1. pip install langchain
2. pip3 install torch torchvision
3. pip install langchain langsmith langchain-community langgraph langchain-cli langchainhub langchain-openai langchain-chroma bs4

## langflow start

python3 -m langflow run


## remote login

ssh -C -p 222 -L 7860:127.0.0.1:7860 cmejo@rstudio-tr.braverock.com # forwards langflow to http://localhost:7860/
ssh -C -p 222 -L 11434:127.0.0.1:11434 cmejo@rstudio-tr.braverock.com # forwards ollama to http://localhost:11434/


In [None]:
# load and preprocess custom dataset

import json
import os
from transformers import AutoTokenizer, AutoModel
import numpy as np
import faiss
import torch

# Load a pre-trained LLaMA model and tokenizer.
# "meta-llama/llama3" is not a repository id on the Hugging Face Hub, so
# from_pretrained() cannot resolve it; the Llama 3 base checkpoint is
# published as "meta-llama/Meta-Llama-3-8B".
# NOTE(review): the repository is gated — confirm the account running this
# notebook has accepted the license and is logged in to the Hub.
model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Load custom dataset
def load_dataset(file_path):
    """Read a JSON file and return its parsed content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; without an explicit encoding
    # open() falls back to the platform locale and can mis-decode (or fail
    # on) non-ASCII characters.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Location of the JSON file to index (the literal below is a placeholder path).
dataset_path = "path/to/your/dataset.json"

# Parse the file once; later cells iterate over the result and read
# per-document keys such as "text".
dataset = load_dataset(file_path=dataset_path)

# Generate embeddings for the dataset
def generate_embeddings(text, tokenizer, model):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: The string to embed.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True)`` whose
            result can be unpacked into ``model(**inputs)``.
        model: Callable returning an object with a ``last_hidden_state``
            tensor; the token axis (dim 1) is averaged.

    Returns:
        A float32 ``numpy.ndarray`` with the batch axis removed.
    """
    # truncation=True keeps over-long documents within the tokenizer's
    # configured maximum length instead of overflowing the model's context.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Average over the token axis, then drop only the batch axis; a bare
    # squeeze() would also collapse any other size-1 axis.
    pooled = outputs.last_hidden_state.mean(dim=1).squeeze(0)

    # .numpy() fails for tensors that live off-CPU or use bfloat16, and FAISS
    # only accepts float32, so normalise device and dtype before converting.
    return pooled.detach().cpu().to(torch.float32).numpy()

# Fail early with a clear message: with no documents there are no vectors to
# stack and no embedding dimension to size the index with.
if not dataset:
    raise ValueError("dataset is empty; there is nothing to index")

embeddings_list = [generate_embeddings(doc["text"], tokenizer, model) for doc in dataset]

# FAISS expects a C-contiguous float32 matrix of shape (n_documents, dimension).
embeddings_array = np.ascontiguousarray(np.stack(embeddings_list), dtype=np.float32)

# Create a FAISS index (exact L2 search) sized to the embedding dimension
dimension = embeddings_array.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings_array)

# Save the index
faiss.write_index(index, "faiss_index.bin")


In [None]:
# langflow configuration

from langchain.chains import SimpleChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import Prompt
from langchain.vectorstores import FAISS

# Load the FAISS index
# Reads the serialized index from "faiss_index.bin", the same file name the
# preprocessing cell passes to faiss.write_index, and rebinds `index` to it.
# `faiss` is imported in that earlier cell, not in this one, so this line
# relies on that cell having been run in the same kernel.
index = faiss.read_index("faiss_index.bin")

# Define a prompt template
prompt_template = """
You are an AI assistant specialized in answering questions based on the provided dataset. Given the following document text, provide a brief and accurate answer to the question:
Document: {document}
Question: {question}
Answer:
"""

# Define the function to find the most relevant document
def find_relevant_document(question, index, dataset, embeddings):
    """Return the text of the indexed document closest to ``question``.

    Args:
        question: The query string.
        index: Object with a FAISS-style ``search(vectors, k)`` method that
            returns ``(distances, labels)``.
        dataset: Sequence of mappings with a ``"text"`` key, in the same order
            as the vectors stored in ``index``.
        embeddings: Object that embeds the query. Either exposes
            ``generate_embeddings(text)`` or, failing that, the LangChain
            ``embed_query(text)`` method.

    Returns:
        The ``"text"`` value of the best-matching document.

    Raises:
        LookupError: If the index reports no neighbour for the query.
    """
    import numpy as np

    if hasattr(embeddings, "generate_embeddings"):
        raw_vector = embeddings.generate_embeddings(question)
    else:
        raw_vector = embeddings.embed_query(question)

    # FAISS searches a float32 matrix with one row per query.
    query_embedding = np.asarray(raw_vector, dtype=np.float32).reshape(1, -1)
    _distances, labels = index.search(query_embedding, k=1)

    # FAISS pads missing results with label -1 (e.g. an empty index); used as
    # a Python index that would silently select the LAST document.
    best_match = int(labels[0][0])
    if best_match < 0:
        raise LookupError("the index returned no match for the question")

    return dataset[best_match]["text"]

# Define the chatbot function
def rag_chatbot(question, index, dataset, embeddings, llm=None):
    """Answer ``question`` using the most relevant document as context.

    The previous body built ``Prompt(<formatted string>)`` and called
    ``.run()`` on it. A prompt template takes no positional argument, has no
    ``run`` method, and is not a language model, so no answer was ever
    generated. The filled-in prompt is now sent to an actual model.

    Args:
        question: The user's question.
        index: Vector index forwarded to ``find_relevant_document``.
        dataset: Document collection forwarded to ``find_relevant_document``.
        embeddings: Query embedder forwarded to ``find_relevant_document``.
        llm: Optional callable mapping a prompt string to an answer string.
            Defaults to a call to a local Ollama server (model ``llama3``).

    Returns:
        The value returned by ``llm`` for the filled-in prompt.

    Raises:
        Any exception raised by ``find_relevant_document`` or by ``llm``
        propagates unchanged (for the default ``llm``, e.g.
        ``urllib.error.URLError`` when the Ollama server is unreachable).
    """
    document = find_relevant_document(question, index, dataset, embeddings)
    prompt = prompt_template.format(document=document, question=question)
    generate = _ask_ollama if llm is None else llm
    return generate(prompt)


def _ask_ollama(prompt, model_name="llama3", base_url="http://localhost:11434"):
    """Send ``prompt`` to Ollama's ``/api/generate`` endpoint and return the reply text."""
    import json
    import urllib.request

    # "stream": False asks for a single JSON object instead of a stream of
    # partial responses.
    payload = json.dumps(
        {"model": model_name, "prompt": prompt, "stream": False}
    ).encode("utf-8")
    request = urllib.request.Request(
        f"{base_url}/api/generate",
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode("utf-8"))["response"]

# Example usage
class _NotebookEmbedder:
    """Adapter exposing the ``generate_embeddings(text)`` method that
    ``find_relevant_document`` calls on its ``embeddings`` argument.

    ``HuggingFaceEmbeddings(model, tokenizer)`` was not usable here: that
    class is configured through keyword fields such as ``model_name`` and has
    no ``generate_embeddings`` method. Reusing the notebook's own
    ``generate_embeddings`` with the same tokenizer/model also keeps the query
    vector in the same space and dimension as the indexed vectors.
    """

    def generate_embeddings(self, text):
        return generate_embeddings(text, tokenizer, model)


question = "What is quantum entanglement?"
answer = rag_chatbot(question, index, dataset, _NotebookEmbedder())
print("Answer:", answer)


In [None]:
# Ollama Configuration File

# Declares one model (llama3, version latest) and one POST endpoint, /ask,
# whose handler is named rag_chatbot — the same name as the notebook's Python
# chatbot function.
# NOTE(review): this does not look like a format the Ollama CLI reads (Ollama
# models are normally described with a Modelfile) — confirm which tool, if
# any, consumes this schema.
api_version: v1
name: physics-rag-chatbot
description: A RAG chatbot for answering physics questions using LLaMA3 and LangFlow
models:
  - name: llama3
    version: latest
endpoints:
  - path: /ask
    method: POST
    handler: rag_chatbot

In [None]:
# deploy ollama

# NOTE(review): `deploy` does not appear among the documented Ollama CLI
# subcommands (serve, create, run, pull, ...) — confirm the intended command
# before relying on this cell.
ollama deploy


In [None]:
# create knowledge graph using 'rdflib'

from rdflib import Graph, Literal, RDF, URIRef, Namespace
from rdflib.namespace import FOAF, DC

# Create an RDF graph
from urllib.parse import quote

g = Graph()
n = Namespace("http://example.org/")

# Add one resource per document, with its title, its text (as the
# description) and one dc:creator statement per listed author.
for doc in dataset:
    # Percent-encode the id: spaces, '#', '?', '/' and similar characters
    # would otherwise produce an invalid or differently-parsed URI. Ids made
    # only of letters, digits and "-_.~" are left unchanged.
    doc_id = quote(str(doc["id"]), safe="")
    doc_uri = URIRef(f"http://example.org/document/{doc_id}")
    g.add((doc_uri, DC.title, Literal(doc["title"])))
    g.add((doc_uri, DC.description, Literal(doc["text"])))
    # "authors" is optional; documents without it get no creator statements.
    for author in doc.get("authors", []):
        g.add((doc_uri, DC.creator, Literal(author)))

# Save the graph as RDF/XML
g.serialize(destination="knowledge_graph.rdf", format="xml")


In [None]:
# Data Visualization in R

# Install only the packages that are not already available, so re-running
# this cell (e.g. "Restart & Run All") does not reinstall them every time.
required_packages <- c("ggplot2", "jsonlite")
is_installed <- vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}


In [None]:
# Load and Visualize Data:

library(ggplot2)
library(jsonlite)

# Load dataset
# simplifyVector = FALSE keeps the JSON array as a list of records. With the
# default simplification fromJSON() returns a data.frame, and iterating it
# with sapply() walks over columns, where `x$title` fails on atomic vectors.
dataset <- fromJSON("path/to/your/dataset.json", simplifyVector = FALSE)

# Read one field of a record as a string; absent fields become NA so that
# every record contributes exactly one value per column.
field_or_na <- function(record, field) {
  value <- record[[field]]
  if (is.null(value)) NA_character_ else as.character(value)
}

# Convert to data frame
df <- data.frame(
  Title = vapply(dataset, field_or_na, character(1), field = "title"),
  Date = as.Date(vapply(dataset, field_or_na, character(1), field = "date")),
  stringsAsFactors = FALSE
)

# Histogram of document dates: Date on the x axis, bin width of 30.
ggplot(data = df, mapping = aes(x = Date)) +
  geom_histogram(binwidth = 30) +
  ggtitle("Number of Documents Over Time") +
  xlab("Date") +
  ylab("Number of Documents")
