# The purpose of this notebook is to feed the latest Ethereum pricing data into the Mistral LLM.

# 1. Import and Install Required Packages

In [None]:
# Install the third-party packages this notebook imports.
# NOTE(review): faiss-cpu (pip) and faiss-gpu (conda) are both installed here —
# presumably only one of them is needed; confirm which build is intended.
# NOTE(review): `langchain_community` is imported below but is not installed
# explicitly — presumably it arrives as a dependency of `langchain`; verify.
%pip install langchain
%pip install python-dotenv
%pip install mistralai
%pip install pandas
%pip install faiss-cpu
%pip install langchain-openai
%conda install -c pytorch faiss-gpu
%pip install langchain-mistralai


from langchain_core.messages import HumanMessage
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain.chains import LLMChain
from mistralai.client import MistralClient  # Have to use mistral directly as langchain library is broken
from langchain_core.messages import HumanMessage
from langchain_mistralai.chat_models import ChatMistralAI
from dotenv import load_dotenv

import os

## Prepare the data
Load the Ethereum price CSV into a pandas DataFrame.

In [None]:
import pandas as pd

# Read the Ethereum price CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="ethereum-prices.csv")



## 2. Read in the Mistral API Key and the OpenAI API Key
Using both is a bit of a hack: OpenAI is only needed for the embeddings, and the aim is to move away from it.

In [None]:
# Pull variables from a local .env file into the process environment first,
# then look up the two API keys (each is None when the variable is not set).
load_dotenv()
mistral_api_key = os.environ.get('MISTRAL_API_KEY')
open_api_key = os.environ.get('OPEN_API_KEY')

## 3. Vectorise the ethereum data

We load the Ethereum data, convert it to embeddings, and store them in a FAISS vector store.

In [None]:
# Load the price CSV as LangChain documents.
loader = CSVLoader('ethereum-prices.csv')
documents = loader.load()

# Sanity check: show the first loaded document.
print(documents[0])

# Embed the documents with OpenAI and index them in a FAISS vector store.
embeddings = OpenAIEmbeddings(openai_api_key=open_api_key)
vector_store = FAISS.from_documents(documents, embeddings)


## Define similarity search function

In [34]:
def retrieve_info(query, k=3, store=None):
    """Return the text of the indexed documents most similar to ``query``.

    Args:
        query: Natural-language text to match against the indexed documents.
        k: Number of documents to request from the store. Defaults to 3,
            the value that was previously hard-coded.
        store: Object exposing ``similarity_search(query=..., k=...)``.
            Defaults to the notebook-level ``vector_store``; passing one
            explicitly lets the function be used with another index.

    Returns:
        list[str]: The ``page_content`` of each returned document, in the
        order the store returned them.
    """
    if store is None:
        # Resolved at call time so re-running the indexing cell is picked up.
        store = vector_store
    matches = store.similarity_search(query=query, k=k)
    page_contents = [doc.page_content for doc in matches]
    # Echo the retrieved rows so the notebook shows what was fed to the LLM.
    print(page_contents)
    return page_contents

## Test out our similarity search

In [35]:
# Sample question for checking retrieval; the surrounding newlines are part of the text.
message = "\nJesse, we have to trade. What was the price of ethereum on the 5th of January?\n"

results = retrieve_info(message)
print(results)

['timestamp: 2024-01-08T00:00:00Z\nopen_price: 5.1937804114183994e-08\nhigh_price: 5.1937804114183994e-08\nlow_price: 5.1937804114183994e-08\nclose_price: 5.1937804114183994e-08\naverage_price: 5.1937804114183994e-08\nvolume: 544173617.8901023', 'timestamp: 2024-01-05T00:00:00Z\nopen_price: 5.0886391014356565e-08\nhigh_price: 5.0886391014356565e-08\nlow_price: 5.0886391014356565e-08\nclose_price: 5.0886391014356565e-08\naverage_price: 5.0886391014356565e-08\nvolume: 321986247.33510756', 'timestamp: 2024-01-08T00:00:00Z\nopen_price: 6.326884246348339e-05\nhigh_price: 6.326884246348339e-05\nlow_price: 6.326884246348339e-05\nclose_price: 6.326884246348339e-05\naverage_price: 6.326884246348339e-05\nvolume: 3500000.0']
['timestamp: 2024-01-08T00:00:00Z\nopen_price: 5.1937804114183994e-08\nhigh_price: 5.1937804114183994e-08\nlow_price: 5.1937804114183994e-08\nclose_price: 5.1937804114183994e-08\naverage_price: 5.1937804114183994e-08\nvolume: 544173617.8901023', 'timestamp: 2024-01-05T00:00:0

## Now let's set up Mistral to answer using our retrieved data

In [37]:
# Chat model used to answer questions; temperature=0 is passed to the model.
llm = ChatMistralAI(mistral_api_key=mistral_api_key, model='mistral-tiny', temperature=0)


# Prompt with two placeholders: the user's message and the retrieved price rows.
template = """
You are a blockchain expert, specializing in Ethereum. You are talking to a friend who is new to the space. They ask you a question about Ethereum. You answer their question. The conversation continues.
Responses should be concise and to the point. Do not use jargon or technical terms. Do not use slang or colloquialisms. Do not use contractions. Do not use emojis.

Below is a message you might receive:
{message}

Here is a list of relevant pricing data:
{ethereum_prices}

Please write me some nice responses
"""

prompt = PromptTemplate(
    # A list, not a set literal: input_variables is a list of names, and a
    # set has no stable order.
    input_variables=["message", "ethereum_prices"],
    template=template
)

# Chain that fills the prompt and sends it to the model.
chain = LLMChain(llm=llm, prompt=prompt)

## Create function to generate response

In [38]:
def generate_response(message):
    """Answer ``message`` with the LLM chain, using retrieved pricing rows as context.

    Args:
        message: The user's question.

    Returns:
        The value returned by ``chain.run`` for the filled-in prompt.
    """
    ethereum_prices = retrieve_info(message)
    # Join the rows into plain text. Passing the list itself would put its
    # Python repr (brackets, quotes, escaped "\n") into the prompt.
    pricing_context = "\n\n".join(ethereum_prices)
    return chain.run(message=message, ethereum_prices=pricing_context)

## Now test whether our response contains the correct data
The model may only be picking up on the format of the data rather than the right values.

In [39]:
# Same sample question as the retrieval check, now sent through the full chain.
message = "\nJesse, we have to trade. What was the price of ethereum on the 5th of January?\n"

# Left as the last expression so the notebook displays the model's reply.
generate_response(message)

['timestamp: 2024-01-08T00:00:00Z\nopen_price: 5.1937804114183994e-08\nhigh_price: 5.1937804114183994e-08\nlow_price: 5.1937804114183994e-08\nclose_price: 5.1937804114183994e-08\naverage_price: 5.1937804114183994e-08\nvolume: 544173617.8901023', 'timestamp: 2024-01-05T00:00:00Z\nopen_price: 5.0886391014356565e-08\nhigh_price: 5.0886391014356565e-08\nlow_price: 5.0886391014356565e-08\nclose_price: 5.0886391014356565e-08\naverage_price: 5.0886391014356565e-08\nvolume: 321986247.33510756', 'timestamp: 2024-01-08T00:00:00Z\nopen_price: 6.326884246348339e-05\nhigh_price: 6.326884246348339e-05\nlow_price: 6.326884246348339e-05\nclose_price: 6.326884246348339e-05\naverage_price: 6.326884246348339e-05\nvolume: 3500000.0']


"Response 1:\nThe price of Ethereum on the 5th of January was 5.0886391014356565e-08, according to the provided data.\n\nResponse 2:\nBased on the given information, Ethereum was priced at 5.0886391014356565e-08 on January 5th.\n\nResponse 3:\nAccording to the data, Ethereum's price on January 5th was 5.0886391014356565e-08."