# The purpose of this workbook is to input the latest ethereum pricing data into the mistral LLM model.

# 1. Import and Install Required Packages

In [118]:
%pip install langchain
%pip install python-dotenv
%pip install mistralai
%pip install pandas
%pip install faiss-cpu
%pip install langchain-openai
%conda install -c pytorch faiss-gpu
%pip install langchain-mistralai


from langchain_core.messages import HumanMessage
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import DataFrameLoader
from langchain_openai import OpenAIEmbeddings
from langchain.chains import LLMChain
from mistralai.client import MistralClient  # Have to use mistral directly as langchain library is broken
from langchain_core.messages import HumanMessage
from langchain_mistralai.chat_models import ChatMistralAI
from dotenv import load_dotenv

import os

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting package metadata (current_repodata.json): done
Solving environment: unsuccessful initial attempt using frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: unsuccessful initial attempt using frozen solve. Retrying with flexible solve.

PackagesNotFoundError: The following packages are not available from current channels:

  - faiss-gpu

Current channels:

  - https://conda.anaconda.org/pytorch/osx-arm64
  - https://conda.anaconda.org/pytorch/noarch
  - https://repo.anaconda.com/pkgs/main/osx-arm64
  - https://repo.anaconda.co

## Prepare the data
Step 1 
Remove any values where price is between 1 and 0.

Step 2
Combine values from each row into one document column so we can feed it into the LLM

Step 3 
Add column coin type 

In [119]:
import pandas as pd

df = pd.read_csv("ethereum-prices.csv") 

clean_df = df[df['open_price'] >= 1]
clean_df['coin_type'] = 'ethereum'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df['coin_type'] = 'ethereum'


## 2. Read in Mistral API Key and OpenAPI Key
Bit of hack to use both, only need openai for embeddings which will aim to move away from

In [120]:
load_dotenv()
mistral_api_key = os.getenv('MISTRAL_API_KEY')
open_api_key = os.getenv('OPEN_API_KEY')

## 3. Vectorise the ethereum data

We convert it to embeddings and store in a faiss vector store.

In [121]:

loader = DataFrameLoader(clean_df, page_content_column="timestamp")
documents = loader.load()
print(documents[:1])

embeddings = OpenAIEmbeddings(openai_api_key=open_api_key)
vector_store = FAISS.from_documents(documents=documents, embedding=embeddings)

[Document(page_content='2024-01-17T00:00:00Z', metadata={'open_price': 2589.29, 'high_price': 2591.73, 'low_price': 2558.43, 'close_price': 2560.65, 'average_price': 2574.6510185185184, 'volume': 4014.371835845924, 'coin_type': 'ethereum'}), Document(page_content='2024-01-16T00:00:00Z', metadata={'open_price': 2508.98, 'high_price': 2613.76, 'low_price': 2501.0, 'close_price': 2587.7, 'average_price': 2546.108397626113, 'volume': 13752.889683320847, 'coin_type': 'ethereum'}), Document(page_content='2024-01-15T00:00:00Z', metadata={'open_price': 2473.59, 'high_price': 2550.3, 'low_price': 2473.59, 'close_price': 2509.17, 'average_price': 2520.5667963386727, 'volume': 13620.516250583076, 'coin_type': 'ethereum'}), Document(page_content='2024-01-14T00:00:00Z', metadata={'open_price': 2578.2, 'high_price': 2578.2, 'low_price': 2471.47, 'close_price': 2473.85, 'average_price': 2534.040058479532, 'volume': 9636.707505916576, 'coin_type': 'ethereum'}), Document(page_content='2024-01-13T00:00:

## Define similarity search function

This similarity search returns the top 3 results from our vector store. It relates what the query from the end user is asking to what is in the vector store. 

- similar_response performs the similarity search inside our vector store. 


In [None]:
def retrieve_info(query):
    similar_response = vector_store.similarity_search(query=query, k=3)
    print(similar_response)
    return similar_response

## Now lets tune mistral to use our new data embeddings

In [None]:
llm = ChatMistralAI(mistral_api_key=mistral_api_key, model='mistral-tiny', temperature=0)


template = """
You are a blockchain expert, specializing in Ethereum and telling prices. 

Below is a message you might receive:
{message}

Here is a list of relevant pricing data:
{ethereum_prices}

Please return pricing data
"""

prompt = PromptTemplate(
    input_variables={"message", "ethereum_prices"},
    template=template
)

chain = LLMChain(llm=llm, prompt=prompt)

## Create function to generate response

In [None]:
def generate_response(message):
    ethereum_prices = retrieve_info(message)
    response      = chain.run(message=message,  ethereum_prices=ethereum_prices)
    return response

## Now test if our response generates actual correct data. 
This may actually only train on the format of data.

In [117]:
message = """
What was the price of ethereum on 2023-12-21T00:00:00?
"""

generate_response(message)

[Document(page_content='2023-12-21T00:00:00Z', metadata={'open_price': 2196.43, 'high_price': 2278.19, 'low_price': 2185.11, 'close_price': 2241.29, 'average_price': 2225.0103348214284, 'volume': 13972.04538901768, 'coin_type': 'ethereum'}), Document(page_content='2023-12-31T00:00:00Z', metadata={'open_price': 2292.91, 'high_price': 2317.57, 'low_price': 2265.67, 'close_price': 2280.69, 'average_price': 2295.204309623431, 'volume': 6987.294741855056, 'coin_type': 'ethereum'}), Document(page_content='2023-12-31T00:00:00Z', metadata={'open_price': 2217.123285260862, 'high_price': 2217.123285260862, 'low_price': 2217.123285260862, 'close_price': 2217.123285260862, 'average_price': 2217.123285260862, 'volume': 0.23, 'coin_type': 'ethereum'})]


'Based on the provided data, there is no document with the exact timestamp of 2023-12-21T00:00:00. Therefore, I cannot provide the exact price of Ethereum on that date and time. The closest available data is from the 31st of December, 2023 with a close price of 2280.69.'