In [1]:
# Load environment variables
# python -m ipykernel install --user --name=webnearme-venv
from dotenv import load_dotenv,find_dotenv
load_dotenv(find_dotenv())



True

In [2]:
# Run basic query with OpenAI wrapper

from langchain.llms import OpenAI
llm = OpenAI(model_name="text-davinci-003")
llm("explain large language models in one sentence")

'\n\nLarge language models are a type of artificial intelligence (AI) that can generate human-like text by training on large amounts of text data.'

In [3]:
# import schema for chat messages and ChatOpenAI in order to query chatmodels GPT-3.5-turbo or GPT-4

from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

In [4]:
chat = ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0.3)
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a Python script that trains a neural network on simulated data ")
]
response=chat(messages)

print(response.content,end='\n')

Sure! Here's an example script that trains a neural network on simulated data using the Keras library:

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Generate simulated data
np.random.seed(0)
X = np.random.rand(100, 2)
y = np.random.randint(0, 2, size=(100,))

# Define the neural network model
model = Sequential()
model.add(Dense(10, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=10)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f"Loss: {loss}, Accuracy: {accuracy}")
```

In this script, we first generate simulated data using `numpy.random.rand()` and `numpy.random.randint()`. We then define a simple neural network model using the `Sequential` class from Keras. The model consists of two dense layers with 10 and 1 units respective

In [5]:
# Import prompt and define PromptTemplate

from langchain import PromptTemplate

template = """
You are an expert data scientist with an expertise in building deep learning models. 
Explain the concept of {concept} in a couple of lines
"""

prompt = PromptTemplate(
    input_variables=["concept"],
    template=template,
)

In [6]:
# Run LLM with PromptTemplate

llm(prompt.format(concept="vector"))

'\nVector is a mathematical object that has magnitude and direction. It is used to represent physical quantities such as force, velocity, and acceleration, as well as abstract quantities such as temperature, pressure, and electric current. In deep learning, vectors are used to represent data points, which can be used as inputs to a model or as outputs from a model.'

In [7]:
# Import LLMChain and define chain with language model and prompt as arguments.

from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain only specifying the input variable.
print(chain.run("autoencoder"))


An autoencoder is a neural network architecture used for unsupervised learning, where the network learns to represent (or encode) an input in a compressed form and then reconstruct it (or decode it) to match the original input. It is typically used for dimensionality reduction and feature extraction.


In [8]:
# Define a second prompt 

second_prompt = PromptTemplate(
    input_variables=["ml_concept"],
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
)
chain_two = LLMChain(llm=llm, prompt=second_prompt)

In [9]:
# Define a sequential chain using the two chains above: the second chain takes the output of the first chain as input

from langchain.chains import SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)

# Run the chain specifying only the input variable for the first chain.
explanation = overall_chain.run("autoencoder")
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
An autoencoder is a type of artificial neural network that learns to represent input data as a compressed representation, or "embedding". It is used to learn efficient data encodings in an unsupervised manner by reconstructing the input data from the learned embedding.[0m
[33;1m[1;3m

An autoencoder is a computer program that can learn to make data more compact and easier to understand. It's like a special kind of puzzle.

Imagine you have a big jigsaw puzzle with lots of pieces. To make it easier to put together, you could sort the pieces into piles of different shapes. That would make it easier to find the pieces that go together. 

An autoencoder works in a similar way. It looks at a set of data and finds ways to sort it into different piles. It tries to make the piles as small as possible, so that it can fit more data into each pile. This way, the autoencoder can make the data more compact and easier to unders

In [24]:
# Import utility for splitting up texts and split up the explanation given above into document chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap  = 0,
)

texts = text_splitter.create_documents([explanation])
len(texts)

14

In [11]:
# Individual text chunks can be accessed with "page_content"

texts[0].page_content

'An autoencoder is a computer program that can learn to make data more compact and easier to'

In [23]:
# Import and instantiate OpenAI embeddings

from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model_name="ada")
print(type(embeddings))

<class 'langchain.embeddings.openai.OpenAIEmbeddings'>


In [13]:
# Turn the first text chunk into a vector with the embedding

query_result = embeddings.embed_query(texts[0].page_content)
print(query_result[0:3])

[-0.028041064536572253, 0.03055861942736854, 0.014163807863452081]


In [26]:
# Import and initialize Pinecone client

import os
import pinecone
from langchain.vectorstores import Pinecone


pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),  
    environment=os.getenv('PINECONE_ENV')  
)

In [36]:
# # Upload vectors to Pinecone
# # import pinecone
# import pinecone      
# # pinecone.list_indexes()

# pinecone.create_index('websitenearme-fast-api', dimension=1024)
# index_name = pinecone.index.Index('websitenearme-fast-api')
# # print(index_name)

# # index_name = "websitenearme-fast-api"
# search = pinecone.from_existing_index(texts=texts, embeddings=embeddings, index_name=index_name)
print(texts[0])

page_content='An autoencoder is a computer program that can learn to make data more compact and easier to' metadata={'text': 'An autoencoder is a computer program that can learn to make data more compact and easier to'}


In [38]:
import requests
from requests.packages.urllib3.util.ssl_ import create_urllib3_context

CIPHERS = (
    'ECDHE+AESGCM:ECDHE+CHACHA20:DHE+AESGCM:DHE+CHACHA20:ECDH+AESGCM:ECDH+CHACHA20:DH+AESGCM:DH+CHACHA20:'
    'ECDHE+AES:!aNULL:!eNULL:!EXPORT:!DES:!MD5:!PSK:!RC4:!HMAC_SHA1:!SHA1:!DHE+AES:!ECDH+AES:!DH+AES'
)

requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = CIPHERS
# Skip the following two lines if they cause errors
# requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST = CIPHERS
# requests.packages.urllib3.contrib.pyopenssl.inject_into_urllib3()
requests.packages.urllib3.util.ssl_.create_default_context = create_urllib3_context


In [39]:
# Initialize Pinecone
index_name = "websitenearme-fast-api"
DIMENSIONS=128

# Create and configure index if doesn't already exist
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name, 
        metric="cosine",
        dimension=DIMENSIONS)
    docsearch = Pinecone.from_documents(texts[0], embeddings, index_name=index_name)

else:
    docsearch = Pinecone.from_existing_index(index_name, embeddings)

print(docsearch)

<langchain.vectorstores.pinecone.Pinecone object at 0x12a0798d0>


In [40]:
import pinecone      
pinecone.list_indexes()

['websitenearme-fast-api']

In [41]:
pinecone.describe_index(index_name)

IndexDescription(name='websitenearme-fast-api', metric='cosine', replicas=1, dimension=128.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

## we created the index up to this point
## next we will insert the data into pinecone

In [32]:
# Do a simple vector similarity search

query = "What is magical about an autoencoder?"
result = docsearch.similarity_search(query)

print(result)

[]


In [None]:
# Import Python REPL tool and instantiate Python agent

from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI

agent_executor = create_python_agent(
    llm=OpenAI(temperature=0, max_tokens=1000),
    tool=PythonREPLTool(),
    verbose=True
)

In [None]:
# Execute the Python agent

agent_executor.run("Find the roots (zeros) if the quadratic function 3 * x**2 + 2*x -1")