In [1]:
from dotenv import load_dotenv,find_dotenv
from tqdm.autonotebook import tqdm
import pinecone
from langchain.llms import OpenAI

  from tqdm.autonotebook import tqdm


In [2]:
load_dotenv(find_dotenv())

True

In [3]:
llm=OpenAI(model_name="text-davinci-003")

In [4]:
llm("explain large language models in one sentence")

'\n\nLarge language models are neural networks that are trained on large amounts of text data to generate human-like language.'

In [5]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

In [6]:
chat = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a python program that trains a neural network on simulated data")
]
response = chat(messages)

In [7]:
print(response.content,end="/n")

Sure! Here's an example of a Python program that trains a neural network on simulated data using the Keras library:

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Generate simulated data
np.random.seed(0)
X = np.random.rand(1000, 2)
y = np.random.randint(2, size=(1000, 1))

# Define the neural network architecture
model = Sequential()
model.add(Dense(10, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')
```

In this example, we first generate simulated data using `numpy.random.rand` and `numpy.random.randint`. We create a neural network model using the `Sequential` class from Keras and add two fully connected layers using the `D

In [8]:
from langchain import PromptTemplate

template = """
You are an expert data scientist with an expertise in building deep learning models.
Explain the concept of {concept} in a couple of lines
"""

prompt = PromptTemplate(
    input_variables=["concept"],
    template=template
    )

In [9]:
prompt

PromptTemplate(input_variables=['concept'], template='\nYou are an expert data scientist with an expertise in building deep learning models.\nExplain the concept of {concept} in a couple of lines\n')

In [10]:
llm(prompt.format(concept="regularization"))

'\nRegularization is a technique used to avoid overfitting, which is when a model performs well on training data but poorly on unseen data. It is done by adding a penalty to the cost function of a model, such as the weights of the model, to reduce the complexity of the model and to prevent overfitting.'

In [11]:
from langchain import LLMChain

In [12]:
chain = LLMChain(llm=llm, prompt=prompt)
print(chain("autoencoder"))

{'concept': 'autoencoder', 'text': "\nAn autoencoder is an unsupervised learning algorithm that takes an input, compresses it into a lower dimensional space (the 'encoding'), and then reconstructs the original input from the encoding. It is used for data compression, feature extraction, and dimensionality reduction."}


In [13]:
second_prompt=PromptTemplate(
    input_variables=['ml_concept'],
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
)
chain_two = LLMChain (llm=llm, prompt=second_prompt)


In [20]:
from langchain.chains import SimpleSequentialChain

overall_chain = SimpleSequentialChain(chains=[chain,chain_two], verbose=True)

explanation = overall_chain.run("autoencoder")
print (explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
Autoencoders are a type of neural network that can be used to learn efficient data representations in an unsupervised manner. They are composed of an encoder that maps the input data into a low-dimensional representation, and a decoder that reconstructs the input data from the low-dimensional representation.[0m
[33;1m[1;3m

Autoencoders are a type of fun tool that computers use to learn about different kinds of data. It's like when you have a bunch of toys that you can't play with, so you have to figure out what they are and how you can use them. 

Autoencoders can help computers learn about data without being given any instructions. They use two different parts that work together. The first part is called an encoder. It takes the raw data and turns it into something smaller and simpler. It's like taking a big, complicated toy and breaking it down into smaller pieces. 

The second part is called a decoder. It take

In [15]:
explanation = overall_chain.run("HNSW algorithm")
print (explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
HNSW (Hierarchical Navigable Small World) is an algorithm used for efficient approximate nearest neighbor search in high-dimensional data. It works by constructing a hierarchical graph of nodes connected by edges, which represent the similarity between pairs of points in the data set. The algorithm is fast and scalable, making it suitable for large-scale applications.[0m
[33;1m[1;3m

HNSW is an algorithm that helps you find things that are close to each other. Imagine you have a bunch of apples in a box. You want to find the apple that is the closest to you. You can reach in and grab one, but that might not be the closest. HNSW helps you find the closest apple in the box.

HNSW works by creating a graph of all the apples in the box. This graph is like a map of all the apples. Each apple is connected to other apples by edges. The edges represent how similar the apples are. So if two apples are very similar, they wi

In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [22]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap = 0
)
texts = text_splitter.create_documents([explanation])

In [23]:
texts

[Document(page_content="Autoencoders are a type of fun tool that computers use to learn about different kinds of data. It's"),
 Document(page_content="like when you have a bunch of toys that you can't play with, so you have to figure out what they"),
 Document(page_content='are and how you can use them.'),
 Document(page_content='Autoencoders can help computers learn about data without being given any instructions. They use two'),
 Document(page_content='different parts that work together. The first part is called an encoder. It takes the raw data and'),
 Document(page_content="turns it into something smaller and simpler. It's like taking a big, complicated toy and breaking"),
 Document(page_content='it down into smaller pieces.'),
 Document(page_content='The second part is called a decoder. It takes the smaller, simpler data from the encoder and turns'),
 Document(page_content="it back into something bigger and more complicated. It's like taking the pieces of the toy and"),
 Document(

In [24]:
texts[0].page_content

"Autoencoders are a type of fun tool that computers use to learn about different kinds of data. It's"

In [25]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002")

In [26]:
embeddings

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False)

In [27]:
from langchain.vectorstores import Pinecone
import os
import pinecone

In [28]:
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment = os.getenv('PINECONE_ENV')
)

In [29]:
index_name = "langchain-tutorial"
search = Pinecone.from_documents(texts, embeddings, index_name=index_name)

In [41]:
import langchain
langchain.__version__

'0.0.324'

In [35]:
query = "what is fun about an autoencoder?"
search_results =  search.similarity_search(query)

In [36]:
print(search_results)

[Document(page_content="Autoencoders are a type of fun tool that computers use to learn about different kinds of data. It's"), Document(page_content='Autoencoders can help computers learn about data without being given any instructions. They use two'), Document(page_content='So, autoencoders help computers learn about data without being told what to do. They can take'), Document(page_content='complicated. This helps them learn better and faster.')]
