In [13]:
import os 
import warnings 
from dotenv import load_dotenv 

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Service credentials; each is None when the variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")

In [14]:

from langchain_openai.chat_models import ChatOpenAI
# Shared chat model used by the prompt and chain cells below.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", api_key=OPENAI_API_KEY)

# Calling the model object directly (`llm(...)`) goes through the deprecated
# `__call__` path; `invoke` is the supported entry point and returns the same AIMessage.
llm.invoke("Explain large language models in one sentence")

AIMessage(content='Large language models are advanced artificial intelligence systems that are capable of understanding and generating human-like text based on vast amounts of training data.', response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 15, 'total_tokens': 41}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6d30ef1b-72ed-4e1a-acdb-a17abaf8fbdd-0', usage_metadata={'input_tokens': 15, 'output_tokens': 26, 'total_tokens': 41})

In [15]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
) 

# A second chat model with a lower temperature. The API key is passed explicitly,
# the same way `llm` is constructed, rather than relying on the environment implicitly.
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3, api_key=OPENAI_API_KEY)

# The system message sets the persona; the human message carries the actual request.
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a python script that trains a neural network on simulated data."),
]

# `chat(messages)` uses the deprecated `__call__` path; `invoke` is the supported
# equivalent and likewise returns an AIMessage.
response = chat.invoke(messages)

In [16]:
# Show only the text of the reply (print already terminates the output with a newline).
print(response.content)

Here is a simple example of a Python script that trains a neural network on simulated data using the TensorFlow library:

```python
import numpy as np
import tensorflow as tf

# Generate simulated data
X = np.random.rand(1000, 10)
y = np.random.randint(0, 2, 1000)

# Define the neural network architecture
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss}, Accuracy: {accuracy}')
```

In this script:
1. We generate simulated data `X` and `y`.
2. We define a simple neural network model with 2 hidden layers and an output layer.
3. We compile the model with Adam opt

In [17]:
# Prompt Templates
from langchain import PromptTemplate 

# Template text, spelled out as adjacent literals so that the leading " \n" and the
# trailing space after "models." are visible and survive whitespace-stripping editors.
template = (
    " \n"
    "You are an expert data scientist with an expetise in building deep learning models. \n"
    "Explain the concept {concept} in a couple of lines.\n"
)

# `concept` is the only placeholder that has to be supplied when formatting.
prompt = PromptTemplate(template=template, input_variables=["concept"])
prompt

PromptTemplate(input_variables=['concept'], template=' \nYou are an expert data scientist with an expetise in building deep learning models. \nExplain the concept {concept} in a couple of lines.\n')

In [18]:
# Fill the template and send it to the model; `invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke(prompt.format(concept="regularization"))

AIMessage(content='Regularization is a technique used in machine learning to prevent overfitting by adding a penalty term to the loss function. This penalty term discourages overly complex models, leading to better generalization and improved performance on unseen data.', response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 37, 'total_tokens': 82}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-345beb34-b9af-4e1f-8c51-6495243fbdde-0', usage_metadata={'input_tokens': 37, 'output_tokens': 45, 'total_tokens': 82})

In [19]:
# Same template, different concept; `invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke(prompt.format(concept="bagging"))

AIMessage(content='Bagging, or Bootstrap Aggregating, is a machine learning technique that involves training multiple models on different subsets of the training data and then combining their predictions to generate a final output. This helps to reduce overfitting and improve the overall accuracy and stability of the model.', response_metadata={'token_usage': {'completion_tokens': 55, 'prompt_tokens': 38, 'total_tokens': 93}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-63055b9f-c4d3-4484-880e-68b057b91ca9-0', usage_metadata={'input_tokens': 38, 'output_tokens': 55, 'total_tokens': 93})

In [20]:
# Chains
from langchain.chains import LLMChain 

# Bind the prompt template to the chat model so the pair can be run as one unit.
# `chain` stays an LLMChain because the sequential-chain cell further down composes it.
chain = LLMChain(llm=llm, prompt=prompt)

# `Chain.run` is deprecated. `invoke` takes the inputs as a dict keyed by the prompt
# variable and returns a dict whose "text" entry holds the model's answer.
print(chain.invoke({"concept": "autoencoder"})["text"])

An autoencoder is a type of artificial neural network used for unsupervised learning that aims to encode input data into a more compressed representation and then decode it back to the original input with minimal loss of information. It is commonly used for tasks such as image and text data compression, anomaly detection, and feature extraction.


In [23]:
# Second stage: a prompt with a single slot, `ml_concept`, wrapped in its own LLMChain.
second_prompt = PromptTemplate(
    template=(
        "Turn the concept description of {ml_concept} "
        "and explain it to me like I'm five in 500 words."
    ),
    input_variables=["ml_concept"],
)
chain_two = LLMChain(prompt=second_prompt, llm=llm)

# Display the assembled chain.
chain_two

LLMChain(prompt=PromptTemplate(input_variables=['ml_concept'], template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words."), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001D343122090>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001D343145FD0>, openai_api_key=SecretStr('**********'), openai_proxy=''))

In [24]:
# Sequential Chains
from langchain.chains import SimpleSequentialChain 
# Feed the output of `chain` straight into `chain_two`; verbose=True echoes each
# intermediate result while the pipeline runs.
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)

# `Chain.run` is deprecated. `invoke` returns a dict: SimpleSequentialChain takes its
# single input under "input" and publishes the final step's text under "output".
explanation = overall_chain.invoke({"input": "autoencoder"})["output"]
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mAn autoencoder is a type of artificial neural network used for unsupervised learning that compresses input data into a lower-dimensional representation and then reconstructs the original data from this compressed representation. It is commonly used for tasks such as data denoising, dimensionality reduction, and anomaly detection.[0m
[33;1m[1;3mAn autoencoder is like a really smart robot that can take a picture of a cat, squish it down into a tiny picture, and then make a new picture of the cat that looks almost exactly the same as the original one. It's like a magic trick where the robot can make things smaller and then bring them back to life!

So, imagine you have a huge puzzle with lots and lots of pieces. The autoencoder is like a special machine that can take all those puzzle pieces and put them together in a simpler way. It's like turning a big, messy puzzle into a smaller, neater puzzle that still shows the 

In [25]:
# Text Splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter 

# Splitter configured for a chunk size of 100 with no overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=100)

# create_documents takes a list of strings; here it holds just the generated explanation.
texts = text_splitter.create_documents(texts=[explanation])
texts

[Document(page_content='An autoencoder is like a really smart robot that can take a picture of a cat, squish it down into a'),
 Document(page_content='tiny picture, and then make a new picture of the cat that looks almost exactly the same as the'),
 Document(page_content="original one. It's like a magic trick where the robot can make things smaller and then bring them"),
 Document(page_content='back to life!'),
 Document(page_content='So, imagine you have a huge puzzle with lots and lots of pieces. The autoencoder is like a special'),
 Document(page_content="machine that can take all those puzzle pieces and put them together in a simpler way. It's like"),
 Document(page_content='turning a big, messy puzzle into a smaller, neater puzzle that still shows the same picture. This'),
 Document(page_content='makes it easier for the robot to understand and work with the puzzle.'),
 Document(page_content="But the cool thing is, the autoencoder doesn't just make things smaller, it can also help 

In [26]:
# Extract the plain text of the first chunk: each entry of `texts` carries its
# string in the `page_content` attribute.
texts[0].page_content

'An autoencoder is like a really smart robot that can take a picture of a cat, squish it down into a'

In [40]:
# Turn into embedding
# `langchain.embeddings.OpenAIEmbeddings` is a deprecated re-export; import the class from
# `langchain_openai`, the same package that already provides ChatOpenAI in this notebook.
from langchain_openai import OpenAIEmbeddings

# Pass the key explicitly, consistent with how the chat model `llm` is constructed.
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)


In [41]:
# Vector representation of the first chunk.
# This cell only computes the embedding; nothing is written to Pinecone here
# (storage happens in the later `Pinecone.from_documents` cell).
# NOTE(review): the bare expression below displays every component of the vector;
# consider showing a slice plus the length instead.
query_result = embeddings.embed_query(texts[0].page_content)
query_result

[-0.03867767949835719,
 0.004066864237415921,
 -0.010681889568446257,
 -0.010579962826678441,
 -0.0007249517156492957,
 0.0038833967075935574,
 -0.0023273200526148026,
 -0.030577927535437237,
 0.0038969870329615287,
 -0.03495396853176065,
 0.007957056573354776,
 0.05800292938782912,
 -0.005137091766348808,
 -0.011898211814516226,
 0.017694427609122616,
 -0.004369246066470081,
 -0.004678422825377571,
 0.018074952994135314,
 -0.007365882774953115,
 -0.019569872952127546,
 -0.008344376888220805,
 0.0034043425114010195,
 0.0007325961863572835,
 -0.03767200752832142,
 -0.002488703594752533,
 -0.0035232568106328154,
 0.022940240115769946,
 -0.03294262086639863,
 0.003764482641192417,
 0.015193832887126755,
 0.03375803107525067,
 -0.022654847473994356,
 -0.03441036296762279,
 -0.06539599188219555,
 -0.04362451136616751,
 -0.0007100874573491531,
 0.011442940338471654,
 -0.02228791194868832,
 0.01009751163122055,
 -0.020385285023624824,
 0.02067067952804566,
 0.004773554171630745,
 -0.007780383

In [43]:
# Create Pinecone Instance
import pinecone
from langchain.vectorstores import Pinecone

# Build a Pinecone client with the API key read from the environment at the top of the notebook.
# NOTE(review): none of the visible cells reference `pinecone_client` again; the
# vector-store cell calls `Pinecone.from_documents` without it. Confirm whether the
# client is still needed or should be handed to the vector store explicitly.
pinecone_client = pinecone.Pinecone(api_key=PINECONE_API_KEY)

In [45]:
# Build a vector store over the named index from the text chunks and the embedding model.
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm.
index_name = "langchain-quickstart"
search = Pinecone.from_documents(
    documents=texts,
    embedding=embeddings,
    index_name=index_name,
)

In [46]:
# Ask a natural-language question and retrieve the stored chunks most similar to it.
query = "What is magical about an autoencoder?"

result = search.similarity_search(query=query)
result

[Document(page_content='Overall, the autoencoder is a super smart robot that can help us make sense of complicated pictures'),
 Document(page_content="But the cool thing is, the autoencoder doesn't just make things smaller, it can also help clean up"),
 Document(page_content='wearing a hat instead of a cat, the autoencoder can figure out that something strange is going on'),
 Document(page_content='So, imagine you have a huge puzzle with lots and lots of pieces. The autoencoder is like a special')]