# Basics

In [1]:
import os

import pinecone

from playground_secret_key import SECRET_KEY
from langchain.schema import (SystemMessage, HumanMessage, AIMessage)
from langchain.chat_models import ChatOpenAI

os.environ['OPENAI_API_KEY'] = SECRET_KEY
chat = ChatOpenAI(
    openai_api_key = os.environ['OPENAI_API_KEY'],
    model = 'gpt-3.5-turbo'
)

messages = [
    SystemMessage(content='You are a tutor that helps highschool students.'),
    HumanMessage(content='Hi tutor, how are you today?'),
    AIMessage(content='I am great, thank you, how can I help you today?.'),
    HumanMessage(content='I would like you to explain to me second order derivatives')
]

# TODO : to have chat history you append both the AI response and the new prompt to the messages list

res = chat.invoke(messages)




  warn_deprecated(


# Chat history

In [2]:
messages.append(res)
prompt = HumanMessage(content='How does is this used in finding maxima and minima of a function')
messages.append(prompt)
res = chat.invoke(messages)
print(res.content)


In finding maxima and minima of a function, the second derivative test is often used. Here's how it works:

1. Find the critical points of the function by setting the first derivative equal to zero and solving for \( x \).
2. Use the second derivative test to determine whether each critical point corresponds to a maximum, minimum, or neither:
   - If \( f''(x) > 0 \) at a critical point, then the function has a local minimum at that point.
   - If \( f''(x) < 0 \) at a critical point, then the function has a local maximum at that point.
   - If \( f''(x) = 0 \) or the second derivative does not exist at a critical point, the test is inconclusive.

By analyzing the concavity of the function at the critical points using the second derivative, you can determine whether each critical point corresponds to a maximum, minimum, or neither.

It's important to note that the second derivative test can only determine local extrema, not absolute extrema. To find absolute extrema, you may also need 

# Loading data

In [3]:
import glob
from langchain_community.document_loaders import DirectoryLoader
from pathlib import Path
from pathlib import Path
import glob

target_dir = '/Users/lorenzodeappolonia/Desktop/supervised_learning/to_do'
documents = DirectoryLoader(path=target_dir, glob='01_*.pdf', recursive=True, show_progress=True).load_and_split()


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


In [4]:
from pinecone import Pinecone 
from playground_secret_key import PINECONE_KEY

os.environ['PINECONE_API_KEY'] = PINECONE_KEY
environment = os.environ.get('PINECONE_ENVIRONMENT')

pc = Pinecone()



In [5]:
print(pc.list_indexes())

{'indexes': [{'dimension': 1536,
              'host': 'rag-xezwua8.svc.gcp-starter.pinecone.io',
              'metric': 'cosine',
              'name': 'rag',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}


In [6]:
# from pinecone import ServerlessSpec, PodSpec 
# import time
# index_name = "llama-2-rag"
# 
# if index_name not in pinecone.list_indexes().names():
#     pinecone.create_index(
#         index_name,
#         dimension=1536,
#         metric='cosine',
#         spec=PodSpec(environment="us-west1-gcp", pod_type="p1.x1")
# 
#     )
# 
#     while not pinecone.describe_index(index_name).status['ready']:
#         time.sleep(1)
#         
# index = pinecone.index(index_name)

In [7]:
index = pc.Index(name='rag')
print(index.describe_index_stats())

{'dimension': 1536,
 'index_fullness': 0.00027,
 'namespaces': {'': {'vector_count': 27}},
 'total_vector_count': 27}


In [8]:
from langchain_openai.embeddings import OpenAIEmbeddings

embed_model = OpenAIEmbeddings(model='text-embedding-ada-002')

In [9]:
texts = ['this is the first chunk of text',
         'then here is another chunk of text']

res = embed_model.embed_documents(texts)
print(len(res), len(res[0]))
print(documents[0].page_content)

2 1536
Introduction to Supervised Learning

Michela Papandrea michela.papandrea@supsi.ch

Supervised Learning Bachelor of Data Science and Artiﬁcial Intelligence University of Applied Sciences and Arts of Southern Switzerland

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

1 / 25

Overview

1

Introduction

2 Data Representation

3 Classiﬁcation vs Regression

Deﬁnition of Classiﬁcation Deﬁnition of Regression

4 Generalization, Overﬁtting and Underﬁtting

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

2 / 25

Machine Learning

extracting knowledge from data.

intersection of statistics, artiﬁcial intelligence, and computer science (aka predictive analytics or statistical learning )

ML applications is ubiquitous many modern websites and devices have machine learning algorithms at their core

Example

automatic recommendations of which movies to watch, what food to order or which products to buy,

personalized online music streaming

recognizing friend

In [33]:
from langchain_text_splitters import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=500,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)




In [10]:
batch_size = 500
i=0
vectors = []
j=0

for document in documents:
    i=0
    while i <= len(document.page_content):
        print(i)
        batch = document.page_content[i:batch_size]
        i += batch_size
        embeds = embed_model.embed_query(batch)
        vectors.append({'id' : f'{j}_{i}', 'values' : embeds})
    j+=1
        
print(vectors)    
index.upsert(vectors = vectors)

    



0
500
1000
1500
2000
2500
3000
3500
0
500
1000
1500
2000
2500
3000
3500
0
500
1000
1500
2000
2500
3000
3500
0
500
1000
[{'id': '0_500', 'values': [0.01558062600115537, 0.0076565736756312435, 0.026841474088281095, -0.041378932144134815, -0.026680986712080842, 0.018990977623136354, -0.00363436497962474, 0.024300427379960018, -0.012170274379174386, -0.04132543697294981, 0.020408614560700894, 0.02419343517494477, -0.009676036876962807, -0.007081494997456724, 0.023671851667955258, -0.0014836701559113594, 0.014417094852010079, 0.012083343794676134, 0.009040775199882537, -0.011989726313779757, -0.03292660941522305, 0.021237797164649425, -0.00338861897585642, -0.01802805522858007, -0.018335656188184325, -0.009228010161675293, 0.015754488101474493, -0.0349059531008111, -0.0015421810814715956, -0.01590159982223326, -0.00773681736373137, -0.01491192984208447, -0.01828215915435408, -0.01372165017602406, -0.010806133544117469, -0.04151267379738781, 0.015941723063267247, -0.015045668701369605, 0.015

{'upserted_count': 27}

In [11]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.00027,
 'namespaces': {'': {'vector_count': 27}},
 'total_vector_count': 27}

In [36]:
from langchain_pinecone import Pinecone


vectorstore = Pinecone(index, embed_model)

query = 'give me an example of a Machine Learning application'

res = vectorstore.similarity_search(query=query, k=2)
for el in res:
    print(el.page_content)

Introduction to Supervised Learning

Michela Papandrea michela.papandrea@supsi.ch

Supervised Learning Bachelor of Data Science and Artiﬁcial Intelligence University of Applied Sciences and Arts of Southern Switzerland

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

1 / 25

Overview

1

Introduction

2 Data Representation

3 Classiﬁcation vs Regression

Deﬁnition of Classiﬁcation Deﬁnition of Regression

4 Generalization, Overﬁtting and Underﬁtting

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

2 / 25

Machine Learning

extracting knowledge from data.

intersection of statistics, artiﬁcial intelligence, and computer science (aka predictive analytics or statistical learning )

ML applications is ubiquitous many modern websites and devices have machine learning algorithms at their core

Example

automatic recommendations of which movies to watch, what food to order or which products to buy,

personalized online music streaming

recognizing friends faces

In [42]:
def augmented_prompt(query: str):
    results = vectorstore.similarity_search(query, k=1)
    source_knowledge = '\n'.join([x.page_content for x in results])
    augmented_prompt = f"""Using the context below, answer the query. 
    
    Contexts: 
    {source_knowledge} 
    
    Query: 
    {query}"""
    return augmented_prompt

In [38]:
print(augmented_prompt(query))

Using the context below, answer the query. 
    Contexts: 
    Introduction to Supervised Learning

Michela Papandrea michela.papandrea@supsi.ch

Supervised Learning Bachelor of Data Science and Artiﬁcial Intelligence University of Applied Sciences and Arts of Southern Switzerland

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

1 / 25

Overview

1

Introduction

2 Data Representation

3 Classiﬁcation vs Regression

Deﬁnition of Classiﬁcation Deﬁnition of Regression

4 Generalization, Overﬁtting and Underﬁtting

Michela Papandrea (SUPSI)

Introduction to Supervised Learning

2 / 25

Machine Learning

extracting knowledge from data.

intersection of statistics, artiﬁcial intelligence, and computer science (aka predictive analytics or statistical learning )

ML applications is ubiquitous many modern websites and devices have machine learning algorithms at their core

Example

automatic recommendations of which movies to watch, what food to order or which products to buy,


In [44]:
prompt = HumanMessage(
    content=augmented_prompt('According to Michela Papandrea, what are the main steps of M.L. analysis')
)

messages.append(prompt)
res = chat(messages)

print(res.content)

According to Michela Papandrea, the main steps of Machine Learning analysis are:

1. Understand the problem we are trying to solve and if the data can solve the problem.
2. Formalize the problem.
3. Collect enough data to solve the problem.
4. Identify features and algorithms which allow the right predictions.
5. Define metrics for the performance measurement.
6. Generate the predictive model and integrate the Machine Learning solution within a business product.

These steps are crucial in the Machine Learning analysis process to ensure the development of accurate and effective predictive models.
