In [10]:
# Load the "external" document: download the article and reduce it to plain text.
import requests
from bs4 import BeautifulSoup

web_link='https://towardsdatascience.com/3-business-skills-you-need-to-progress-your-data-science-career-in-2025-146f841d1a1e'

# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(web_link, timeout=30)
# Fail loudly, in this cell, on an HTTP error status. Previously a non-200 reply
# silently skipped the assignment of `text`, which only surfaced later as a
# NameError (or as silent reuse of a stale `text` from an earlier run).
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')
# separator="\n" puts the extracted text fragments on separate lines;
# strip=True trims the whitespace around each fragment.
text = soup.get_text(separator="\n", strip=True)

# An empty page (e.g. content rendered only by JavaScript) would make every
# later cell meaningless, so stop here with a clear message.
if not text:
    raise ValueError(f"No text could be extracted from {web_link}")


In [11]:
# Split the downloaded text into chunks
import langchain
from langchain.text_splitter import CharacterTextSplitter

# Show the installed LangChain version alongside the results.
print(langchain.__version__)

# Split on newlines with a target chunk size of 800.
# chunk_overlap is deliberately not passed, so the library default applies
# (an explicit chunk_overlap=200 was previously left commented out here).
CHUNK_SEPARATOR = "\n"
CHUNK_SIZE = 800

text_splitter = CharacterTextSplitter(separator=CHUNK_SEPARATOR, chunk_size=CHUNK_SIZE)

# `text` comes from the download cell above.
splits = text_splitter.split_text(text)
print(len(splits))

0.3.11
19


In [12]:
# Vectorize: embed every chunk and index it in a persistent Chroma collection.
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

from langchain_community.vectorstores import Chroma

collection_name = "ds_career"

# Deterministic, position-based ids make this cell safe to re-run.
# Without explicit ids, each execution adds the same chunks again under newly
# generated ids to the collection persisted in ./chroma_ds_career, so the
# retriever ends up returning several copies of the same chunk. With stable
# ids a re-run overwrites the existing entries instead.
# NOTE(review): entries written by earlier runs with auto-generated ids stay in
# the persisted directory; delete ./chroma_ds_career once to start clean.
chunk_ids = [f"{collection_name}-{position}" for position in range(len(splits))]

vector_store = Chroma.from_texts(
    texts=splits,
    collection_name=collection_name,
    embedding=embeddings,
    persist_directory="./chroma_ds_career",
    ids=chunk_ids,
)

# Retriever with default search settings; used as the context source of the chain.
retriever = vector_store.as_retriever()

In [13]:
# Ollama: assemble the retrieval-augmented question-answering chain.
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Local LLM
ollama_llm = "llama3.2"
model_local = ChatOllama(model=ollama_llm)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' texts separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Chain
# The retriever yields a list of Document objects. Feeding that list straight
# into the prompt would insert its repr (metadata, escaped newlines) as the
# context, so the documents are flattened to plain text first.
chain = (
    {
        "context": retriever | RunnableLambda(format_docs),
        "question": RunnablePassthrough(),  # the user's question, passed through unchanged
    }
    | prompt
    | model_local
    | StrOutputParser()
)


In [14]:
# Open question that the article should be able to answer.
career_question = "What would you do if you would like to progress in your data science career?"
chain.invoke(career_question)

In [None]:
# Question whose premise the article does not support; checks that the answer
# stays grounded in the retrieved context.
boss_question = "Would you insult your boss if you wanted to improve in your data science career?"
chain.invoke(boss_question)

'No, the text does not suggest that insulting one\'s boss would be an effective way to improve in a data science career. In fact, the text mentions "collaboration-driving", "project-scoping", "stakeholder-managing", and "strategy-setting" as skills that are important for progression in a data science career, which implies a more collaborative and professional approach.'

In [None]:
# Question about communication skills.
# NOTE(review): the wording contains a typo ("a good a communication"); it is
# kept verbatim so the recorded answer below still corresponds to this input.
communication_question = "Is it a good idea to be a good a communication?"
chain.invoke(communication_question)

"Yes, according to the context, being a good communicator is a good idea. The text emphasizes that soft communication skills, such as data storytelling and cross-team collaboration, are crucial for managing teams, pitching products, communicating insights, motivating team members, negotiating better pay, securing funding, and accessing government grants. It suggests that developing these skills can open doors to new opportunities and improve one's career prospects as a data scientist transitioning into leadership roles."