In [1]:
import os
# Fix for Windows symlink permission error with Hugging Face Hub
# This patches the symlink creation to use hard links or copies instead
import sys

if sys.platform == 'win32':
    # Monkey patch os.symlink to use os.link (hard link) or shutil.copy as fallback
    import shutil
    _original_symlink = os.symlink
    
    def _patched_symlink(src, dst, target_is_directory=False):
        try:
            # Try to create a hard link first (works without admin privileges)
            if os.path.isfile(src):
                try:
                    os.link(src, dst)
                    return
                except (OSError, AttributeError):
                    pass
            # Fallback to copying the file
            if os.path.isfile(src):
                shutil.copy2(src, dst)
            elif os.path.isdir(src):
                shutil.copytree(src, dst, dirs_exist_ok=True)
        except Exception:
            # If all else fails, try the original symlink (might work if Developer Mode is enabled)
            try:
                _original_symlink(src, dst, target_is_directory)
            except OSError:
                # Final fallback: just copy
                if os.path.isfile(src):
                    shutil.copy2(src, dst)
                elif os.path.isdir(src):
                    shutil.copytree(src, dst, dirs_exist_ok=True)
    
    os.symlink = _patched_symlink

print("Windows symlink workaround enabled")


Windows symlink workaround enabled


In [2]:
pip install pydantic pydantic-settings asyncpg pinecone pinecone[grpc] asyncio docling

Note: you may need to restart the kernel to use updated packages.


In [3]:
from src.config import settings

print(settings.postgres_async_connection_string)
print(settings.postgres_config)

postgresql+asyncpg://postgres:farsight-test@farsight-take-home-project.cl7vlnrlhdqy.us-east-1.rds.amazonaws.com:5432/postgres
{'host': 'farsight-take-home-project.cl7vlnrlhdqy.us-east-1.rds.amazonaws.com', 'port': 5432, 'user': 'postgres', 'password': 'farsight-test', 'database': 'postgres'}


In [4]:
from src.db import get_pinecone_client

client = await get_pinecone_client()

idx = client.list_indexes()

print(idx)

  from tqdm.autonotebook import tqdm


['farsight-take-home-project']


In [5]:
import asyncio
from src.db import PostgresClient, PineconeClient
from src.llm import OpenAIClient
from src.models import AcquisitionModel, FundingRoundModel, OrganizationModel, PineconeOrganizationModel

from pprint import pprint

async with PostgresClient() as postgres_client, PineconeClient() as pinecone_client, OpenAIClient() as openai_client:
    acquisitions = AcquisitionModel(client=postgres_client)
    funding_rounds = FundingRoundModel(client=postgres_client)
    organizations = OrganizationModel(client=postgres_client)
    pc_organizations = PineconeOrganizationModel(
        pinecone_client=pinecone_client,
        openai_client=openai_client
    )
    asyncio.gather(
        acquisitions.initialize(),
        funding_rounds.initialize(),
        organizations.initialize(),
        pc_organizations.initialize()
    )
    acqs, frs, orgs, pc_orgs = await asyncio.gather(
        acquisitions.get(limit=10, acquisition_price_usd_min=1000000),
        funding_rounds.get(limit=10),
        organizations.get(limit=10),
        pc_organizations.query(
            text="AI Companies",
            top_k=10
        )
    )
    pprint((acqs, frs, orgs, pc_orgs))

([Acquisition(acquisition_uuid=UUID('87252713-6f4b-481a-a198-3b867759d97d'), acquiree_uuid=UUID('df10ca0d-afbe-4d5e-8559-cb725af0fb37'), acquirer_uuid=UUID('ec070a75-5468-9a55-1b09-6794923f358e'), acquisition_type='acquisition', acquisition_announce_date=datetime.datetime(2024, 12, 9, 0, 0), acquisition_price_usd=17500000, terms='cash', acquirer_type='company'),
  Acquisition(acquisition_uuid=UUID('a7e70d81-b4b6-492b-9f66-d63ec3d0601b'), acquiree_uuid=UUID('4f5dd354-adbb-44a2-93ed-ce36a1d79c70'), acquirer_uuid=UUID('0aa97c3c-9113-2726-2a8a-f43f52b11654'), acquisition_type='acquisition', acquisition_announce_date=datetime.datetime(2024, 12, 6, 0, 0), acquisition_price_usd=64135271, terms='cash_and_stock', acquirer_type='company'),
  Acquisition(acquisition_uuid=UUID('f2222d9a-1b0c-4678-8d1c-3fbaced9c9d1'), acquiree_uuid=UUID('3aedd9fe-ee5c-48ea-b7cd-377a88e947a1'), acquirer_uuid=UUID('b49fa379-3c64-45e3-9be3-3b0ff384632f'), acquisition_type='acquisition', acquisition_announce_date=datet

In [6]:
from src.tools import generate_llm_response

response = await generate_llm_response(
            prompt="Explain quantum computing",
            model="gpt-4.1-mini",
            instructions="You are a helpful physics teacher. Explain concepts simply."
        )

print(response)

async for chunk in await generate_llm_response(
            prompt="Tell me a story",
            model="gpt-4.1-mini",
            stream=True
        ):
            if chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")

ChatCompletion(id='chatcmpl-CoFXlZc15m5r14HbWiDtAYwS8NgNj', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Sure! Quantum computing is a type of computing that uses the principles of quantum mechanics, which is the physics of very tiny particles like atoms and electrons.\n\nIn regular computers, the basic unit of information is called a **bit**, which can be either 0 or 1. In quantum computers, the basic unit is a **quantum bit** or **qubit**. Qubits are special because they can be both 0 and 1 at the same time, thanks to a property called **superposition**.\n\nAnother important property is **entanglement**, where qubits become linked so that the state of one instantly influences the state of another, no matter how far apart they are.\n\nThese properties let quantum computers process lots of possibilities all at once, making them potentially much more powerful for certain tasks, like searching databases, factoring large numbers, or s