# Apify Pinecone integration
Use Apify's Pinecone integration to seamlessly transfer selected data from Apify Actors to a Pinecone database.

This example shows how to integrate Apify with Pinecone using Apify Actors.

In [14]:
pip install apify-client

Note: you may need to restart the kernel to use updated packages.


In [15]:
import os

from apify_client import ApifyClient

In [16]:
# Credentials are read from environment variables so that real secrets never have
# to be saved inside the notebook. The placeholder strings are kept as fallbacks:
# either export the variables before starting Jupyter or replace the placeholders.
APIFY_API_TOKEN = os.environ.get("APIFY_API_TOKEN", "YOUR APIFY TOKEN")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR OPENAI API KEY")
PINECONE_TOKEN = os.environ.get("PINECONE_TOKEN", "YOUR PINECONE TOKEN")

# Apify API client shared by every Actor call in this notebook.
client = ApifyClient(APIFY_API_TOKEN)

# Run input for the Pinecone integration Actor. The "dataset_id" entry is added
# later, once the crawler run has produced a dataset.
pinecone_integration_inputs = {
    "index_name": "apify",  # presumably the target Pinecone index -- confirm it exists
    "pinecone_token": PINECONE_TOKEN,
    "openai_token": OPENAI_API_KEY,
    "fields": ["text"],  # presumably the dataset fields to store -- verify against the Actor's input schema
    "perform_chunking": True,
    "chunk_size": 2048,
    "chunk_overlap": 0,
}

Call Apify's Website Content Crawler and crawl https://docs.pinecone.io/home (limited to a single page, so only one result is returned)

In [17]:
# Input for the Website Content Crawler: one start URL, capped at a single page.
crawler_input = {
    "maxCrawlPages": 1,
    "startUrls": [{"url": "https://docs.pinecone.io/home"}],
}

# Run the crawler Actor and keep its run record; the next step reads
# `defaultDatasetId` from it.
website_crawler = client.actor("apify/website-content-crawler")
actor_call = website_crawler.call(run_input=crawler_input)

print("Actor website content crawler finished")
actor_call

Actor website content crawler finished


{'id': '35gIpfWbojKuaKO3f',
 'actId': 'aYG0l9s7dbB7j3gbS',
 'userId': 'vaTaQJYTC5hi1UCDf',
 'startedAt': datetime.datetime(2024, 5, 22, 21, 8, 18, 384000, tzinfo=datetime.timezone.utc),
 'finishedAt': datetime.datetime(2024, 5, 22, 21, 8, 54, 375000, tzinfo=datetime.timezone.utc),
 'status': 'SUCCEEDED',
 'statusMessage': 'Finished! Total 1 requests: 1 succeeded, 0 failed.',
 'isStatusMessageTerminal': True,
 'meta': {'origin': 'API',
  'userAgent': 'ApifyClient/1.6.4 (linux; Python/3.11.3); isAtHome/False'},
 'stats': {'inputBodyLen': 1087,
  'rebootCount': 0,
  'restartCount': 0,
  'durationMillis': 35879,
  'resurrectCount': 0,
  'runTimeSecs': 35.879,
  'metamorph': 0,
  'computeUnits': 0.03986555555555556,
  'memAvgBytes': 507361075.78049463,
  'memMaxBytes': 877694976,
  'memCurrentBytes': 0,
  'cpuAvgUsage': 60.676578433355964,
  'cpuMaxUsage': 141.05091319052988,
  'cpuCurrentUsage': 0,
  'netRxBytes': 7574424,
  'netTxBytes': 337422},
 'options': {'build': 'version-0',
  'time

Call Apify's Pinecone integration and store all data in the Pinecone Vector Database

In [18]:
# Point the integration at the dataset produced by the crawler run.
pinecone_integration_inputs["dataset_id"] = actor_call["defaultDatasetId"]

# Store the integration's run record under its own name. Rebinding `actor_call`
# here would make a re-run of this cell read the integration run's dataset id
# instead of the crawler's, so the cell would no longer be idempotent.
integration_call = client.actor("jan.turon/pinecone-integration").call(
    run_input=pinecone_integration_inputs
)
print("Apify's pinecone integration finished")
integration_call

Apify's pinecone integration finished


{'id': 'iR3iIZZeOtWs9kkQb',
 'actId': 'hlXrHgW69eovdGwaE',
 'userId': 'vaTaQJYTC5hi1UCDf',
 'startedAt': datetime.datetime(2024, 5, 22, 21, 8, 55, 371000, tzinfo=datetime.timezone.utc),
 'finishedAt': datetime.datetime(2024, 5, 22, 21, 9, 0, 191000, tzinfo=datetime.timezone.utc),
 'status': 'SUCCEEDED',
 'meta': {'origin': 'API',
  'userAgent': 'ApifyClient/1.6.4 (linux; Python/3.11.3); isAtHome/False'},
 'stats': {'inputBodyLen': 1061,
  'rebootCount': 0,
  'restartCount': 0,
  'durationMillis': 4687,
  'resurrectCount': 0,
  'runTimeSecs': 4.687,
  'metamorph': 0,
  'computeUnits': 0.005207777777777778,
  'memAvgBytes': 45921876.9106422,
  'memMaxBytes': 107581440,
  'memCurrentBytes': 0,
  'cpuAvgUsage': 30.875625020371864,
  'cpuMaxUsage': 102.04602394454948,
  'cpuCurrentUsage': 0,
  'netRxBytes': 28149,
  'netTxBytes': 5622},
 'options': {'build': 'latest',
  'timeoutSecs': 3600,
  'memoryMbytes': 4096,
  'diskMbytes': 8192},
 'buildId': 'MSILbxF1RFVVU3YH4',
 'exitCode': 0,
 'def