# Metadata filtering with Pinecone


In [None]:
import os
import pandas as pd
from pinecone import Pinecone
from dotenv import load_dotenv
from pinecone import ServerlessSpec

# Load settings from a local .env file (if one exists) into the process
# environment, so the os.environ lookups in this notebook
# (e.g. PINECONE_API_KEY) can find them.
load_dotenv()

## Creating an Index

In [None]:
# `time` is required by the readiness polling loop below; the notebook's
# import cell does not provide it, so import it here to avoid a NameError.
import time

# Initialize the connection to Pinecone (get an API key at app.pinecone.io).
# When PINECONE_API_KEY is unset, a placeholder string is used instead.
api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'

# configure client
pc = Pinecone(api_key=api_key)

# Serverless deployment target; both values can be overridden through the
# environment and default to AWS us-east-1.
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(cloud=cloud, region=region)

index_name = "pinecone-metadata-filtering"

# Start from a clean slate: delete the index if it already exists
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# Create an index sized for the 2-dimensional sample vectors used below
pc.create_index(
    name=index_name,
    dimension=2,
    metric="euclidean",
    spec=spec
)

# Wait for the index to be ready before connecting, polling once per second
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

# Connect to the index
index = pc.Index(index_name)

## Generate sample document data

In [None]:
# Four toy documents: two with category "finance" (ids "F-*") and two with
# category "sport" (ids "S-*"), each carrying a 2-d vector and a metadata
# dict with its category and publication year.
doc_ids = ["F-1", "F-2", "S-1", "S-2"]
doc_vectors = [[1., 1.], [2., 2.], [3., 3.], [4., 4.]]
doc_metadata = [
    {"category": "finance", "published": 2015},
    {"category": "finance", "published": 2016},
    {"category": "sport", "published": 2017},
    {"category": "sport", "published": 2018},
]

# Assemble all three columns in a single constructor call.
df = pd.DataFrame(
    {"id": doc_ids, "vector": doc_vectors, "metadata": doc_metadata}
)
df

## Insert vectors

In [None]:
# Pair each row's id, vector and metadata into one (id, vector, metadata)
# record and upsert them all in a single call.
records = zip(df["id"], df["vector"], df["metadata"])
index.upsert(vectors=records)

# Show the index statistics after the upsert.
index.describe_index_stats()

## Fetch a vector

In [None]:
# Look up a single stored record directly by its id.
ids_to_fetch = ["F-1"]
index.fetch(ids=ids_to_fetch)

## Query top-3 without filtering

In [None]:
# Use the stored vector of document "F-1" as the query vector.
# `.iloc[0]` picks the first matching row by position. A plain `[0]` is a
# label lookup and only works when the matching row has index label 0, so
# it would raise KeyError for any other document id.
query_vector = df[df.id == "F-1"].vector.iloc[0]

# Retrieve the 3 nearest neighbours with no metadata restriction.
query_results = index.query(vector=query_vector, top_k=3)
query_results

## Query top-3 with filtering: finance articles published after 2015

In [None]:
# Metadata filter: category must equal "finance" and the publication year
# must be strictly greater than 2015.
# NOTE(review): two top-level keys are expected to be combined with AND —
# confirm against the Pinecone metadata filtering docs.
filter_condition = {
    "category": {"$eq": "finance"},
    "published": {"$gt": 2015},
}

# Use the stored vector of document "F-1" as the query vector.
# `.iloc[0]` picks the first matching row by position. A plain `[0]` is a
# label lookup and only works when the matching row has index label 0.
query_vector = df[df.id == "F-1"].vector.iloc[0]

# Same top-3 query as before, now restricted by the metadata filter.
query_results = index.query(
    vector=query_vector, top_k=3, filter=filter_condition
)
query_results

In [None]:
# Clean up: delete the demo index created earlier in this notebook.
pc.delete_index(index_name)