In [None]:
from dotenv import load_dotenv
load_dotenv('.env.local')
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Read the Pinecone API key from the environment (populated by load_dotenv above).
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    # Fail early with an actionable message instead of a client-side auth error later.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to .env.local or export it in the environment."
    )
pc = Pinecone(api_key=api_key)

# Create the "rmw" index only when it does not exist yet, so this cell survives
# Restart & Run All (an unconditional create_index fails on the second run).
# dimension=1536 must match the size of the vectors produced by the embedding
# model used later in this notebook (text-embedding-3-small).
if "rmw" not in pc.list_indexes().names():
    pc.create_index(
        name="rmw",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )


In [None]:
import json

# Load the review dataset. The context manager closes the file handle as soon
# as parsing finishes (the bare open() previously leaked it), and the explicit
# encoding avoids depending on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the parsed reviews as the cell output.
data['reviews']


In [6]:
# Rating fields that are concatenated into the text embedded for each review
# and copied into the vector's metadata.
REVIEW_FIELDS = [
    "Work-Life Balance",
    "Compensation and Benefits",
    "Career Growth and Development Opportunities",
    "Company Culture",
    "Leadership and Management",
]

process_data = []
client = OpenAI()

for review in data['reviews']:
    # Skip reviews that are missing any rating field rather than embedding
    # partial text (and rather than failing on the metadata lookup below).
    if not all(field in review for field in REVIEW_FIELDS):
        continue

    # Build the embedding text for THIS review. Previously the text was built in
    # a separate loop, so only the last review's text survived and every vector
    # ended up being the embedding of that same string.
    combined_input = ", ".join(f"{field}: {review[field]}" for field in REVIEW_FIELDS)

    response = client.embeddings.create(
        input=combined_input,
        model="text-embedding-3-small",
    )
    embedding = response.data[0].embedding

    metadata = {field: review[field] for field in REVIEW_FIELDS}
    metadata["Stars"] = review["Stars"]

    # NOTE(review): the company name is used as the vector id; this assumes
    # company names are unique within reviews.json — confirm, since records
    # sharing an id would collide when upserted.
    process_data.append({
        "values": embedding,
        "id": review["Company"],
        "metadata": metadata,
    })


In [15]:
# process_data[18]["metadata"]

{'Work-Life Balance': 'Very good',
 'Compensation and Benefits': 'Competitive',
 'Career Growth and Development Opportunities': 'Abundant',
 'Company Culture': 'Entrepreneurial and supportive',
 'Leadership and Management': 'Inspiring',
 'Stars': 4}

In [12]:
# Open a handle to the "rmw" index, then write every prepared record into the
# "ns1" namespace. The upsert call is the last expression so that its response
# is rendered as the cell output.
index = pc.Index('rmw')
index.upsert(vectors=process_data, namespace="ns1")


{'upserted_count': 20}

In [28]:
# Show the index statistics (dimension, per-namespace vector counts, total) to
# check that the upserted vectors are present in the "ns1" namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}