In [16]:
from dotenv import load_dotenv
load_dotenv('.env.local')
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [18]:
api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key)

pc.create_index(
    name = "rmw", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)   


In [19]:
import json
data = json.load(open("reviews.json"))
data['reviews']


[{'Company': 'Google',
  'Work-Life Balance': 'Excellent',
  'Compensation and Benefits': 'Highly competitive',
  'Career Growth and Development Opportunities': 'Abundant',
  'Company Culture': 'Innovative and collaborative',
  'Leadership and Management': 'Visionary',
  'Stars': 5},
 {'Company': 'Apple',
  'Work-Life Balance': 'Good',
  'Compensation and Benefits': 'Very competitive',
  'Career Growth and Development Opportunities': 'Plentiful',
  'Company Culture': 'Fast-paced and creative',
  'Leadership and Management': 'Strong',
  'Stars': 4},
 {'Company': 'Amazon',
  'Work-Life Balance': 'Challenging',
  'Compensation and Benefits': 'Competitive',
  'Career Growth and Development Opportunities': 'Numerous',
  'Company Culture': 'Fast-paced and results-driven',
  'Leadership and Management': 'Data-driven',
  'Stars': 3},
 {'Company': 'Microsoft',
  'Work-Life Balance': 'Very good',
  'Compensation and Benefits': 'Excellent',
  'Career Growth and Development Opportunities': 'Extens

In [23]:
for review in data['reviews']:
    if all(key in review for key in ["Work-Life Balance", "Compensation and Benefits", 
                                     "Career Growth and Development Opportunities", 
                                     "Company Culture", "Leadership and Management"]):
        combined_input = (
            f"Work-Life Balance: {review['Work-Life Balance']}, "
            f"Compensation and Benefits: {review['Compensation and Benefits']}, "
            f"Career Growth and Development Opportunities: {review['Career Growth and Development Opportunities']}, "
            f"Company Culture: {review['Company Culture']}, "
            f"Leadership and Management: {review['Leadership and Management']}"
        )
process_data =[]
client = OpenAI()

for review in data['reviews']:
    response = client.embeddings.create(
        input=combined_input,
        model = "text-embedding-3-small",
    )

    embedding = response.data[0].embedding
    process_data.append({
        "values": embedding,
        "id": review["Company"],
        "metadata":{
            "Work-Life Balance": review["Work-Life Balance"],
            "Compensation and Benefits": review["Compensation and Benefits"],
            "Career Growth and Development Opportunities": review["Career Growth and Development Opportunities"],
            "Company Culture": review["Company Culture"],
            "Leadership and Management": review["Leadership and Management"],
            "Stars": review["Stars"]
        }
    })


In [26]:
process_data[0]

{'values': [-0.00351948,
  0.0032631338,
  0.056552943,
  0.052692674,
  0.016309638,
  0.006882136,
  -0.002821314,
  0.026563477,
  0.02791457,
  0.020676566,
  0.05607041,
  -0.029410426,
  -0.018022632,
  -0.028541865,
  0.013583322,
  0.021677824,
  -0.025405396,
  -0.015863294,
  -0.009234488,
  0.008812271,
  -0.005491836,
  0.023197806,
  0.012847458,
  0.040146798,
  0.017286768,
  0.048012096,
  -0.11262336,
  0.0076964116,
  -0.030206606,
  -0.007485303,
  0.054526303,
  -0.036407165,
  -0.0015787898,
  0.013812526,
  -0.00776276,
  0.036720812,
  0.040050294,
  0.009608451,
  0.053754248,
  -0.0021382272,
  -0.016249321,
  0.009216392,
  0.03322244,
  0.008963062,
  0.020302603,
  -0.016225195,
  -0.017853744,
  0.013378246,
  0.022654954,
  0.01729883,
  -0.047746703,
  -0.018324215,
  0.03170246,
  -0.009005284,
  -0.03664843,
  0.0027006804,
  0.040026166,
  -0.036600176,
  -0.021243544,
  0.033198316,
  0.06181256,
  -0.07976281,
  0.02119529,
  -0.020157844,
  -0.01244

In [27]:
index = pc.Index('rmw')
index.upsert(
    vectors=process_data,
    namespace="ns1"
)


{'upserted_count': 20}

In [28]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}