In [8]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

# Load environment variables from .env file
load_dotenv()

# Retrieve API key securely
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Initialize Pinecone using the new client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Define index name
index_name = "research-embeddings1"

# Get existing indexes
existing_indexes = [idx.name for idx in pc.list_indexes()]

# Only create the index if it does not exist
if index_name not in existing_indexes:
    print(f"Creating new index: {index_name}...")
    pc.create_index(
        name=index_name,
        dimension=1536,  # OpenAI embedding dimensions
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
    print(f"Index '{index_name}' created successfully.")
else:
    print(f"Index '{index_name}' already exists. Skipping creation.")

# Connect to the existing index
index = pc.Index(index_name)

# List available indexes
print("Available Indexes:", existing_indexes)


Creating new index: research-embeddings1...
Index 'research-embeddings1' created successfully.
Available Indexes: ['research-embeddings']


In [10]:
import openai
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Retrieve API key securely
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Initialize OpenAI client
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# Define sample research summaries
research_entries = [
    {"id": "entry_1", "summary": "The impact of AI on real estate market trends."},
    {"id": "entry_2", "summary": "Neural networks used in financial modeling for real estate investments."},
]

# Generate and store embeddings
for entry in research_entries:
    # Generate embedding using OpenAI
    embedding = client.embeddings.create(
        input=entry["summary"],
        model="text-embedding-ada-002"
    ).data[0].embedding

    # Store in Pinecone
    index.upsert([(entry["id"], embedding)])

print("Embeddings stored successfully!")


Embeddings stored successfully!


In [11]:
query_text = "How AI is transforming the real estate industry"

# Generate embedding for query
query_embedding = client.embeddings.create(
    input=query_text,
    model="text-embedding-ada-002"
).data[0].embedding

# Search Pinecone for similar research
search_results = index.query(
    vector=query_embedding, 
    top_k=5,  # Retrieve the top 5 most similar results
    include_metadata=True
)

# Display results
for match in search_results["matches"]:
    print(f"Similar Research ID: {match['id']}, Score: {match['score']}")


Similar Research ID: entry_1, Score: 0.954398274
Similar Research ID: entry_2, Score: 0.843019366


In [12]:
for match in search_results["matches"]:
    similarity_score = match["score"]
    research_id = match["id"]

    if similarity_score > 0.80:
        print(f"Suggest merging with research entry: {research_id}")
    elif 0.50 <= similarity_score <= 0.80:
        print(f"Suggest linking research with: {research_id}")
    else:
        print(f"Entry {research_id} is not related.")


Suggest merging with research entry: entry_1
Suggest merging with research entry: entry_2


In [13]:
def categorize_research(text):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a research assistant that categorizes research into relevant industries and education types."},
            {"role": "user", "content": f"Classify the following research: {text}"}
        ]
    )
    return response.choices[0].message.content

# Test with a sample research summary
sample_text = "Neural networks used in financial modeling for real estate investments."
categories = categorize_research(sample_text)
print("Categorized As:", categories)


Categorized As: This research can be classified under the following categories:

Industries:
1. Finance - As it involves financial modeling.
2. Real Estate - It specifically focuses on real estate investments.
3. Technology/Artificial Intelligence - It utilizes neural networks, a subset of AI technologies.

Education Types:
1. Business/Finance Education - Relevant for students or professionals studying finance or real estate investment.
2. Computer Science/AI Education - Applicable for those studying artificial intelligence, machine learning, or data science, especially in the context of its application to financial models.


In [4]:
import secrets

api_key = secrets.token_hex(32)  # Generates a 64-character hex string
print(f"Your API Secret Key: {api_key}")


Your API Secret Key: 3a403fc7933dc77a59df4f25fdd84707fe648072a099d388e157edafa6157bf2


In [16]:
import os
import pinecone
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Initialize Pinecone
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("research-embeddings")

# Query Pinecone
search_results = index.query(vector=[0]*1536, top_k=100, include_metadata=True)

# Print the results
print(search_results)


{'matches': [{'id': 'research123',
              'metadata': {'activity': 0.0,
                           'pinned': False,
                           'timestamp': '2025-03-01T19:13:32.016564'},
              'score': 0.0,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 6}}
