## Notebook to Load Data into Pinecone

In [9]:
from dotenv import load_dotenv
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

# Load variables from a local .env file into the process environment so that
# later os.getenv(...) lookups (e.g. the Pinecone API key) can find them.
load_dotenv()

True

### Set Up Pinecone

In [8]:
# Connect to Pinecone with the API key read from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists raises an error, which would make this
# cell fail on every re-run of the notebook. Only create the index when it is
# not already present so the cell is idempotent.
index_name = "rms-rag"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # NOTE(review): must equal the vector size of the embedding model used
        # to populate the index -- confirm against the embedding cell.
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

## Import JSON Data with LangChain's JSONLoader

In [None]:
import json
from langchain_community.document_loaders import JSONLoader
from pathlib import Path
from pprint import pprint

# Load the raw reviews file into plain Python objects for inspection.
file_path='./reviews.json'
# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform default, which is not UTF-8 on every system and can
# mis-decode non-ASCII characters in the review text.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))
pprint(data)

In [34]:
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the review's school name into the document metadata.

    Args:
        record: One review object taken from the JSON file.
        metadata: Metadata dict passed in by the loader; updated in place.

    Returns:
        The same ``metadata`` dict, now carrying a ``"school"`` key
        (``None`` when the record has no ``"school"`` field).
    """
    school = record.get("school")
    metadata.update(school=school)
    return metadata

# Loader configuration, gathered in one place:
#   - jq_schema selects each element of the top-level "reviews" array
#   - text_content=False: the selected elements are JSON objects, not strings
#     (the output below shows page_content as the serialized record)
#   - metadata_func adds the school name to every document's metadata
loader_kwargs = {
    "file_path": './reviews.json',
    "jq_schema": '.reviews[]',
    "text_content": False,
    "metadata_func": metadata_func,
}
loader = JSONLoader(**loader_kwargs)

# Read the file and show the documents produced by the loader.
data = loader.load()
pprint(data)

[Document(metadata={'source': 'C:\\Users\\yasho\\Documents\\Computer Science\\HeadstarterAI\\RateMySchool-RAG\\reviews.json', 'seq_num': 1, 'school': 'University of California, Berkeley'}, page_content='{"school": "University of California, Berkeley", "comment": "I had a great experience at this school. The professors were knowledgeable and supportive.", "overall_rating": 4, "individual_ratings": {"professors": 4, "campus": 4, "food": 3, "social_life": 4}, "date": "2016-05-02"}'),
 Document(metadata={'source': 'C:\\Users\\yasho\\Documents\\Computer Science\\HeadstarterAI\\RateMySchool-RAG\\reviews.json', 'seq_num': 2, 'school': 'University of California, Berkeley'}, page_content='{"school": "University of California, Berkeley", "comment": "This school exceeded my expectations. The campus is beautiful and the social life is vibrant.", "overall_rating": 5, "individual_ratings": {"professors": 5, "campus": 5, "food": 4, "social_life": 5}, "date": "2016-05-03"}'),
 Document(metadata={'sour

## JSON Data - Tutorial

In [30]:
import json
# Read the reviews file inside a context manager so the file handle is closed
# deterministically (a bare open(...) passed to json.load is never closed
# explicitly). UTF-8 is given explicitly rather than relying on the platform
# default encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records.
data["reviews"]

[{'school': 'University of California, Berkeley',
  'comment': 'I had a great experience at this school. The professors were knowledgeable and supportive.',
  'overall_rating': 4,
  'individual_ratings': {'professors': 4,
   'campus': 4,
   'food': 3,
   'social_life': 4},
  'date': '2016-05-02'},
 {'school': 'University of California, Berkeley',
  'comment': 'This school exceeded my expectations. The campus is beautiful and the social life is vibrant.',
  'overall_rating': 5,
  'individual_ratings': {'professors': 5,
   'campus': 5,
   'food': 4,
   'social_life': 5},
  'date': '2016-05-03'},
 {'school': 'University of California, Berkeley',
  'comment': 'I had a mixed experience at this school. Some professors were great, while others were not.',
  'overall_rating': 3,
  'individual_ratings': {'professors': 3,
   'campus': 4,
   'food': 3,
   'social_life': 3},
  'date': '2016-05-04'},
 {'school': 'University of California, Berkeley',
  'comment': 'The campus is stunning and the prof