# Palimpzest Quickstart

## 1. Load API Key with .env

In [1]:
from dotenv import load_dotenv

# Load environment variables from a local .env file so the API key never has
# to be written into the notebook itself.
# Your .env file should contain a line of the form OPENAI_API_KEY=<your key>
# (the value "abc" is only a placeholder). Keep .env out of version control.
load_dotenv()

True

## 2. Download Sample Data

In [2]:
# use gdown to download tar file
!gdown 1fcTQ0ku9PfipzWv-bVugC4Dpg8qG-MJ1

# open tar file
!tar -xzf enron-eval-tiny.tar

Downloading...
From: https://drive.google.com/uc?id=1fcTQ0ku9PfipzWv-bVugC4Dpg8qG-MJ1
To: /home/jovyan/enron-eval-tiny.tar
100%|██████████████████████████████████████| 23.2k/23.2k [00:00<00:00, 3.31MB/s]


## 3. Filter Enron emails using natural-language conditions

In [3]:
import palimpzest as pz

# Register the extracted sample emails so that later cells can refer to them
# by dataset id instead of by filesystem path.
pz.DataDirectory().registerLocalDirectory(
    "testdata/enron-eval-tiny",  # local folder (relative to the working directory)
    "enron-tiny",                # dataset id, used as `source=` when building the Dataset
)

In [4]:
import palimpzest as pz

class Email(pz.TextFile):
    """Represents an email, which can subclass a text file"""

    # Schema fields: `desc` describes each field in natural language, and
    # required=True marks the field as mandatory for every record.
    sender = pz.StringField(
        desc="The email address of the sender",
        required=True,
    )
    subject = pz.StringField(
        desc="The subject of the email",
        required=True,
    )

# Logical plan: read the registered "enron-tiny" dataset as Email records
# (this invokes a convert operation), then apply two natural-language
# filters in the order they are written.
emails = (
    pz.Dataset(source="enron-tiny", schema=Email)
    .filterByStr("The email is not quoting from a news article or an article written by someone outside of Enron")
    .filterByStr('The email refers to a fraudulent scheme (i.e., "Raptor", "Deathstar", "Chewco", and/or "Fat Boy")')
)

# User-specified policy handed to the executor.
policy = pz.MaxQuality()

# Execute the plan; returns the output records together with the plan used.
results, plan = pz.Execute(
    emails,
    policy,
    num_samples=2,
    nocache=True,
    verbose=True,
)


--------------------------------------------
Sentinel Plan 1:
 0. MarshalAndScanDataOp -> File 

 1. File -> InduceFromCandidateOp -> TextFile 
    Using hardcoded function
    (contents,filena...) -> (contents,filena...)

 2. TextFile -> InduceFromCandidateOp -> Email 
    Using Model.GPT_4
    Token budget: 1.0
    Query strategy: QueryStrategy.BONDED_WITH_FALLBACK
    (contents,filena...) -> (contents,filena...)

 3. Email -> FilterCandidateOp -> Email 
    Using Model.GPT_4
    Filter: "The email is not quoting from a news article or an article written by someone outside of Enron"
    (contents,filena...) -> (contents,filena...)

 4. Email -> FilterCandidateOp -> Email 
    Using Model.GPT_4
    Filter: "The email refers to a fraudulent scheme (i.e., "Raptor", "Deathstar", "Chewco", and/or "Fat Boy")"
    (contents,filena...) -> (contents,filena...)

---

Sentinel Plan 0:
 0. MarshalAndScanDataOp -> File 

 1. File -> InduceFromCandidateOp -> TextFile 
    Using hardcoded function
