# Setup

In [5]:
import os
import pandas as pd

from pprint import pprint

from sklearn.metrics import accuracy_score

from skllm import MultiLabelZeroShotGPTClassifier
from skllm.config import SKLLMConfig

In [6]:
# See INSTALL.md for how to set the OPENAI_API_KEY environment variable.
# DO NOT hard-code your API key here: if the repository is public, the key
# can be stolen and used at your expense.
if not os.environ.get('OPENAI_API_KEY'):
    # os.environ.get returns None when the variable is unset; stop here with a
    # clear message instead of configuring the client with a missing key.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; see INSTALL.md for setup instructions."
    )
SKLLMConfig.set_openai_key(os.environ['OPENAI_API_KEY'])

# Collect a Dataset

In [7]:
# Free-text descriptions of estate-sale items, one string per item.
estate_sale_items = [
    "An old chair with cool upholstery",
    "Costume Jewelry, various earrings",
    "Artwork of unknown origin",
    "Collectible Coins and Currency",
    "China Sets, some complete, some not",
    "Silverware, real silver",
    "Glassware, crystal",
    "Books, various authors",
    "Vinyl Records",
    "Clothing",
    "Tools",
    "Electronics",
    "Rugs and Textiles",
    "Kitchenware",
    "Decorative Items",
]

# Single-column frame holding the descriptions under "estatesaleitems".
data = pd.DataFrame({"estatesaleitems": estate_sale_items})

# Understand Your Data

In [8]:
# Preview the first rows of the dataset.
data.head()

Unnamed: 0,estatesaleitems
0,An old chair with cool upholstery
1,"Costume Jewelry, various earrings"
2,Artwork of unknown origin
3,Collectible Coins and Currency
4,"China Sets, some complete, some not"


In [9]:
# Summarize the text column (count, unique, top, freq).
data.describe()

Unnamed: 0,estatesaleitems
count,15
unique,15
top,An old chair with cool upholstery
freq,1


In [10]:
# Pull out the item descriptions; X is what gets passed to the classifier below.
X = data["estatesaleitems"]

In [12]:
# Print all descriptions to inspect the classifier input.
print(X)

0       An old chair with cool upholstery
1       Costume Jewelry, various earrings
2               Artwork of unknown origin
3          Collectible Coins and Currency
4     China Sets, some complete, some not
5                 Silverware, real silver
6                      Glassware, crystal
7                  Books, various authors
8                           Vinyl Records
9                                Clothing
10                                  Tools
11                            Electronics
12                      Rugs and Textiles
13                            Kitchenware
14                       Decorative Items
Name: estatesaleitems, dtype: object


# "Develop" a Model aka just use OpenAI's API

In [15]:
# Label vocabulary the classifier may assign to each item
candidate_labels = ["Collectibles", "Valuable", "Kitchenware", "Media"]

# Build the classifier with max_labels=2, then fit it; the label list is
# wrapped in an outer list when passed as the target argument
clf = MultiLabelZeroShotGPTClassifier(max_labels=2)
clf.fit(X, [candidate_labels])

In [19]:
# Predict the labels for every item description in X
# (the result holds one list of labels per item)
labels = clf.predict(X)

100%|███████████████████████████████████████████| 15/15 [00:48<00:00,  3.26s/it]


In [22]:
# Store the predicted labels next to the descriptions in a new column.
data["ChatGPTLabel"] = labels

In [23]:
# Rename the description column to "postTitle". The result is reassigned
# instead of using inplace=True, so the cell makes it explicit that `data`
# now refers to the renamed frame.
data = data.rename(columns={"estatesaleitems": "postTitle"})

In [25]:
# Display the resulting DataFrame: each item title with its predicted labels
print(data)

                              postTitle                 ChatGPTLabel
0     An old chair with cool upholstery               [Collectibles]
1     Costume Jewelry, various earrings     [Collectibles, Valuable]
2             Artwork of unknown origin               [Collectibles]
3        Collectible Coins and Currency               [Collectibles]
4   China Sets, some complete, some not  [Collectibles, Kitchenware]
5               Silverware, real silver     [Collectibles, Valuable]
6                    Glassware, crystal     [Collectibles, Valuable]
7                Books, various authors                      [Media]
8                         Vinyl Records                      [Media]
9                              Clothing               [Collectibles]
10                                Tools               [Collectibles]
11                          Electronics               [Collectibles]
12                    Rugs and Textiles               [Collectibles]
13                          Kitchenware                [Kitchenware]
14                     Decorative Items               [Collectibles]

# Choose a Measure of Success, Choose an Evaluation Protocol, and Evaluate

# Skipped Steps
* Beat a baseline
* Overfit, regularize and tune
* Communicate with stakeholders
* Ship an inference model
* Monitor and maintain

In [17]:
# Print the raw predictions: one list of labels per item, in input order.
print(labels)

[['Collectibles'], ['Collectibles', 'Valuable'], ['Collectibles'], ['Collectibles'], ['Collectibles', 'Kitchenware'], ['Collectibles', 'Valuable'], ['Collectibles', 'Valuable'], ['Media'], ['Media'], ['Collectibles'], ['Collectibles'], ['Collectibles'], ['Collectibles'], ['Kitchenware'], ['Collectibles']]
