<a href="https://colab.research.google.com/github/claudio1975/Medium-blog/blob/master/Scikit-LLM/Scikit_LLM_Features_examples_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install scikit-llm library
!pip install scikit-llm &> /dev/null

In [None]:
# Install the dotenv package
!pip install python_dotenv &> /dev/null


In [None]:
# API Key Configuration
from skllm.config import SKLLMConfig
import os
from dotenv import load_dotenv, find_dotenv
root_folder="/content/"
# Read the local .env file, which is expected to define OPENAI_API_KEY
env_path = os.path.join(root_folder, "pswd.env")
_ = load_dotenv(env_path)

OPENAI_SECRET_KEY  = os.getenv('OPENAI_API_KEY')
# Fail fast with an actionable message instead of handing a missing key to the
# library and getting a harder-to-read error from this or a later cell.
if not OPENAI_SECRET_KEY:
    raise RuntimeError(
        f"OPENAI_API_KEY is not set. Define it in {env_path} or in the "
        "environment before running this cell."
    )
SKLLMConfig.set_openai_key(OPENAI_SECRET_KEY)

In [None]:
# import libraries
from skllm.models.gpt.classification.zero_shot import ZeroShotGPTClassifier
from skllm.models.gpt.classification.zero_shot import CoTGPTClassifier
from skllm.models.gpt.classification.tunable import GPTClassifier
from skllm.models.gpt.classification.few_shot import (
FewShotGPTClassifier,
MultiLabelFewShotGPTClassifier,
DynamicFewShotGPTClassifier
)
from skllm.models.gpt.text2text.summarization import GPTSummarizer
from skllm.models.gpt.text2text.translation import GPTTranslator
from skllm.models.gpt.text2text.tunable import TunableGPTText2Text
from skllm.models.gpt.vectorization import GPTVectorizer
from skllm.models.gpt.tagging.ner import GPTExplainableNER as NER
from skllm.datasets import get_multilabel_classification_dataset
from skllm.datasets import get_classification_dataset
from skllm.datasets import get_translation_dataset

import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)


### Few-Shot Text Classification

In [None]:
# Training sentences and their sentiment labels used for few-shot learning
X_train = [
    "I love the new feature of this app!",
    "I had a terrible experience yesterday.",
    "The product is okay, not too bad.",
]
y_train = ["positive", "negative", "neutral"]

# Sentences whose sentiment should be predicted
X_test = [
    "The support team was very helpful.",
    "I'm not satisfied with the product quality.",
    "It works as expected.",
]

# Build the few-shot classifier on the chosen GPT model and fit it on the examples
clf = FewShotGPTClassifier(model="gpt-3.5-turbo")
clf.fit(X_train, y_train)

# Classify the test sentences and show the predicted labels
labels = clf.predict(X_test)
print(labels)

100%|██████████| 3/3 [00:02<00:00,  1.28it/s]

['positive' 'negative' 'positive']





### Multi-Label Few-Shot Text Classification

In [None]:
# Create an instance of MultiLabelFewShotGPTClassifier with the specified model
# NOTE(review): max_labels=2, yet some label rows below carry three labels --
# confirm how the library treats such rows, or raise max_labels to 3.
clf = MultiLabelFewShotGPTClassifier(max_labels=2, model="gpt-3.5-turbo")

# Load the demo dataset; only X is used below, to pick the text to classify
X, y = get_multilabel_classification_dataset()
# Define the input text to be classified
X_test=[X[4]]

# Candidate few-shot examples and their corresponding multi-label targets
X_examples=[
 'The product was of excellent quality, and the packaging was also very good. Highly recommend!',
 'The delivery was super fast, but the product did not match the information provided on the website.',
 'Great variety of products, but the customer support was quite unresponsive.',
 'Affordable prices and an easy-to-use website. A great shopping experience overall.',
 'The delivery was delayed, and the packaging was damaged. Not a good experience.',
 'Excellent customer support, but the return policy is quite complicated.',
 'The product was not as described. However, the return process was easy and quick.',
 'Great service and fast delivery. The product was also of high quality.',
 'The prices are a bit high. However, the product quality and user experience are worth it.',
 'The website provides detailed information about products. The delivery was also very fast.']

y_examples=[
 ['Quality', 'Packaging'],
 ['Delivery', 'Product Information'],
 ['Product Variety', 'Customer Support'],
 ['Price', 'User Experience'],
 ['Delivery', 'Packaging'],
 ['Customer Support', 'Return Policy'],
 ['Product Information', 'Return Policy'],
 ['Service', 'Delivery', 'Quality'],
 ['Price', 'Quality', 'User Experience'],
 ['Product Information', 'Delivery']
]

# Keep the text being classified out of the few-shot examples, so the model is
# not asked to label a sentence whose answer it has already been shown.
held_out_pairs = [
    (text, tags)
    for text, tags in zip(X_examples, y_examples)
    if text not in X_test
]
X_train = [text for text, _ in held_out_pairs]
y_train = [tags for _, tags in held_out_pairs]

# Fit the model using few-shot learning
clf.fit(X_train,y_train)
# Predict the class labels for the input text
labels = clf.predict(X_test)
# Print the predicted labels
print(labels)

100%|██████████| 1/1 [00:00<00:00,  1.57it/s]

[['Delivery' 'Packaging']]





### Chain-of-Thought Text Classification

In [None]:
# Sentiment-analysis training sentences and their labels
X_train = [
    "I love this new phone, its performance is outstanding and the battery life is amazing.",
    "This software update is terrible, it crashes all the time and is very slow.",
    "The book was okay, some parts were interesting but others were boring.",
]
y_train = ["positive", "negative", "neutral"]

# A new test sentence that is not part of the training data
X_test = ["The movie had some good moments."]

# Set up the chain-of-thought classifier on the chosen model and fit it
clf = CoTGPTClassifier(model="gpt-3.5-turbo")
clf.fit(X_train, y_train)

# Each prediction row holds the label in column 0 and the reasoning in column 1
predictions = clf.predict(X_test)
labels = predictions[:, 0]
reasoning = predictions[:, 1]

# Report the label and the accompanying reasoning for every test sentence
for i, label in enumerate(labels):
    reason = reasoning[i]
    print(f"Sentence: '{X_test[i]}'")
    print(f"Predicted Label: {label}")
    print(f"Reasoning: {reason}")
    print("-" * 60)

100%|██████████| 1/1 [00:01<00:00,  1.60s/it]

Sentence: 'The movie had some good moments.'
Predicted Label: positive
Reasoning: {'positive': "The text mentions 'good moments,' indicating a positive aspect within the movie.", 'negative': 'There are no explicit negative statements in the text.', 'neutral': "The text does not provide a strong sentiment either way; it merely acknowledges the presence of 'good moments.'"}
------------------------------------------------------------





### Text Translation

In [None]:
# Define the text to translate (nothing is trained here; the name X_train is
# kept only for consistency with the other cells)
X_train=["I love dancing salsa and bachata. It's a fun way to express myself."]
# Initialize the GPTTranslator with the specified model and target language
t = GPTTranslator(model="gpt-3.5-turbo", output_language="Portuguese")
# Translate the sentence
translated_text = t.fit_transform(X_train)
translated_text

100%|██████████| 1/1 [00:00<00:00,  1.40it/s]


array(['Eu amo dançar salsa e bachata. É uma forma divertida de me expressar.'],
      dtype=object)