In [5]:
import kfp.compiler as compiler
import kfp.dsl as dsl
from kfp import components

# Build the reusable KServe component factory from its published component.yaml.
# The URL points at the `master` branch, so the definition that gets loaded can
# change between runs of this cell.
kserve_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    'master/components/kubeflow/kfserving/component.yaml'
)

# Single-step Kubeflow pipeline that hands its parameters to the KServe
# component loaded into `kserve_op`.
#
# Pipeline parameters (all have defaults, so the pipeline can run as-is):
#   action            - operation passed to the component; defaults to 'apply'.
#                       NOTE(review): presumably creates/updates the inference
#                       service - confirm against the component definition.
#   model_name        - name passed to the component for the served model.
#   namespace         - Kubernetes namespace passed to the component.
#   custom_model_spec - JSON string describing the serving container
#                       (its "name", "image" and "port" keys).
@dsl.pipeline(
    name='KServe pipeline',
    description='A pipeline for KServe.'
)
def KServePipeline(
    action='apply',
    model_name='sklearnserver',
    namespace='kubeflow-user-example-com',
    custom_model_spec='{"name": "sklearnserver", "image": "dfm871002/sklearnserver-default:latest", "port": "5000"}'
):
    # The only step: forward every pipeline parameter unchanged to the component.
    kserve_op(action=action,
              model_name=model_name,
              namespace=namespace,
              custom_model_spec=custom_model_spec)

# Compile the pipeline function into an uploadable package written to
# 'sklsv.zip' in the current working directory.
if __name__ == '__main__':
    compiler.Compiler().compile(
        pipeline_func=KServePipeline,
        package_path='sklsv.zip',
    )

In [4]:
# The following example uses Python's `requests` library to send REST API requests.
import json
import os
import re
import string
import sys
from random import shuffle

import joblib
import nltk
import numpy as np
import pandas as pd
import requests
from nltk import data
from nltk.corpus import stopwords
from nltk.corpus import twitter_samples
from nltk.stem import PorterStemmer
from nltk.tokenize import TweetTokenizer
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Sentence that is turned into features and sent to the model further down.
message = 'good ending'

# Directory used both as the NLTK download target and as an extra NLTK search path.
log_folder = './'
data.path.append(log_folder)

# Fetch the two corpora this cell relies on.
nltk.download('twitter_samples', download_dir=log_folder)
nltk.download('stopwords', download_dir=log_folder)

# Raw tweet texts, one list per sentiment class.
pos_tweets, neg_tweets = (
    twitter_samples.strings('positive_tweets.json'),
    twitter_samples.strings('negative_tweets.json'),
)

print(f"positive sentiment GOOD total samples {len(pos_tweets)}")
print(f"negative sentiment  Bad total samples {len(neg_tweets)}")

class Preprocess():
    """Turn raw tweet strings into lists of stemmed tokens.

    Each tweet goes through four stages, in this order:
    regex clean-up, tokenization, stopword/punctuation removal, stemming.

    The helper methods keep their original double-underscore names so that
    existing callers continue to work.
    """

    def __init__(self):
        """Create the tokenizer, stopword list, punctuation string and stemmer."""
        self.tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
        self.stopwords_en = stopwords.words('english')
        self.punctuation_en = string.punctuation
        self.stemmer = PorterStemmer()

    def __remove_unwanted_characters__(self, tweet):
        """Strip retweet markers, URLs, '#', e-mail-like tokens and digits.

        Args:
            tweet: Raw tweet text.

        Returns:
            The tweet text with the unwanted parts removed. Surrounding
            whitespace is left as-is.
        """
        # Leading "RT" followed by whitespace.
        tweet = re.sub(r'^RT[\s]+', '', tweet)
        # From "http://" or "https://" to the end of that line, plus trailing CR/LF.
        tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet)
        # Drop only the hash sign; the tag text itself stays.
        tweet = re.sub(r'#', '', tweet)
        # Raw string: '\S' in a normal string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        tweet = re.sub(r'\S+@\S+', '', tweet)
        # Runs of digits.
        tweet = re.sub(r'\d+', '', tweet)
        return tweet

    def __tokenize_tweet__(self, tweet):
        """Split a cleaned tweet into tokens using the configured tokenizer."""
        return self.tokenizer.tokenize(tweet)

    def __remove_stopwords__(self, tweet_tokens):
        """Return the tokens that are neither stopwords nor punctuation.

        ``punctuation_en`` is a string, so the punctuation test is a substring
        check: a multi-character token is dropped only if it appears verbatim
        inside that string.
        """
        return [
            word
            for word in tweet_tokens
            if word not in self.stopwords_en and word not in self.punctuation_en
        ]

    def __text_stemming__(self, tweet_tokens):
        """Return the stem of every token, preserving order."""
        return [self.stemmer.stem(word) for word in tweet_tokens]

    def preprocess(self, tweets):
        """Run the full pipeline over an iterable of tweets.

        Args:
            tweets: Iterable of raw tweet strings.

        Returns:
            A list with one list of stemmed tokens per input tweet, in input
            order.
        """
        tweets_processed = []
        # Iterate the tweets directly so tqdm can read their length (when they
        # have one) and show a real progress bar; wrapping them in enumerate()
        # hid the length, and the index was never used.
        for tweet in tqdm(tweets):
            tweet = self.__remove_unwanted_characters__(tweet)
            tweet_tokens = self.__tokenize_tweet__(tweet)
            tweet_clean = self.__remove_stopwords__(tweet_tokens)
            tweet_stems = self.__text_stemming__(tweet_clean)
            tweets_processed.append(tweet_stems)
        return tweets_processed

def extract_features(processed_tweet, bow_word_frequency):
    """Build the 1x3 feature row for one preprocessed tweet.

    Args:
        processed_tweet: Iterable of tokens.
        bow_word_frequency: Mapping from ``(token, label)`` to a count, where
            label 1 is looked up for the second column and label 0 for the
            third.

    Returns:
        A float array of shape (1, 3): ``[1, sum of (token, 1) counts,
        sum of (token, 0) counts]``. Tokens missing from the mapping add 0.
    """
    label_one_total = 0.0
    label_zero_total = 0.0
    for token in processed_tweet:
        label_one_total = bow_word_frequency.get((token, 1), 0) + label_one_total
        label_zero_total = bow_word_frequency.get((token, 0), 0) + label_zero_total

    row = np.zeros((1, 3))
    row[0, :] = (1, label_one_total, label_zero_total)
    return row

# One shared preprocessor; the positive tweets are processed first, then the
# negative ones.
text_processor = Preprocess()
processed_pos_tweets, processed_neg_tweets = [
    text_processor.preprocess(tweet_batch)
    for tweet_batch in (pos_tweets, neg_tweets)
]

def build_bow_dict(tweets, labels):
    """Count how often each token occurs under each label.

    Args:
        tweets: Iterable of token lists.
        labels: Iterable of labels, paired with ``tweets`` by position; pairing
            stops at the shorter of the two.

    Returns:
        Dict mapping ``(token, label)`` to the number of occurrences.
    """
    counts = {}
    for tokens, label in zip(tweets, labels):
        for token in tokens:
            key = (token, label)
            counts[key] = counts.get(key, 0) + 1
    return counts

# Label 1 for every positive tweet followed by label 0 for every negative one,
# in the same order as the combined corpus below.
labels = [1] * len(processed_pos_tweets) + [0] * len(processed_neg_tweets)

# Combined corpus (positives first) and its (token, label) frequency table.
twitter_processed_corpus = [*processed_pos_tweets, *processed_neg_tweets]
bow_word_frequency = build_bow_dict(twitter_processed_corpus, labels)

# --- Send `message` to the deployed model and print its prediction ----------

# The session cookie is a credential and must not live in the notebook source.
# Read it from the environment and fail early with a clear message if it is
# missing. The cookie value that used to be hard-coded here should be treated
# as leaked and invalidated.
session_cookie = os.environ.get("KUBEFLOW_AUTHSERVICE_SESSION")
if not session_cookie:
    raise RuntimeError(
        "Set the KUBEFLOW_AUTHSERVICE_SESSION environment variable to the value "
        "of your 'authservice_session' cookie before running this cell."
    )

# NOTE: this rebinds `data`, which until now referred to the `nltk.data` module
# imported at the top of the cell. The name is kept for backward compatibility.
data = [message]
data = text_processor.preprocess(data)

# String form of the token lists with the first two and last two characters
# (the outer brackets) sliced off. Not used by the request below; kept in case
# other cells read it.
data_o = str(data)
data_o = data_o[2:len(data_o)-2]

# One feature row per preprocessed message (sized from `data` rather than
# hard-coded to a single row).
vect = np.zeros((len(data), 3))
for index, tweet in enumerate(data):
    vect[index, :] = extract_features(tweet, bow_word_frequency)

formData = {
    'instances': vect.tolist()
}

headers = {"Cookie": f"authservice_session={session_cookie}",
           "Host": "sklearnserver.kubeflow-user-example-com.example.com"}

print('Your input sentence is: ' + message)
# A timeout keeps the cell from hanging forever if the gateway is unreachable.
res = requests.post('http://istio-ingressgateway.istio-system/v1/models/model:predict',
                    headers=headers, json=formData, timeout=30)
print(res.text)

[nltk_data] Downloading package twitter_samples to ./...
[nltk_data]   Package twitter_samples is already up-to-date!
[nltk_data] Downloading package stopwords to ./...
[nltk_data]   Package stopwords is already up-to-date!
489it [00:00, 4879.19it/s]

positive sentiment GOOD total samples 5000
negative sentiment  Bad total samples 5000


5000it [00:01, 4714.54it/s]
5000it [00:01, 4569.41it/s]
1it [00:00, 3949.44it/s]

Your input sentence is: good ending
{"predictions": [1]}



