Skip to content

Add Python script to UDF guides #820

@BohuTANG

Description

@BohuTANG

Requirements

Must use Python's standard library; third-party imports are not allowed.

Demo

-- Create the sentiment analysis function
CREATE OR REPLACE FUNCTION sentiment_analysis(STRING) RETURNS STRING
LANGUAGE python HANDLER = 'sentiment_analysis'
AS $$
def remove_stop_words(text, stop_words):
    """
    Filters stop words out of a piece of text.

    The text is split on whitespace. A token is dropped when its lowercased
    form is found in ``stop_words``; surviving tokens keep their original
    casing and are re-joined with single spaces.

    Args:
    text (str): The input text.
    stop_words (set): Stop words to drop (compared against lowercased tokens).

    Returns:
    str: The remaining tokens joined by single spaces.
    """
    kept_tokens = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

def calculate_sentiment(text, positive_words, negative_words):
    """
    Calculates the sentiment score of the text.

    The text is split on whitespace and every token is looked up in both
    lexicons. Each token is tried twice: verbatim, and in a normalized form
    (surrounding punctuation stripped, then lowercased). The normalized
    lookup lets "Happy" or "sad." match lowercase lexicon entries; the
    verbatim lookup keeps any entry that matched before still matching.

    Every occurrence counts, so a word repeated twice contributes twice.
    A token found in both lexicons adds +1 and -1, netting zero.

    Args:
    text (str): The input text.
    positive_words (set): A set of positive words.
    negative_words (set): A set of negative words.

    Returns:
    int: Number of positive matches minus number of negative matches.
    """
    # Function-scope import keeps the block self-contained; standard library only.
    import string

    score = 0
    for token in text.split():
        normalized = token.strip(string.punctuation).lower()
        candidates = (token, normalized)
        if any(candidate in positive_words for candidate in candidates):
            score += 1
        if any(candidate in negative_words for candidate in candidates):
            score -= 1
    return score

def get_sentiment_label(score):
    """
    Maps a numeric sentiment score onto a human-readable label.

    Args:
    score (int): The sentiment score.

    Returns:
    str: 'Positive' when the score is above zero, 'Negative' when it is
    below zero, and 'Neutral' otherwise.
    """
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

def sentiment_analysis(text):
    """
    UDF handler: scores the sentiment of the input text and labels it.

    Stop words are removed first, the remaining text is scored against
    small built-in positive and negative word lists, and the score is
    then mapped to a label.

    Args:
    text (str): The input text.

    Returns:
    str: A string of the form
    'Sentiment Score: <score>; Sentiment Label: <label>'.
    """
    # Built-in lexicons; all entries are lowercase.
    stop_words = {"a", "an", "the", "and", "or", "but", "if", "then", "so"}
    positive_words = {"good", "happy", "joy", "excellent", "positive", "love"}
    negative_words = {"bad", "sad", "pain", "terrible", "negative", "hate"}

    sentiment_score = calculate_sentiment(
        remove_stop_words(text, stop_words),
        positive_words,
        negative_words,
    )
    sentiment_label = get_sentiment_label(sentiment_score)

    return f'Sentiment Score: {sentiment_score}; Sentiment Label: {sentiment_label}'
$$;

-- Demo table holding the raw sentences to be scored
CREATE OR REPLACE TABLE texts (original_text STRING);

-- Insert sample data
INSERT INTO texts (original_text) VALUES
    ('The quick brown fox feels happy and joyful'),
    ('A hard journey, but it was painful and sad'),
    ('Uncertain outcomes leave everyone unsure and hesitant'),
    ('The movie was excellent and everyone loved it'),
    ('A terrible experience that made me feel bad');

-- Run the UDF against every stored sentence
SELECT
    t.original_text,
    sentiment_analysis(t.original_text) AS processed_text
FROM texts AS t;

Result:

┌────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│                     original_text                     │                 processed_text                 │
├───────────────────────────────────────────────────────┼────────────────────────────────────────────────┤
│ The quick brown fox feels happy and joyful            │ Sentiment Score: 1; Sentiment Label: Positive  │
│ A hard journey, but it was painful and sad            │ Sentiment Score: -1; Sentiment Label: Negative │
│ Uncertain outcomes leave everyone unsure and hesitant │ Sentiment Score: 0; Sentiment Label: Neutral   │
│ The movie was excellent and everyone loved it         │ Sentiment Score: 1; Sentiment Label: Positive  │
│ A terrible experience that made me feel bad           │ Sentiment Score: -2; Sentiment Label: Negative │
└────────────────────────────────────────────────────────────────────────────────────────────────────────┘

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions