### YouTube Shorts Sentiment Model

Libraries

In [1]:
import pandas as pd
from transformers import pipeline
from openai import ChatCompletion
import ssl
import certifi

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# !pip install transformers --trusted-host pypi.org --trusted-host files.pythonhosted.org

Functions

In [4]:
import os
import ssl

import certifi
import pandas as pd
import requests
from openai import ChatCompletion
from transformers import pipeline
from transformers.pipelines import PipelineException

def fill_sentiment_column(df, sentiment_model=None):
    """
    Fills the sentiment column of a DataFrame using a pre-trained NLP model.

    The DataFrame is modified in place (its 'sentiment' column is overwritten)
    and is also returned for convenience.

    Parameters:
        df (pd.DataFrame): DataFrame with a 'comment' column. The 'sentiment'
            column is created if missing and overwritten otherwise.
        sentiment_model (callable, optional): Classifier to use instead of
            loading the default pipeline. It is called with one string and must
            return a list whose first item is a dict with a 'label' key (the
            shape a transformers text-classification pipeline returns).
            Passing an already loaded pipeline avoids reloading the model on
            every call.

    Returns:
        pd.DataFrame: The same DataFrame with lower-cased labels in 'sentiment'.
            The default SST-2 checkpoint is a binary classifier, so it yields
            only 'positive' or 'negative' (never 'neutral'). Rows whose comment
            is missing get a missing sentiment instead of being sent to the model.

    Raises:
        RuntimeError: If the pipeline raises a PipelineException, or if the
            model download fails with an SSL error.
    """
    # TLS verification is deliberately left enabled: globally swapping in an
    # unverified SSL context weakens every HTTPS call in the process and does
    # not affect the `requests`-based download anyway. Behind a TLS-intercepting
    # proxy ("self-signed certificate in certificate chain"), point the
    # REQUESTS_CA_BUNDLE environment variable at the proxy's CA bundle instead.
    try:
        if sentiment_model is None:
            # Load pre-trained sentiment analysis model
            sentiment_model = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                # Download/cache options must go through model_kwargs so they
                # reach from_pretrained(); bare extra kwargs are handed to the
                # pipeline object instead.
                model_kwargs={"cache_dir": "./models"},
            )

        def _label(comment):
            # Skip missing comments rather than feeding None/NaN to the model.
            if pd.isna(comment):
                return None
            return sentiment_model(str(comment))[0]['label'].lower()

        # Apply sentiment analysis to each comment
        df['sentiment'] = df['comment'].apply(_label)

    except PipelineException as pe:
        print("Pipeline Error occurred:", pe)
        raise RuntimeError("Failed to initialize the pipeline due to model errors.") from pe
    except requests.exceptions.SSLError as e:
        print("SSL Error occurred:", e)
        raise RuntimeError("Failed to connect to the model server due to SSL error.") from e

    return df

def generate_sentiment_insights(df, openai_api_key):
    """
    Generates insights on positive and negative sentiments using ChatGPT.

    One chat completion is requested per sentiment group. A group with no
    comments is not sent to the API; a fixed "No ... comments to analyze."
    message is returned for it instead. API failures are handled best-effort:
    the error text is stored as that group's insight rather than raised.

    Parameters:
        df (pd.DataFrame): DataFrame with a 'comment' column and a 'sentiment'
            column containing 'positive' or 'negative'. Rows with any other
            sentiment value are ignored.
        openai_api_key (str): API key for OpenAI's ChatGPT.

    Returns:
        dict: Maps 'positive' and 'negative' to the generated insight text
            (or to a placeholder / error message, see above).
    """
    prompt_templates = {
        "positive": "The following are positive comments:\n{comments}\n\nWhy do people respond positively to this content?",
        "negative": "The following are negative comments:\n{comments}\n\nWhy do people respond negatively to this content?",
    }

    insights = {}
    for sentiment, template in prompt_templates.items():
        # Group comments by sentiment
        comments = df.loc[df['sentiment'] == sentiment, 'comment'].tolist()
        if not comments:
            # Nothing to explain; avoid a pointless (and billed) API call.
            insights[sentiment] = f"No {sentiment} comments to analyze."
            continue

        prompt = template.format(comments=comments)
        try:
            response = ChatCompletion.create(
                # The key is passed per request: assigning it as an attribute on
                # the ChatCompletion class is not read by the client and would
                # also leak into unrelated calls as global state.
                api_key=openai_api_key,
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}]
            )
            insights[sentiment] = response['choices'][0]['message']['content']
        except Exception as e:
            insights[sentiment] = f"Error generating insight: {str(e)}"

    return insights

# Wrapper function
def analyze_sentiments(df, openai_api_key):
    """
    Runs the full workflow: label every comment, then ask ChatGPT why the
    positive and the negative comments read the way they do.

    Parameters:
        df (pd.DataFrame): Input DataFrame with 'shorts_url', 'comment', 'written_date', 'sentiment' columns.
        openai_api_key (str): API key for OpenAI's ChatGPT.

    Returns:
        dict: Insights for positive and negative sentiments. If either step
            raises, the error is printed and {"error": <message>} is returned
            instead.
    """
    try:
        # Step 1: label the comments
        labelled = fill_sentiment_column(df)
        # Step 2: turn the labelled comments into insights
        result = generate_sentiment_insights(labelled, openai_api_key)
    except Exception as err:
        print("Error during sentiment analysis:", err)
        result = {"error": str(err)}

    return result

In [5]:
# Example usage
df = pd.DataFrame({
    'shorts_url': ['url1', 'url2', 'url3'],
    'comment': ['Great video!', 'I didn\'t like it.', 'It was okay.'],
    'written_date': ['2024-12-11', '2024-12-11', '2024-12-11'],
    'sentiment': [None, None, None]
})

# Never hardcode credentials in a notebook: they end up in version control and
# in every shared copy. Read the key from the environment instead. Any key that
# was ever pasted into this cell must be treated as compromised and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

insights = analyze_sentiments(df, openai_api_key)
print(insights)


SSL Error occurred: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))"), '(Request ID: e84ccdf3-5574-476e-ba49-9489c34e98d6)')
Error during sentiment analysis: Failed to connect to the model server due to SSL error.
{'error': 'Failed to connect to the model server due to SSL error.'}
