In [None]:
!pip install transformers

In [None]:
from google.colab import files

# Upload the input file into the Colab runtime through google.colab's
# `files.upload()`. The analysis cell reads 'example_videos_channel_2023.csv'
# by that hard-coded name, so the uploaded file presumably has to be named
# exactly that (verify the working directory after uploading).
# `uploaded` keeps the call's return value; nothing else in the visible
# notebook reads it.
uploaded = files.upload()

In [None]:
import pandas as pd
from transformers import pipeline

# Load the channel export; the file uses ';' as its field delimiter.
df = pd.read_csv('example_videos_channel_2023.csv', delimiter=';')

# Treat missing descriptions as empty strings so that NaN and '' are handled
# by the same emptiness test below.
df['videoDescription'] = df['videoDescription'].fillna('')

# Keep only the rows that actually have a description to classify.
# The explicit .copy() detaches the filtered frame from the original one, so
# the sentiment columns added further down do not raise pandas'
# SettingWithCopyWarning.
df = df[df['videoDescription'] != ''].copy()

# Create the sentiment classification pipeline for the chosen model.
classifier = pipeline("text-classification", model="clampert/multilingual-sentiment-covid19")

# Define a function to apply sentiment analysis and return the result
def analyze_sentiment(text, max_seq_length=512):
    """Classify one text and return its sentiment together with all label scores.

    Args:
        text: The string to classify.
        max_seq_length: Maximum number of *tokens* handed to the model. Longer
            inputs are truncated by the pipeline's tokenizer.

    Returns:
        A ``(label, score, probabilities)`` tuple: the highest-scoring label,
        that label's score, and a dict mapping every label returned by the
        classifier to its score.
    """
    # Truncate in the tokenizer instead of slicing characters: the limit is a
    # token count, so cutting at N characters both throws away usable text and
    # does not guarantee the input fits. `top_k=None` asks the pipeline for the
    # score of every label rather than only the winning one.
    result = classifier(text, truncation=True, max_length=max_seq_length, top_k=None)

    # Normalise the return shape: depending on the transformers version the
    # per-label dicts arrive as a flat list, wrapped in an outer list, or (for
    # a single label) as one bare dict.
    if isinstance(result, dict):
        result = [result]
    elif result and isinstance(result[0], list):
        result = result[0]

    probabilities = {entry['label']: entry['score'] for entry in result}

    # Do not rely on the pipeline's ordering; pick the best label explicitly.
    best = max(result, key=lambda entry: entry['score'])
    return best['label'], best['score'], probabilities

# Apply the sentiment analysis to each text in the 'videoDescription' column.
# The result tuples are collected first so that an empty DataFrame (no
# descriptions left after filtering) produces three empty columns instead of
# failing while unpacking zip(*...) of nothing.
sentiment_results = [analyze_sentiment(text) for text in df['videoDescription']]
if sentiment_results:
    sentiment_labels, sentiment_scores, sentiment_probabilities = (
        list(column) for column in zip(*sentiment_results)
    )
else:
    sentiment_labels, sentiment_scores, sentiment_probabilities = [], [], []

df['sentiment_output'] = sentiment_labels
df['sentiment_score'] = sentiment_scores
df['sentiment_probabilities'] = sentiment_probabilities

# Save the descriptions together with their sentiment results to a CSV file
# (the DataFrame index is not written).
df[['videoDescription', 'sentiment_output', 'sentiment_score', 'sentiment_probabilities']].to_csv('example_sentiment_analysis_results.csv', index=False)

# Calculate overall probabilities: sum the per-row label scores, then express
# each label's total as a share of the grand total.
# 'negative' and 'positive' are pre-seeded so both are always reported, even
# when no row contributed a score for one of them.
overall_probabilities = {'negative': 0, 'positive': 0}
# Not used below; kept in case other cells refer to it.
total_rows = len(df)

# Only the probabilities column is needed, so iterate it directly.
for probabilities in df['sentiment_probabilities']:
    for label, probability in probabilities.items():
        # .get() tolerates a label that was not pre-seeded instead of raising KeyError.
        overall_probabilities[label] = overall_probabilities.get(label, 0) + probability

# Normalize the probabilities to sum up to 100%.
# With no rows (or all-zero scores) the total is 0; the values are then left
# at 0 rather than dividing by zero.
total_probability = sum(overall_probabilities.values())
if total_probability:
    for label in overall_probabilities:
        overall_probabilities[label] = overall_probabilities[label] / total_probability * 100

# Display overall probabilities
print("Overall Probabilities:")
for label, probability in overall_probabilities.items():
    print(f"{label}: {probability:.2f}%")

In [None]:
from google.colab import files
# Hand the results file to google.colab's `files.download()`. The name matches
# the CSV written by the analysis cell's `to_csv(...)` call, so that cell has
# to run first.
files.download('example_sentiment_analysis_results.csv')