In [None]:
# Shell command: install the virtualenv tool with pip3.
!pip3 install virtualenv

In [None]:
# Shell command: create a virtual environment in the directory "perspectiveAPI".
!virtualenv perspectiveAPI

In [None]:
# Shell command: source the virtualenv's activate script.
# NOTE(review): each "!" line presumably runs in its own subshell, so this
# activation likely does not carry over to later cells or to the Python
# kernel itself -- confirm whether this cell has any lasting effect.
!source perspectiveAPI/bin/activate

In [None]:
!perspectiveAPI/bin/pip install google-api-python-client

In [None]:
# Shell command: download the script served at sdk.cloud.google.com and pipe
# it directly into bash.
# NOTE(review): the downloaded script is executed without being inspected or
# checksummed -- confirm that is acceptable for this environment.
!curl https://sdk.cloud.google.com | bash

In [None]:
# Shell command: run `gcloud init` through the absolute path
# /root/google-cloud-sdk/bin/gcloud.
# NOTE(review): assumes the SDK was installed under /root (i.e. the notebook
# runs as root, as on Colab) -- confirm for other environments.
!/root/google-cloud-sdk/bin/gcloud init

In [None]:
!gcloud services enable commentanalyzer.googleapis.com

In [None]:
from google.colab import files

# Call Colab's upload helper and keep whatever it returns in `uploaded`.
# NOTE(review): presumably this is where example_videos_channel_2023.csv
# (read by the analysis cell) is supplied -- confirm.
uploaded = files.upload()

In [None]:
import json
import os

import pandas as pd
from googleapiclient import discovery

# Define your API_KEY.
# The key is read from the PERSPECTIVE_API_KEY environment variable when it is
# set, so the real key does not have to be saved inside the notebook. If the
# variable is absent, the "API_KEY" placeholder is used, exactly as before,
# and must be replaced by hand.
API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "API_KEY")

# Initialize the client from the Comment Analyzer discovery document.
# static_discovery=False makes the library use discoveryServiceUrl rather than
# a discovery document bundled with the client library.
client = discovery.build(
    "commentanalyzer",
    "v1alpha1",
    developerKey=API_KEY,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)

# Function to analyze text and return results
def analyze_text(text):
    """Request Perspective attribute scores for ``text``.

    Sends ``text`` to ``client.comments().analyze`` asking for the TOXICITY,
    IDENTITY_ATTACK, INSULT, PROFANITY and THREAT attributes.

    Args:
        text: The comment text to score.

    Returns:
        The response returned by ``execute()`` on success. If anything raises,
        a message is printed and a dict with the same ``attributeScores``
        layout is returned in which every ``summaryScore`` value is 0.
        Note that a failed request is therefore indistinguishable from a
        genuine score of 0 in the returned data.
    """
    try:
        analyze_request = {
            'comment': {'text': text},
            'requestedAttributes': {
                'TOXICITY': {},
                'IDENTITY_ATTACK': {},
                'INSULT': {},
                'PROFANITY': {},
                'THREAT': {}
            }
        }
        response = client.comments().analyze(body=analyze_request).execute()
        return response
    except Exception as e:
        # Not every exception has a usable ``error_details`` (it can be
        # missing, empty, a string, or a list). Read it defensively so the
        # handler itself can never raise and defeat the zero-score fallback.
        details = getattr(e, 'error_details', None)
        if isinstance(details, (list, tuple)) and details:
            error_details = details[0]
        elif details:
            error_details = details
        else:
            error_details = str(e)

        # Match on the textual form: when the detail is a dict, the error type
        # is stored as a value, and ``in`` on the dict would only test keys.
        details_text = str(error_details)

        # The more specific marker is checked first because the shorter one is
        # a substring of it.
        if 'LANGUAGE_NOT_SUPPORTED_BY_ATTRIBUTE' in details_text:
            print(f"Skipping analysis for text due to unsupported language: {text}")
        elif 'LANGUAGE_NOT_SUPPORTED' in details_text:
            print(f"Skipping analysis for text due to undefined language: {text}")
        else:
            print(f"Skipping analysis due to error: {error_details}")
        # Return default values of 0 for each category
        return {
            'attributeScores': {
                'TOXICITY': {'summaryScore': {'value': 0}},
                'IDENTITY_ATTACK': {'summaryScore': {'value': 0}},
                'INSULT': {'summaryScore': {'value': 0}},
                'PROFANITY': {'summaryScore': {'value': 0}},
                'THREAT': {'summaryScore': {'value': 0}}
            }
        }

# Read the semicolon-separated input file
df = pd.read_csv('example_videos_channel_2023.csv', delimiter=';')

# Missing descriptions become empty strings before they are scored
df['videoDescription'] = df['videoDescription'].fillna('')

# One dict of scores per input row, collected in row order
results = []

for index, row in df.iterrows():
    print(f"Analyzing text at row {index+1}...")
    attribute_scores = analyze_text(row['videoDescription'])['attributeScores']

    row_scores = {}
    for attribute, scores in attribute_scores.items():
        row_scores[attribute] = scores['summaryScore']['value']
        # Span-level scores, when present, are stored under
        # "<attribute>_<type>"; a later span with the same type replaces
        # the value written by an earlier one.
        for span in scores.get('spanScores', []):
            span_score = span['score']
            span_type = span_score['type']
            row_scores[f'{attribute}_{span_type}'] = span_score['value']
    results.append(row_scores)

# Build the score table and append the description text as the last column
results_df = pd.DataFrame(results).assign(videoDescription=df['videoDescription'])

# Description first, then every score column in alphabetical order
# (the slice drops the trailing 'videoDescription' column before sorting)
score_columns = sorted(results_df.columns[:-1])
columns_order = ['videoDescription'] + score_columns
results_df = results_df[columns_order]

# Write the table to disk without the index column
results_df.to_csv('example_perspective_video_description.csv', index=False)

Analyzing text at row 1...
Analyzing text at row 2...
Analyzing text at row 3...


In [None]:
from google.colab import files
# Pass the CSV written by the analysis cell to Colab's download helper.
files.download('example_perspective_video_description.csv')