In [1]:
!pip install nltk vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [2]:
# Mount Google Drive at /content/drive (only works inside a Colab runtime).
# NOTE(review): no cell visible here reads from /content/drive — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

import json
import glob
import os
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Mounted at /content/drive


In [3]:
# Load the responses table. Later cells read the columns "response", "model", "age",
# "gender", "religion", "politics", "temperature" and "prompt".
# NOTE(review): the path is relative to the kernel's working directory, not the Drive mount — confirm this is intended.
df = pd.read_csv("data/responses.csv")

In [4]:
def add_sentiment_scores(df, text_col="response"):
    """Return a copy of ``df`` with an added ``"sentiment"`` column.

    Each value in ``text_col`` is converted with ``str()`` and scored with
    VADER; the ``"compound"`` entry of the polarity scores is stored.
    The input frame is not modified.

    Args:
        df: DataFrame containing the text column.
        text_col: Name of the column holding the text to score.

    Returns:
        A new DataFrame with the extra ``"sentiment"`` column.
    """
    scorer = SentimentIntensityAnalyzer()

    def compound_score(text):
        # str() so that non-string cells can still be passed to the analyzer
        return scorer.polarity_scores(str(text))["compound"]

    scored = df.copy()
    scored["sentiment"] = scored[text_col].apply(compound_score)
    return scored

def sentiment_by(df, group_col):
    """Return the mean of ``"sentiment"`` for each value of ``group_col``.

    The result is a Series indexed by group label and sorted by that index.
    """
    grouped = df.groupby(group_col)
    mean_sentiment = grouped["sentiment"].mean()
    return mean_sentiment.sort_index()


def sentiment_by_multiple(df, group_cols):
    """Return mean ``"sentiment"`` for every combination of ``group_cols``.

    The result is a flat DataFrame: one column per grouping key plus a
    ``"sentiment"`` column holding the group mean.
    """
    per_group_mean = df.groupby(group_cols)["sentiment"].mean()
    # reset_index() turns the group labels back into ordinary columns
    return per_group_mean.reset_index()


def print_sentiment_stats(df, columns):
    """Print the mean sentiment per group for each column in ``columns``.

    For every column a blank line and a ``Sentiment by <column>`` header are
    printed, followed by the index-sorted Series of group means.
    """
    for group_col in columns:
        header = f"\nSentiment by {group_col}"
        print(header)
        group_means = df.groupby(group_col)["sentiment"].mean()
        print(group_means.sort_index())


In [5]:
# Rebind df to a copy that carries the added "sentiment" column
# (VADER compound score of each "response" value).
df = add_sentiment_scores(df)

In [6]:
# Mean sentiment for each grouping column, printed one Series per column.
for group_col in ("age", "gender", "religion", "politics", "model", "temperature", "prompt"):
    print(sentiment_by(df, group_col))

age
15    0.459296
25    0.441345
50    0.451615
Name: sentiment, dtype: float64
gender
female        0.464025
male          0.431314
non-binary    0.456600
Name: sentiment, dtype: float64
religion
atheist      0.422367
religious    0.479109
Name: sentiment, dtype: float64
politics
democrat      0.451618
republican    0.449894
Name: sentiment, dtype: float64
model
gemini    0.380310
grok      0.300559
qwen      0.688818
Name: sentiment, dtype: float64
temperature
0.95    0.459495
1.50    0.440551
Name: sentiment, dtype: float64
prompt
AI         0.515642
animal     0.091635
art        0.662871
future     0.898061
job        0.754308
problem    0.445300
rules     -0.289863
story      0.424035
Name: sentiment, dtype: float64


In [16]:
import html

import numpy as np
import pandas as pd

def create_characteristic_table(df, characteristic):
    """Build a model-by-category table of mean sentiment.

    One row is produced per distinct value of ``df['model']`` and one column
    per distinct value of ``df[characteristic]`` (column label is
    ``str(category)``), plus an ``'Average'`` column holding the model's mean
    sentiment over all of its rows. Numeric columns are rounded to 4 decimals.

    Missing values (NaN/None) in the model or characteristic column are not
    given a row/column of their own. Rows with a missing characteristic still
    contribute to the model's ``'Average'``.

    Args:
        df: DataFrame with ``'model'``, ``'sentiment'`` and ``characteristic`` columns.
        characteristic: Name of the column whose values become table columns.

    Returns:
        DataFrame with columns ``'Model'``, one per category, and ``'Average'``.
        A model/category combination with no rows yields NaN.
    """
    # dropna(): NaN/None cannot be ordered against strings, so sorted() would
    # raise TypeError on a column that mixes labels with missing values.
    models = sorted(df['model'].dropna().unique())
    categories = sorted(df[characteristic].dropna().unique())

    # Fix the column layout up front so an empty input still gets a header.
    # dict.fromkeys de-duplicates while keeping first-seen order, matching the
    # key order of the row dicts built below.
    columns = list(dict.fromkeys(['Model', *(str(c) for c in categories), 'Average']))

    results = []
    for model in models:
        model_df = df[df['model'] == model]
        row = {'Model': model}
        for category in categories:
            in_category = model_df[characteristic] == category
            row[str(category)] = model_df.loc[in_category, 'sentiment'].mean()
        row['Average'] = model_df['sentiment'].mean()
        results.append(row)

    results_df = pd.DataFrame(results, columns=columns)

    numeric_cols = results_df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols):
        results_df[numeric_cols] = results_df[numeric_cols].round(4)

    return results_df


# Characteristics exported when the caller does not pass its own list.
_DEFAULT_CHARACTERISTICS = ('age', 'gender', 'religion', 'politics')

# Header background colour per characteristic (single source for both exporters).
_HEADER_COLORS = {
    'age': '#3498db',       # Blue
    'gender': '#e74c3c',    # Red
    'religion': '#9b59b6',  # Purple
    'politics': '#f39c12',  # Orange
}

# Used for characteristics without an entry above; same grey as the Model header.
_FALLBACK_HEADER_COLOR = '#5d6d7e'

# Page-level rules for the combined page (several tables stacked vertically).
_ALL_TABLES_PAGE_CSS = """\
body {
    font-family: Arial, sans-serif;
    margin: 20px;
}
table {
    border-collapse: collapse;
    margin-bottom: 30px;
    width: auto;
}
"""

# Page-level rules for a page holding a single table.
_SINGLE_TABLE_PAGE_CSS = """\
table {
    border-collapse: collapse;
    margin: 20px;
}
"""

# Cell, row and fixed-header rules common to both page layouts.
_SHARED_TABLE_CSS = """\
th, td {
    border: 1px solid #ddd;
    padding: 8px 12px;
    text-align: center;
}
tr:nth-child(even) {
    background-color: #f9f9f9;
}
tr:hover:not(.title-row) {
    background-color: #f0f0f0;
}
.model-col {
    text-align: left;
    font-weight: bold;
}

/* Title row styling */
.title-row {
    background-color: #2c3e50;
    color: white;
    font-size: 16px;
    font-weight: bold;
}

/* Model header - always gray */
.model-header {
    background-color: #5d6d7e;
    color: white;
    font-weight: bold;
}
"""


def _css_class_fragment(name):
    """Turn ``name`` into text that is safe to use inside a CSS class name."""
    slug = ''.join(ch if (ch.isalnum() or ch in '-_') else '-' for ch in str(name))
    # A CSS identifier may not be empty or start with a digit / hyphen-digit.
    if not slug or slug[0].isdigit() or slug[0] == '-':
        slug = '_' + slug
    return slug


def _header_css(class_name, color):
    """Return the CSS rule that colours data-column headers of one table."""
    return (
        f'.{class_name} {{\n'
        f'    background-color: {color};\n'
        '    color: white;\n'
        '    font-weight: bold;\n'
        '}\n'
    )


def _render_characteristic_table(char_df, title, header_class):
    """Return the ``<table>`` markup for one characteristic table.

    All text taken from ``char_df`` (column labels and cell values) and the
    title are HTML-escaped before being placed in the markup.
    """
    parts = [
        '<table>\n<thead>\n',
        # Title row spans every column of the table
        f'<tr class="title-row">\n<th colspan="{len(char_df.columns)}">'
        f'{html.escape(title)}</th>\n</tr>\n',
        '<tr>\n',
    ]
    for col in char_df.columns:
        css_class = 'model-header' if col == 'Model' else header_class
        parts.append(f'<th class="{css_class}">{html.escape(str(col))}</th>\n')
    parts.append('</tr>\n</thead>\n<tbody>\n')

    for _, row in char_df.iterrows():
        parts.append('<tr>\n')
        for col, val in row.items():
            cell = html.escape(str(val))
            if col == 'Model':
                parts.append(f'<td class="model-col">{cell}</td>\n')
            else:
                parts.append(f'<td>{cell}</td>\n')
        parts.append('</tr>\n')

    parts.append('</tbody>\n</table>\n')
    return ''.join(parts)


def _wrap_html_page(css, body):
    """Wrap ``body`` markup in a minimal HTML document that declares UTF-8."""
    return (
        '<html>\n<head>\n<meta charset="utf-8">\n<style>\n'
        f'{css}'
        '</style>\n</head>\n<body>\n'
        f'{body}'
        '</body>\n</html>'
    )


def export_all_characteristic_tables_html(df, output_file='sentiment_tables.html',
                                          characteristics=None):
    """Write one HTML page containing a sentiment table per characteristic.

    Each table comes from ``create_characteristic_table(df, characteristic)``.
    Data-column headers get the class ``<characteristic>-header`` and are
    coloured from ``_HEADER_COLORS`` (grey when no colour is registered).
    The file is written as UTF-8 and two status lines are printed.

    Args:
        df: DataFrame passed through to ``create_characteristic_table``.
        output_file: Path of the HTML file to write (overwritten if present).
        characteristics: Iterable of column names to tabulate; defaults to
            age, gender, religion and politics.
    """
    chars = list(_DEFAULT_CHARACTERISTICS if characteristics is None else characteristics)
    header_classes = {char: f'{_css_class_fragment(char)}-header' for char in chars}

    css = _ALL_TABLES_PAGE_CSS + _SHARED_TABLE_CSS + ''.join(
        _header_css(header_classes[char], _HEADER_COLORS.get(char, _FALLBACK_HEADER_COLOR))
        for char in chars
    )

    # Collect fragments in a list and join once instead of repeated string +=
    body_parts = ['<h1>Sentiment Analysis by Characteristic</h1>\n']
    for char in chars:
        char_df = create_characteristic_table(df, char)
        title = f'Sentiment by {str(char).capitalize()}'
        body_parts.append(_render_characteristic_table(char_df, title, header_classes[char]))
        body_parts.append('\n')

    html_content = _wrap_html_page(css, ''.join(body_parts)) + '\n'

    # Explicit encoding: the platform default may not be able to encode the data
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"All tables saved to {output_file}")
    print("Open the file in a browser, then copy each table into Google Docs")


def export_individual_characteristic_tables_html(df, characteristics=None):
    """Write one HTML file per characteristic into the working directory.

    For each characteristic the table from
    ``create_characteristic_table(df, characteristic)`` is written to
    ``sentiment_by_<characteristic>.html`` (UTF-8) and a status line is
    printed. Data-column headers use the class ``data-header``, coloured from
    ``_HEADER_COLORS`` (grey when no colour is registered).

    Args:
        df: DataFrame passed through to ``create_characteristic_table``.
        characteristics: Iterable of column names to tabulate; defaults to
            age, gender, religion and politics. Each name is used verbatim in
            the output filename.
    """
    chars = list(_DEFAULT_CHARACTERISTICS if characteristics is None else characteristics)

    for char in chars:
        char_df = create_characteristic_table(df, char)

        css = (
            _SINGLE_TABLE_PAGE_CSS
            + _SHARED_TABLE_CSS
            + _header_css('data-header', _HEADER_COLORS.get(char, _FALLBACK_HEADER_COLOR))
        )
        title = f'Sentiment by {str(char).capitalize()}'
        table_markup = _render_characteristic_table(char_df, title, 'data-header')
        html_content = _wrap_html_page(css, table_markup)

        filename = f'sentiment_by_{char}.html'
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"Table saved to {filename}")


# Write the age, gender, religion and politics tables into a single HTML file in the working directory.
export_all_characteristic_tables_html(df, 'sentiment_all_characteristics.html')

All tables saved to sentiment_all_characteristics.html
Open the file in a browser, then copy each table into Google Docs
