In [None]:
import os
import warnings

import pandas as pd
from google.cloud import language_v1
from textblob import TextBlob
from tqdm import tqdm
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Tell the Google client library where the credentials JSON file lives.
# This is a file path, not an API key. setdefault() keeps any value that is
# already present in the environment, so the notebook can run on another
# machine without editing this hard-coded local path.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project\nlp-project-459218-a7af92faf71f.json",
)



In [None]:
# Running count of Google NLP requests that completed successfully;
# get_google_sentiment() increments it after each one.
total_api_calls = 0

# One shared VADER analyzer and one shared Google Language client,
# created once here and reused for every piece of text that gets scored.
vader = SentimentIntensityAnalyzer()
google_client = language_v1.LanguageServiceClient()

# Google NLP sentiment analysis function (with API call counter)
def get_google_sentiment(text):
    """Return the Google Cloud Natural Language sentiment score for ``text``.

    ``text`` is wrapped in a plain-text ``language_v1.Document`` and sent through
    the module-level ``google_client``. Each request that succeeds increments
    the module-level ``total_api_calls`` counter.

    Args:
        text: The text to analyse.

    Returns:
        ``document_sentiment.score`` from the response, or ``0.0`` if the
        request (or reading its result) raised an exception.

    Warns:
        RuntimeWarning: when the fallback ``0.0`` is returned, so a failed
            request is not silently mistaken for a genuinely neutral score.

    Note:
        Only ``Exception`` subclasses are handled. ``KeyboardInterrupt`` and
        ``SystemExit`` propagate, so a long scoring run can still be stopped.
    """
    global total_api_calls
    document = language_v1.Document(content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    try:
        response = google_client.analyze_sentiment(request={"document": document})
        score = response.document_sentiment.score
    except Exception as exc:
        # Keep the original best-effort fallback value, but make the failure visible.
        warnings.warn(
            f"Google NLP sentiment request failed ({type(exc).__name__}: {exc}); "
            "using 0.0 for this text",
            RuntimeWarning,
            stacklevel=2,
        )
        return 0.0
    # Count only requests that actually produced a score.
    total_api_calls += 1
    return score

In [None]:
# Session numbers to process, and the folder that holds the per-session files
sessions = list(range(1, 7))
base_path = r"C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data"

In [None]:
# Score every session transcript with all three models and write one CSV per session
for session_num in sessions:
    print(f"\n📄 Processing Session {session_num} ...")

    input_file = os.path.join(base_path, f"In_Treatment_Session {session_num}.xlsx")
    output_file = os.path.join(base_path, f"session_{session_num}_with_sentiment.csv")

    # Load the workbook and tidy the two columns the later steps rely on
    df = pd.read_excel(input_file)
    df['Speaker'] = df['Speaker'].astype(str).str.strip()
    # Keep only the part before ' --> ' in each time cell, then parse it
    start_stamps = df['Start Time'].map(lambda cell: str(cell).split(' --> ')[0])
    df['Start Time'] = pd.to_datetime(start_stamps, format='%H:%M:%S,%f')

    # All three models score the same stringified dialogue, row by row
    utterances = [str(cell) for cell in df['Dialogue']]
    df['TextBlob_Score'] = [TextBlob(line).sentiment.polarity for line in utterances]
    df['VADER_Score'] = [vader.polarity_scores(line)['compound'] for line in utterances]
    progress = tqdm(utterances, desc=f"Session {session_num} Google NLP")
    df['Google_Score'] = [get_google_sentiment(line) for line in progress]

    # Write the scored rows next to the input workbook
    df.to_csv(output_file, index=False)
    print(f"✅ Session {session_num} saved → {output_file}")

# Report how many Google NLP requests were counted across all sessions
print(f"\n📊 Total number of Google NLP API calls used: {total_api_calls}")


📄 處理 Session 1 ...


Session 1 Google NLP: 100%|██████████| 388/388 [00:39<00:00,  9.76it/s]


✅ Session 1 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_1_with_sentiment.csv

📄 處理 Session 2 ...


Session 2 Google NLP: 100%|██████████| 383/383 [00:40<00:00,  9.57it/s]


✅ Session 2 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_2_with_sentiment.csv

📄 處理 Session 3 ...


Session 3 Google NLP: 100%|██████████| 374/374 [00:36<00:00, 10.15it/s]


✅ Session 3 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_3_with_sentiment.csv

📄 處理 Session 4 ...


Session 4 Google NLP: 100%|██████████| 382/382 [00:37<00:00, 10.20it/s]


✅ Session 4 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_4_with_sentiment.csv

📄 處理 Session 5 ...


Session 5 Google NLP: 100%|██████████| 380/380 [00:37<00:00, 10.19it/s]


✅ Session 5 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_5_with_sentiment.csv

📄 處理 Session 6 ...


Session 6 Google NLP: 100%|██████████| 301/301 [00:29<00:00, 10.15it/s]

✅ Session 6 儲存完成 → C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data\session_6_with_sentiment.csv

📊 本次總共使用 Google NLP API 分析：2198 次





In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import os

# Make "browser" Plotly's default renderer for this session, so figures shown
# without an explicit renderer are displayed in a web browser.
pio.renderers.default = "browser"

# Folder holding the per-session sentiment CSVs, and which sessions to plot
base_path = r"C:\Users\User\Desktop\Pratt\2025spring\ADV project\final project data"
sessions = [*range(1, 7)]

In [None]:
# Hex colours for each plotted series, keyed as "<speaker>_<model>"
colors = {}

# Paul's three series
colors.update({
    "Paul_TextBlob": "#87CEFA",
    "Paul_VADER": "#9370DB",
    "Paul_Google": "#4682B4",
})

# Laura's three series
colors.update({
    "Laura_TextBlob": "#FF6F61",
    "Laura_VADER": "#FFA500",
    "Laura_Google": "#228B22",
})

# Build a 3-row by 2-column grid of independent panels, one title per session
panel_titles = [f"Session {number}" for number in sessions]

fig = make_subplots(
    rows=3,
    cols=2,
    subplot_titles=panel_titles,
    shared_xaxes=False,
    shared_yaxes=False,
    horizontal_spacing=0.02,
    vertical_spacing=0.20,
)


In [None]:
# Draw one panel per session, with one trace for every speaker/model pair
for idx, session_num in enumerate(sessions, start=1):
    file_path = os.path.join(base_path, f"session_{session_num}_with_sentiment.csv")
    df = pd.read_csv(file_path)

    df['Start Time'] = pd.to_datetime(df['Start Time'])
    # Map the 1-based session position onto the two-column grid, filling row by row
    row = (idx - 1) // 2 + 1
    col = (idx - 1) % 2 + 1
    # Only the first panel contributes legend entries; legendgroup links the
    # matching traces in the other panels to those entries
    show_legend = (idx == 1)

    for speaker in ['Paul', 'Laura']:
        # The speaker's rows and hover labels do not depend on the model,
        # so they are built once per speaker rather than once per trace
        sub_df = df[df['Speaker'] == speaker]
        hover_text = [f"{speaker}: {text}" for text in sub_df['Dialogue']]

        for model in ['TextBlob', 'VADER', 'Google']:
            score_col = f"{model}_Score"
            trace_color = colors[f"{speaker}_{model}"]

            fig.add_trace(
                go.Scatter(
                    x=sub_df['Start Time'],
                    y=sub_df[score_col],
                    mode='lines+markers',
                    name=f"{speaker} - {model}",
                    legendgroup=f"{speaker}-{model}",
                    showlegend=show_legend,
                    marker=dict(size=4, color=trace_color),
                    line=dict(width=2, color=trace_color),
                    text=hover_text,
                    hoverinfo='text+x+y'
                ),
                row=row, col=col
            )


In [None]:
# Figure-wide styling: canvas size, centred title, horizontal legend, bottom margin
title_style = {
    "text": "In Treatment Sessions 1–6: Sentiment Comparison (TextBlob vs VADER vs Google NLP)",
    "x": 0.5,
    "xanchor": 'center',
    "font": {"size": 24},
}
legend_style = {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": 1.02,
    "yanchor": "bottom",
}

fig.update_layout(
    width=1800,
    height=1200,
    template="simple_white",
    hovermode="closest",
    title=title_style,
    legend=legend_style,
    margin={"b": 150},
)

# Usage hint for readers, positioned in paper coordinates (x=1.0, y=1.15) with no arrow
tips_text = (
    "<b>Chart Tips:</b> "
    "Click legend items to show/hide lines · "
    "Drag to zoom in · Use top-right menu to reset view"
)

fig.add_annotation(
    text=tips_text,
    showarrow=False,
    align="right",
    font={"size": 13, "color": "gray"},
    xref="paper",
    yref="paper",
    x=1.0,
    y=1.15,
)


In [None]:
# Show x-axis ticks as clock time (HH:MM:SS) on every subplot.
# update_xaxes() with no selector applies the setting to all x-axes in the
# figure, replacing the manual scan over layout keys starting with "xaxis".
fig.update_xaxes(tickformat="%H:%M:%S")

In [None]:
# Export the figure as a standalone HTML page (plotly.js loaded from the CDN), then display it
output_html = "textblob_vader_google_with_tips.html"
fig.write_html(output_html, include_plotlyjs="cdn", full_html=True)
fig.show()