# CSP AI Survey Insights Analysis (Sept 2025)

This notebook analyzes survey responses to identify key insights, trends, and actionable recommendations for the team.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from datetime import datetime
from collections import Counter

# Plot defaults: seaborn "whitegrid" theme and a 14x8 default figure size.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

# Every artefact of this notebook is written below this folder; create it
# (and any missing parents) up front so later cells can save unconditionally.
Path('../outputs/survey-insights').mkdir(exist_ok=True, parents=True)

# Date stamp (YYYY_MM_DD) that is appended to each output file name.
date_suffix = format(datetime.now(), '%Y_%m_%d')
print(f"Date suffix: {date_suffix}")

## 1. Load and Parse Survey Data

In [None]:
# Load survey responses
survey_df = pd.read_csv('../data/CSP AI Use and Confidence (Sept 2025) (Responses)_2025_10_03.csv')

print(f"Total survey responses: {len(survey_df)}")
print(f"Total questions: {len(survey_df.columns) - 2}")  # Excluding Timestamp and Email

# The question columns are addressed by position (indices 2-9) to avoid quote
# issues in the long header texts. Fail fast with a readable message if the
# export has fewer columns than that, instead of a bare IndexError below.
expected_columns = 10
if len(survey_df.columns) < expected_columns:
    raise ValueError(
        f"Expected at least {expected_columns} columns (Timestamp, Email and 8 questions), "
        f"but the CSV has {len(survey_df.columns)}"
    )

# Column shortcuts, in the order the questions appear in the export
(
    col_frequency,
    col_contexts,
    col_barriers,
    col_comfort,
    col_understanding,
    col_risks,
    col_growth,
    col_optional,
) = survey_df.columns[2:10]

print("\nColumn names loaded successfully")
survey_df.head()

## 2. Usage Frequency Analysis

In [None]:
# Tally how many respondents gave each usage-frequency answer
total_responses = len(survey_df)
frequency_counts = survey_df[col_frequency].value_counts()

banner = "=" * 80
print(banner)
print("USAGE FREQUENCY ANALYSIS")
print(banner)
for freq, count in frequency_counts.items():
    share = count/total_responses*100
    print(f"{freq:30s}: {count:3d} ({share:5.1f}%)")

# "Heavy" users are those who answered 'Daily' or 'Multiple times per week';
# an answer nobody gave contributes 0.
heavy_users = sum(frequency_counts.get(answer, 0) for answer in ('Daily', 'Multiple times per week'))
heavy_share = heavy_users/total_responses*100
print(f"\n{'Heavy users (Daily + Multiple/week)':30s}: {heavy_users:3d} ({heavy_share:5.1f}%)")
print(banner)

# Visualize: pie chart on the left, horizontal bar chart on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
colors = ['#2ecc71', '#27ae60', '#f39c12', '#e74c3c']

# Pie chart
ax1.pie(
    frequency_counts.values,
    labels=frequency_counts.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    textprops={'fontsize': 10, 'fontweight': 'bold'},
)
ax1.set_title('AI Tool Usage Frequency', fontsize=14, fontweight='bold')

# Bar chart, with "count (pct%)" written just past the end of each bar
ax2.barh(frequency_counts.index, frequency_counts.values, color=colors, alpha=0.8, edgecolor='black')
ax2.set_xlabel('Number of Respondents', fontsize=12, fontweight='bold')
ax2.set_title('AI Tool Usage Frequency Distribution', fontsize=14, fontweight='bold')
for bar_pos, bar_len in enumerate(frequency_counts.values):
    bar_label = f'{bar_len} ({bar_len/total_responses*100:.1f}%)'
    ax2.text(bar_len + 1, bar_pos, bar_label, va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
output_file = f'../outputs/survey-insights/usage_frequency_{date_suffix}.png'
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: {output_file}")

## 3. Comfort Level & Confidence Analysis

In [None]:
# Analyze comfort levels
comfort_order = ['Beginner', 'Intermediate', 'Advanced', 'Expert']
# Put the levels in a fixed order. fill_value=0 matters: without it a level that
# nobody selected becomes NaN, the counts turn into floats and the ':3d' format
# below raises ValueError. Answers not listed in comfort_order are dropped here.
comfort_counts = survey_df[col_comfort].value_counts().reindex(comfort_order, fill_value=0)

print("=" * 80)
print("COMFORT LEVEL DISTRIBUTION")
print("=" * 80)
for level, count in comfort_counts.items():
    print(f"{level:15s}: {count:3d} ({count/total_responses*100:5.1f}%)")
print("=" * 80)

# Calculate confidence scores (mean of the two self-rating columns)
avg_understanding = survey_df[col_understanding].mean()
avg_risks = survey_df[col_risks].mean()

print("\n" + "=" * 80)
print("CONFIDENCE SCORES (1-5 scale)")
print("=" * 80)
print(f"Understanding AI strengths/limitations: {avg_understanding:.2f}/5")
print(f"Understanding AI risks (security, PII):  {avg_risks:.2f}/5")
print("=" * 80)

# Visualize: 2x2 grid (comfort levels, two score distributions, average comparison)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Comfort level bar chart
colors_comfort = ['#e74c3c', '#f39c12', '#2ecc71', '#27ae60']
axes[0, 0].bar(comfort_counts.index, comfort_counts.values, color=colors_comfort, alpha=0.8, edgecolor='black')
axes[0, 0].set_ylabel('Number of Respondents', fontsize=12, fontweight='bold')
axes[0, 0].set_title('Comfort Level with AI Tools', fontsize=14, fontweight='bold')
for i, v in enumerate(comfort_counts.values):
    axes[0, 0].text(i, v + 1, f'{v}\n({v/total_responses*100:.1f}%)', ha='center', fontsize=10, fontweight='bold')

# Understanding strengths/limitations
understanding_counts = survey_df[col_understanding].value_counts().sort_index()
axes[0, 1].bar(understanding_counts.index, understanding_counts.values, color='#3498db', alpha=0.8, edgecolor='black')
axes[0, 1].set_xlabel('Confidence Score (1-5)', fontsize=12, fontweight='bold')
axes[0, 1].set_ylabel('Number of Respondents', fontsize=12, fontweight='bold')
axes[0, 1].set_title(f'Understanding AI Strengths/Limitations\n(Average: {avg_understanding:.2f}/5)',
                     fontsize=14, fontweight='bold')
# Bars are positioned at the score value itself, so the label x-coordinate is the score
for score, count in understanding_counts.items():
    axes[0, 1].text(score, count + 1, f'{count}', ha='center', fontsize=10, fontweight='bold')

# Understanding risks
risks_counts = survey_df[col_risks].value_counts().sort_index()
axes[1, 0].bar(risks_counts.index, risks_counts.values, color='#e67e22', alpha=0.8, edgecolor='black')
axes[1, 0].set_xlabel('Confidence Score (1-5)', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Number of Respondents', fontsize=12, fontweight='bold')
axes[1, 0].set_title(f'Understanding AI Risks (Security, PII)\n(Average: {avg_risks:.2f}/5)',
                     fontsize=14, fontweight='bold')
for score, count in risks_counts.items():
    axes[1, 0].text(score, count + 1, f'{count}', ha='center', fontsize=10, fontweight='bold')

# Comparison of confidence scores
categories = ['Understanding\nStrengths/Limitations', 'Understanding\nRisks']
scores = [avg_understanding, avg_risks]
axes[1, 1].bar(categories, scores, color=['#3498db', '#e67e22'], alpha=0.8, edgecolor='black')
axes[1, 1].set_ylabel('Average Score (out of 5)', fontsize=12, fontweight='bold')
axes[1, 1].set_ylim(0, 5)
axes[1, 1].set_title('Average Confidence Scores', fontsize=14, fontweight='bold')
# Dashed reference line at the scale midpoint
axes[1, 1].axhline(y=3, color='red', linestyle='--', alpha=0.5, label='Baseline (3/5)')
for i, v in enumerate(scores):
    axes[1, 1].text(i, v + 0.1, f'{v:.2f}', ha='center', fontsize=12, fontweight='bold')
axes[1, 1].legend()

plt.tight_layout()
output_file = f'../outputs/survey-insights/confidence_levels_{date_suffix}.png'
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: {output_file}")

## 4. AI Use Contexts Analysis

In [None]:
# Flatten the multi-select answers into one list of comma-separated fragments
contexts_list = [
    fragment.strip()
    for response in survey_df[col_contexts]
    if pd.notna(response)
    for fragment in str(response).split(',')
]

# Main categories. Option texts that themselves contain commas are broken up by
# the split above, so fragments are matched by substring and each fragment is
# credited to the first category it contains (fragments matching none are ignored).
context_labels = [
    'Coding / debugging',
    'Writing tests',
    'Documentation / summarization',
    'Code review / validation',
    'Experimenting / prototyping',
    'Data processing',
    'Multi-step workflows',
]
main_contexts = dict.fromkeys(context_labels, 0)
for context in contexts_list:
    hit = next((label for label in context_labels if label in context), None)
    if hit is not None:
        main_contexts[hit] += 1

# "Generative" is tallied on its own: any fragment mentioning 'Generative' or 'images'
generative_count = sum('Generative' in c or 'images' in c for c in contexts_list)
main_contexts['Generative (images, docs, ideas)'] = generative_count

contexts_df = pd.DataFrame(list(main_contexts.items()), columns=['Context', 'Count']).sort_values('Count', ascending=False)

rule = "=" * 80
print(rule)
print("AI USE CONTEXTS (Top Categories)")
print(rule)
for context_name, context_count in zip(contexts_df['Context'], contexts_df['Count']):
    pct = context_count / total_responses * 100
    print(f"{context_name:40s}: {context_count:3d} ({pct:5.1f}%)")
print(rule)

# Visualize as a horizontal bar chart, one bar per category
fig, ax = plt.subplots(figsize=(14, 8))
colors_contexts = plt.cm.viridis(np.linspace(0.3, 0.9, len(contexts_df)))
bars = ax.barh(contexts_df['Context'], contexts_df['Count'], color=colors_contexts, alpha=0.8, edgecolor='black')
ax.set_xlabel('Number of Respondents', fontsize=12, fontweight='bold')
ax.set_title('AI Use Contexts (Multi-select)', fontsize=14, fontweight='bold')
# Leave 15% headroom on the x axis for the text labels
ax.set_xlim(0, max(contexts_df['Count']) * 1.15)

for bar_pos, context_count in enumerate(contexts_df['Count']):
    pct = context_count / total_responses * 100
    ax.text(context_count + 2, bar_pos, f"{context_count} ({pct:.1f}%)", va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
output_file = f'../outputs/survey-insights/ai_use_contexts_{date_suffix}.png'
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: {output_file}")

## 5. Barriers to AI Adoption

In [None]:
# Parse barriers
# One string per respondent who answered. Counting is done on these whole
# answers so that each respondent contributes at most once per barrier; counting
# comma-split fragments would count a single free-text answer several times
# (e.g. "blocked in China, no Claude access" -> two fragments, two hits).
barrier_responses = [str(response) for response in survey_df[col_barriers] if pd.notna(response)]

# Flattened comma-separated fragments of every answer (not used for the counts below)
barriers_list = [item.strip() for response in barrier_responses for item in response.split(',')]

# Focus on main barriers: each keyword is matched as a substring of the answer
barrier_keywords = [
    "I'm actually good",
    "Lack of trust in outputs",
    "Lack of knowledge",
    "Lack of time",
    "Lack of access to the right tools",
    "No relevant use cases"
]
main_barriers = {
    keyword: sum(keyword in response for response in barrier_responses)
    for keyword in barrier_keywords
}

# Check for China/geographic issues: respondents mentioning any of these terms
geo_terms = ('China', 'Claude', 'Anthropic')
china_issues = sum(any(term in response for term in geo_terms) for response in barrier_responses)
if china_issues > 0:
    main_barriers['Geographic/Tool access (China)'] = china_issues

barriers_df = pd.DataFrame(list(main_barriers.items()), columns=['Barrier', 'Count']).sort_values('Count', ascending=False)

print("=" * 80)
print("BARRIERS TO AI ADOPTION")
print("=" * 80)
for _, row in barriers_df.iterrows():
    pct = row['Count'] / total_responses * 100
    print(f"{row['Barrier']:40s}: {row['Count']:3d} ({pct:5.1f}%)")
print("=" * 80)

# Visualize
fig, ax = plt.subplots(figsize=(14, 8))
# Green for "no barrier", red for trust issues, orange for everything else
colors_barriers = ['#2ecc71' if 'actually good' in b else '#e74c3c' if 'trust' in b.lower() else '#f39c12'
                   for b in barriers_df['Barrier']]
bars = ax.barh(barriers_df['Barrier'], barriers_df['Count'], color=colors_barriers, alpha=0.8, edgecolor='black')
ax.set_xlabel('Number of Respondents', fontsize=12, fontweight='bold')
ax.set_title('Barriers to AI Adoption (Multi-select)', fontsize=14, fontweight='bold')
# Leave 15% headroom on the x axis for the text labels
ax.set_xlim(0, max(barriers_df['Count']) * 1.15)

for i, (_, row) in enumerate(barriers_df.iterrows()):
    pct = row['Count'] / total_responses * 100
    ax.text(row['Count'] + 1, i, f"{row['Count']} ({pct:.1f}%)", va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
output_file = f'../outputs/survey-insights/barriers_{date_suffix}.png'
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: {output_file}")

## 6. Desired Growth Areas

In [None]:
# Flatten the multi-select growth answers into comma-separated fragments
growth_list = [
    fragment.strip()
    for response in survey_df[col_growth]
    if pd.notna(response)
    for fragment in str(response).split(',')
]

# Display name -> substring that identifies the option in a fragment
growth_mapping = {
    'Coding & debugging': 'Using AI for coding & debugging',
    'AI-assisted testing': 'AI-assisted testing',
    'Documentation & summarization': 'Documentation & summarization',
    'Code review & validation': 'Code review & validation',
    'Workflow automation': 'Workflow automation',
    'Creative engineering tasks': 'AI for creative engineering tasks',
    'Data analysis': 'AI-powered data analysis',
    'Understanding AI models': 'Understanding AI model capabilities',
    'Security & risk awareness': 'Security & risk awareness',
    'Building AI features': 'Building AI-powered features',
    'Mentoring others': 'Mentoring & teaching others'
}

# Number of fragments containing each option's identifying substring
growth_counts = {
    short_name: sum(keyword in fragment for fragment in growth_list)
    for short_name, keyword in growth_mapping.items()
}

growth_df = pd.DataFrame(list(growth_counts.items()), columns=['Growth Area', 'Count']).sort_values('Count', ascending=False)

rule = "=" * 80
print(rule)
print("DESIRED GROWTH AREAS (Top Priorities)")
print(rule)
for area_name, area_count in zip(growth_df['Growth Area'], growth_df['Count']):
    pct = area_count / total_responses * 100
    print(f"{area_name:40s}: {area_count:3d} ({pct:5.1f}%)")
print(rule)

# Visualize as a horizontal bar chart, one bar per growth area
fig, ax = plt.subplots(figsize=(14, 10))
colors_growth = plt.cm.plasma(np.linspace(0.2, 0.9, len(growth_df)))
bars = ax.barh(growth_df['Growth Area'], growth_df['Count'], color=colors_growth, alpha=0.8, edgecolor='black')
ax.set_xlabel('Number of Respondents', fontsize=12, fontweight='bold')
ax.set_title('Desired AI Skill Growth Areas (Next 3 Months)', fontsize=14, fontweight='bold')
# Leave 15% headroom on the x axis for the text labels
ax.set_xlim(0, max(growth_df['Count']) * 1.15)

for bar_pos, area_count in enumerate(growth_df['Count']):
    pct = area_count / total_responses * 100
    ax.text(area_count + 2, bar_pos, f"{area_count} ({pct:.1f}%)", va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
output_file = f'../outputs/survey-insights/growth_areas_{date_suffix}.png'
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: {output_file}")

## 7. Notable Quotes & Use Cases

In [None]:
# Free-text answers to the optional question, blanks removed
optional_responses = survey_df[col_optional].dropna()

rule = "=" * 80
print(rule)
print(f"NOTABLE USE CASES ({len(optional_responses)} responses)")
print(rule)
# Show only the first ten answers, numbered from 1
first_ten = optional_responses.head(10)
for number, text in enumerate(first_ten, start=1):
    print(f"\n{number}. {text}")
print(f"\n{rule}")

## 8. Key Insights Summary

In [None]:
# Calculate key metrics (all percentages are relative to the number of survey rows)
total_respondents = len(survey_df)
daily_users_pct = (frequency_counts.get('Daily', 0) / total_respondents * 100)
heavy_users_pct = (heavy_users / total_respondents * 100)
intermediate_pct = (comfort_counts.get('Intermediate', 0) / total_respondents * 100)
advanced_plus_pct = ((comfort_counts.get('Advanced', 0) + comfort_counts.get('Expert', 0)) / total_respondents * 100)
no_barriers_pct = (main_barriers.get("I'm actually good", 0) / total_respondents * 100)
trust_issues_pct = (main_barriers.get('Lack of trust in outputs', 0) / total_respondents * 100)

# contexts_df is sorted by count, so its first three rows are the top use cases.
# Take the labels from the data as well: hard-coding "Documentation / Coding /
# Testing" next to rank-based counts mislabels them whenever the ranking differs.
top_use_cases = ", ".join(
    f"{row['Context']} ({row['Count']})" for _, row in contexts_df.head(3).iterrows()
)

print("=" * 80)
print("KEY INSIGHTS SUMMARY")
print("=" * 80)
print("\n📊 ADOPTION & ENGAGEMENT")
print(f"  • Total survey responses: {total_respondents}")
print(f"  • Daily users: {daily_users_pct:.1f}%")
print(f"  • Heavy users (daily + multiple/week): {heavy_users_pct:.1f}%")
print(f"  • Top use cases: {top_use_cases}")

print("\n🎯 SKILL LEVELS & CONFIDENCE")
# NOTE(review): "(largest group)" is fixed text, not derived from comfort_counts - confirm it holds for this data
print(f"  • Intermediate level: {intermediate_pct:.1f}% (largest group)")
print(f"  • Advanced/Expert: {advanced_plus_pct:.1f}%")
print(f"  • Avg understanding of AI limitations: {avg_understanding:.2f}/5")
print(f"  • Avg understanding of AI risks: {avg_risks:.2f}/5")

print("\n🚧 BARRIERS & CHALLENGES")
print(f"  • No barriers (learning well): {no_barriers_pct:.1f}%")
print(f"  • Lack of trust in outputs: {trust_issues_pct:.1f}%")
print(f"  • Geographic/tool access issues: {main_barriers.get('Geographic/Tool access (China)', 0)} mentions")

print("\n🚀 TOP GROWTH PRIORITIES")
# growth_df is sorted by count, so head(3) is the three most requested areas
for i, (_, row) in enumerate(growth_df.head(3).iterrows(), 1):
    print(f"  {i}. {row['Growth Area']} ({row['Count']} responses)")

print("\n" + "=" * 80)

## 9. Export Insights Report

In [None]:
# Export formatted text report
output_file_txt = f'../outputs/survey-insights/insights_summary_{date_suffix}.txt'

# The report contains non-ASCII characters ("→"), so the encoding is set
# explicitly: with the platform default (e.g. cp1252 on Windows) the write
# would raise UnicodeEncodeError.
# NOTE(review): the "Top use cases" line, "(largest group)" and the
# "32 people interested" figure below are fixed text, not derived from the
# data frames - confirm they still match the current survey export.
with open(output_file_txt, 'w', encoding='utf-8') as f:
    f.write("=" * 80 + "\n")
    f.write("CSP AI USE AND CONFIDENCE SURVEY - INSIGHTS REPORT\n")
    f.write("=" * 80 + "\n")
    f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"Total Responses: {total_respondents}\n")
    f.write("=" * 80 + "\n\n")

    f.write("KEY INSIGHTS\n")
    f.write("-" * 80 + "\n\n")

    f.write("1. HIGH ADOPTION & ENGAGEMENT\n")
    f.write(f"   - {heavy_users_pct:.1f}% are heavy users (daily or multiple times per week)\n")
    f.write(f"   - {daily_users_pct:.1f}% use AI tools daily\n")
    f.write("   - Top use cases: Documentation, Coding/debugging, Writing tests\n\n")

    f.write("2. SKILL LEVELS & CONFIDENCE GAPS\n")
    f.write(f"   - {intermediate_pct:.1f}% at Intermediate level (largest group)\n")
    f.write(f"   - {advanced_plus_pct:.1f}% at Advanced/Expert level\n")
    f.write(f"   - Average understanding of AI limitations: {avg_understanding:.2f}/5\n")
    f.write(f"   - Average understanding of AI risks: {avg_risks:.2f}/5\n")
    f.write("   - Opportunity: Move Intermediate → Advanced through targeted training\n\n")

    f.write("3. BARRIERS TO ADOPTION\n")
    f.write(f"   - {no_barriers_pct:.1f}% report no barriers (learning well)\n")
    f.write(f"   - {trust_issues_pct:.1f}% lack trust in AI outputs\n")
    f.write("   - Geographic challenges: China-based engineers face Claude/Anthropic access issues\n\n")

    f.write("4. TOP GROWTH PRIORITIES (Next 3 months)\n")
    # growth_df is sorted by count, so head(5) is the five most requested areas
    for i, (_, row) in enumerate(growth_df.head(5).iterrows(), 1):
        pct = row['Count'] / total_respondents * 100
        f.write(f"   {i}. {row['Growth Area']}: {row['Count']} responses ({pct:.1f}%)\n")
    f.write("\n")

    f.write("=" * 80 + "\n")
    f.write("RECOMMENDATIONS\n")
    f.write("=" * 80 + "\n\n")

    f.write("1. TRAINING & EDUCATION\n")
    f.write("   - Create intermediate → advanced learning path\n")
    f.write("   - Focus on: prompt engineering, validation strategies, understanding limitations\n")
    f.write("   - Address trust issues through best practices and case studies\n\n")

    f.write("2. TOOL ACCESS & INFRASTRUCTURE\n")
    f.write("   - Resolve geographic access issues for China-based teams\n")
    f.write("   - Ensure equitable tool access across all locations\n")
    f.write("   - Streamline reimbursement processes\n\n")

    f.write("3. SKILL DEVELOPMENT PROGRAMS\n")
    f.write("   - Prioritize: Coding & debugging, Building AI features, Understanding models\n")
    f.write("   - Create hands-on workshops and hackathons\n")
    f.write("   - Establish mentorship programs (32 people interested)\n\n")

    f.write("4. SECURITY & RISK AWARENESS\n")
    f.write(f"   - Risk understanding score: {avg_risks:.2f}/5 - needs improvement\n")
    f.write("   - Mandatory training on PII/IP protection\n")
    f.write("   - Clear guidelines for responsible AI use\n\n")

    f.write("=" * 80 + "\n")

print(f"✓ Saved: {output_file_txt}")

# Export HTML report (Google Docs compatible)
output_file_html = f'../outputs/survey-insights/insights_summary_{date_suffix}.html'

# The page declares <meta charset="UTF-8"> and contains non-ASCII characters
# ("→"), so the file must actually be written as UTF-8; relying on the platform
# default encoding can either fail outright or produce a mis-declared file.
# NOTE(review): the "Top use cases" line, "(largest group)" and the
# "32 people interested" figure below are fixed text, not derived from the
# data frames - confirm they still match the current survey export.
with open(output_file_html, 'w', encoding='utf-8') as f:
    f.write('<!DOCTYPE html>\n<html>\n<head>\n')
    f.write('<meta charset="UTF-8">\n')
    f.write('<title>CSP AI Survey Insights Report</title>\n')
    f.write('<style>\n')
    f.write('body { font-family: Arial, sans-serif; line-height: 1.6; max-width: 800px; margin: 40px auto; padding: 20px; }\n')
    f.write('h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }\n')
    f.write('h2 { color: #34495e; margin-top: 30px; border-bottom: 2px solid #95a5a6; padding-bottom: 5px; }\n')
    f.write('h3 { color: #7f8c8d; margin-top: 20px; }\n')
    f.write('.metric { background: #ecf0f1; padding: 15px; margin: 10px 0; border-radius: 5px; }\n')
    f.write('.highlight { background: #fff3cd; padding: 10px; border-left: 4px solid #ffc107; margin: 10px 0; }\n')
    f.write('ul { line-height: 1.8; }\n')
    f.write('</style>\n</head>\n<body>\n')

    f.write('<h1>CSP AI Use and Confidence Survey - Insights Report</h1>\n')
    f.write(f'<p><strong>Generated on:</strong> {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>\n')
    f.write(f'<p><strong>Total Responses:</strong> {total_respondents}</p>\n')

    f.write('<h2>Key Insights</h2>\n')

    f.write('<h3>1. High Adoption & Engagement</h3>\n<ul>\n')
    f.write(f'<li><strong>{heavy_users_pct:.1f}%</strong> are heavy users (daily or multiple times per week)</li>\n')
    f.write(f'<li><strong>{daily_users_pct:.1f}%</strong> use AI tools daily</li>\n')
    f.write('<li>Top use cases: <strong>Documentation</strong>, <strong>Coding/debugging</strong>, <strong>Writing tests</strong></li>\n')
    f.write('</ul>\n')

    f.write('<h3>2. Skill Levels & Confidence Gaps</h3>\n<ul>\n')
    f.write(f'<li><strong>{intermediate_pct:.1f}%</strong> at Intermediate level (largest group)</li>\n')
    f.write(f'<li><strong>{advanced_plus_pct:.1f}%</strong> at Advanced/Expert level</li>\n')
    f.write(f'<li>Average understanding of AI limitations: <strong>{avg_understanding:.2f}/5</strong></li>\n')
    f.write(f'<li>Average understanding of AI risks: <strong>{avg_risks:.2f}/5</strong></li>\n')
    f.write('<li class="highlight">Opportunity: Move Intermediate → Advanced through targeted training</li>\n')
    f.write('</ul>\n')

    f.write('<h3>3. Barriers to Adoption</h3>\n<ul>\n')
    f.write(f'<li><strong>{no_barriers_pct:.1f}%</strong> report no barriers (learning well)</li>\n')
    f.write(f'<li><strong>{trust_issues_pct:.1f}%</strong> lack trust in AI outputs</li>\n')
    f.write('<li class="highlight">Geographic challenges: China-based engineers face Claude/Anthropic access issues</li>\n')
    f.write('</ul>\n')

    f.write('<h3>4. Top Growth Priorities (Next 3 months)</h3>\n<ol>\n')
    # growth_df is sorted by count, so head(5) is the five most requested areas;
    # the <ol> element supplies the numbering
    for _, row in growth_df.head(5).iterrows():
        pct = row['Count'] / total_respondents * 100
        f.write(f'<li><strong>{row["Growth Area"]}</strong>: {row["Count"]} responses ({pct:.1f}%)</li>\n')
    f.write('</ol>\n')

    f.write('<h2>Recommendations</h2>\n')

    f.write('<h3>1. Training & Education</h3>\n<ul>\n')
    f.write('<li>Create intermediate → advanced learning path</li>\n')
    f.write('<li>Focus on: prompt engineering, validation strategies, understanding limitations</li>\n')
    f.write('<li>Address trust issues through best practices and case studies</li>\n')
    f.write('</ul>\n')

    f.write('<h3>2. Tool Access & Infrastructure</h3>\n<ul>\n')
    f.write('<li>Resolve geographic access issues for China-based teams</li>\n')
    f.write('<li>Ensure equitable tool access across all locations</li>\n')
    f.write('<li>Streamline reimbursement processes</li>\n')
    f.write('</ul>\n')

    f.write('<h3>3. Skill Development Programs</h3>\n<ul>\n')
    f.write('<li>Prioritize: Coding & debugging, Building AI features, Understanding models</li>\n')
    f.write('<li>Create hands-on workshops and hackathons</li>\n')
    f.write('<li>Establish mentorship programs (32 people interested)</li>\n')
    f.write('</ul>\n')

    f.write('<h3>4. Security & Risk Awareness</h3>\n<ul>\n')
    f.write(f'<li>Risk understanding score: <strong>{avg_risks:.2f}/5</strong> - needs improvement</li>\n')
    f.write('<li>Mandatory training on PII/IP protection</li>\n')
    f.write('<li>Clear guidelines for responsible AI use</li>\n')
    f.write('</ul>\n')

    f.write('</body>\n</html>')

print(f"✓ Saved: {output_file_html}")
print("  (Open in browser, then copy-paste into Google Docs)")

# Export the headline metrics as a two-column CSV (Metric, Value)
output_file_csv = f'../outputs/survey-insights/insights_data_{date_suffix}.csv'

# (label, value) pairs; percentages are rounded to 1 decimal, scores to 2
metric_rows = [
    ('Total Responses', total_respondents),
    ('Daily Users %', round(daily_users_pct, 1)),
    ('Heavy Users %', round(heavy_users_pct, 1)),
    ('Intermediate Level %', round(intermediate_pct, 1)),
    ('Advanced/Expert %', round(advanced_plus_pct, 1)),
    ('Avg Understanding Score', round(avg_understanding, 2)),
    ('Avg Risk Awareness Score', round(avg_risks, 2)),
    ('No Barriers %', round(no_barriers_pct, 1)),
    ('Trust Issues %', round(trust_issues_pct, 1)),
]
metric_names, metric_values = zip(*metric_rows)
insights_data = pd.DataFrame({
    'Metric': list(metric_names),
    'Value': list(metric_values),
})
insights_data.to_csv(output_file_csv, index=False)
print(f"✓ Saved: {output_file_csv}")

# Closing banner listing every file the notebook writes
rule = "=" * 80
print("\n" + rule)
print("ANALYSIS COMPLETE")
print(rule)
print("\nGenerated files:")
generated_lines = (
    f"  - outputs/survey-insights/usage_frequency_{date_suffix}.png",
    f"  - outputs/survey-insights/confidence_levels_{date_suffix}.png",
    f"  - outputs/survey-insights/ai_use_contexts_{date_suffix}.png",
    f"  - outputs/survey-insights/barriers_{date_suffix}.png",
    f"  - outputs/survey-insights/growth_areas_{date_suffix}.png",
    f"  - outputs/survey-insights/insights_summary_{date_suffix}.txt",
    f"  - outputs/survey-insights/insights_summary_{date_suffix}.html (Google Docs compatible)",
    f"  - outputs/survey-insights/insights_data_{date_suffix}.csv",
)
for generated_line in generated_lines:
    print(generated_line)
print(rule)