---
## Setup and Data Loading

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import chi2_contingency
import warnings
warnings.filterwarnings('ignore')

# Plot defaults: seaborn grid theme plus high-resolution matplotlib output
sns.set_style('whitegrid')
plt.rcParams.update({
    'figure.dpi': 300,          # resolution used when figures are rendered
    'savefig.dpi': 300,         # resolution used when figures are written to disk
    'font.size': 10,
    'figure.figsize': (12, 6),  # default (width, height) in inches
})

In [None]:
# Read the survey data and report its dimensions
df = pd.read_csv('../data/AIRS_clean.csv')

n_rows, n_cols = df.shape
print(f"Sample size: N={n_rows}")
print(f"\nVariables loaded: {n_cols} columns")
print(f"\nTool usage variables:")

# Report, column by column, whether each expected tool-usage variable is present
tool_vars = ['Usage_MSCopilot', 'Usage_ChatGPT', 'Usage_Gemini', 'Usage_Other']
available_columns = set(df.columns)
for var in tool_vars:
    status_line = f"  ✓ {var}" if var in available_columns else f"  ✗ {var} (NOT FOUND)"
    print(status_line)

---
## 1. Descriptive Statistics: Tool Usage Frequencies

In [None]:
# Per-tool descriptive statistics, assembled one record per tool
tool_vars = ['Usage_MSCopilot', 'Usage_ChatGPT', 'Usage_Gemini', 'Usage_Other']
tool_labels = ['MS Copilot', 'ChatGPT', 'Google Gemini', 'Other AI Tools']

summary_records = []
for column, tool_name in zip(tool_vars, tool_labels):
    responses = df[column]
    summary_records.append({
        'Tool': tool_name,
        'Mean': responses.mean(),
        'SD': responses.std(),
        'Median': responses.median(),
        'Min': responses.min(),
        'Max': responses.max(),
        'N_Valid': responses.notna().sum(),  # non-missing responses for this tool
    })
desc_stats = pd.DataFrame(summary_records)

print("\n=== Tool Usage Descriptive Statistics ===")
print(desc_stats.to_string(index=False))

# Order tools from highest to lowest mean usage and list them
desc_stats_sorted = desc_stats.sort_values('Mean', ascending=False)
print("\n=== Tool Usage Ranking (by mean) ===")
for entry in desc_stats_sorted.itertuples(index=False):
    print(f"{entry.Tool}: M={entry.Mean:.2f}, SD={entry.SD:.2f}")

---
## 2. Tool Usage Frequency Distributions

In [None]:
# Frequency distributions (1-5 scale)
scale_labels = ['1=Never', '2=Rarely', '3=Sometimes', '4=Often', '5=Daily']
scale_points = list(range(1, len(scale_labels) + 1))

# Percentage of valid (non-missing) responses at each scale point, per tool.
# Each tool is reindexed onto the full 1-5 scale so that a level nobody chose
# shows up as 0% instead of (a) being dropped when columns are aligned to the
# first tool's observed levels, or (b) making the 5-label index assignment
# below fail with a length mismatch.
freq_table = pd.DataFrame({
    label: df[var].value_counts(normalize=True).reindex(scale_points, fill_value=0) * 100
    for var, label in zip(tool_vars, tool_labels)
})
freq_table.index = scale_labels

print("\n=== Tool Usage Frequency Distributions (%) ===")
print(freq_table.round(1))

# Calculate "active users" (Sometimes or more = 3-5).
# The denominator is the number of valid responses for that tool, matching the
# normalization used in the distribution table above; missing answers are not
# counted as "inactive".
print("\n=== Active Users (3-5 on scale) ===")
for var, label in zip(tool_vars, tool_labels):
    valid_responses = df[var].dropna()
    if len(valid_responses) > 0:
        active_pct = (valid_responses >= 3).mean() * 100
    else:
        active_pct = float('nan')  # no valid responses for this tool
    print(f"{label}: {active_pct:.1f}%")

In [None]:
# Visualization: Stacked bar chart of usage-frequency shares per tool
fig, ax = plt.subplots(figsize=(12, 6))

# Share of valid responses at each scale point, per tool. Reindexing onto the
# full 1-5 scale keeps a zero-width segment for any level nobody selected, so
# the five level names and the five colors below always line up with the data
# (assigning five names to fewer observed levels would raise an error).
level_names = ['Never', 'Rarely', 'Sometimes', 'Often', 'Daily']
scale_points = list(range(1, len(level_names) + 1))
freq_data = pd.DataFrame({
    label: df[var].value_counts(normalize=True).reindex(scale_points, fill_value=0) * 100
    for var, label in zip(tool_vars, tool_labels)
})
freq_data.index = level_names

# Plot: one horizontal bar per tool, one colored segment per frequency level
freq_data.T.plot(kind='barh', stacked=True, ax=ax,
                 color=['#d62728', '#ff7f0e', '#ffbb78', '#2ca02c', '#1f77b4'],
                 edgecolor='white', linewidth=0.5)

ax.set_xlabel('Percentage of Users (%)', fontsize=12, fontweight='bold')
ax.set_ylabel('AI Tool', fontsize=12, fontweight='bold')
# Sample size is taken from the loaded data rather than hard-coded
ax.set_title(f'AI Tool Usage Frequency Distributions (N={len(df)})',
             fontsize=14, fontweight='bold', pad=20)
ax.legend(title='Usage Frequency', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_xlim(0, 100)

# Add percentage labels; empty segments get no label to avoid stray "0%" text
for container in ax.containers:
    segment_labels = [f'{value:.0f}%' if value > 0 else '' for value in container.datavalues]
    ax.bar_label(container, labels=segment_labels, label_type='center', fontsize=8)

plt.tight_layout()
plt.savefig('../results/plots/07_tool_usage_distributions.png', bbox_inches='tight', dpi=300)
plt.show()

print("\n✓ Figure saved: 07_tool_usage_distributions.png")

---
## 3. Tool Usage Profiles: Single-Tool vs. Multi-Tool Users

In [None]:
# Flag "active use" of each tool: a response of 3 or higher on the 1-5 scale
active_flags = {
    'Active_MSCopilot': 'Usage_MSCopilot',
    'Active_ChatGPT': 'Usage_ChatGPT',
    'Active_Gemini': 'Usage_Gemini',
    'Active_Other': 'Usage_Other',
}
for flag_col, usage_col in active_flags.items():
    df[flag_col] = df[usage_col].ge(3).astype(int)

# Number of tools each respondent actively uses (0-4)
df['Num_Tools_Active'] = df[list(active_flags)].sum(axis=1)

# Bucket the count into three profiles: 0 tools, exactly 1 tool, 2 or more tools
df['Usage_Profile'] = pd.cut(
    df['Num_Tools_Active'],
    bins=[-0.5, 0.5, 1.5, 4.5],
    labels=['Non-User', 'Single-Tool', 'Multi-Tool'],
)

# Counts and percentage shares per profile
profile_counts = df['Usage_Profile'].value_counts()
profile_pcts = df['Usage_Profile'].value_counts(normalize=True) * 100

print("\n=== Tool Usage Profiles ===")
profile_headings = [
    ('Non-User', 'Non-Users (0 tools)'),
    ('Single-Tool', 'Single-Tool Users (1 tool)'),
    ('Multi-Tool', 'Multi-Tool Users (2+ tools)'),
]
for profile_key, heading in profile_headings:
    print(f"{heading}: {profile_counts[profile_key]} ({profile_pcts[profile_key]:.1f}%)")

# Average number of actively used tools across respondents
active_tool_counts = df['Num_Tools_Active']
print(f"\nMean active tools per user: M={active_tool_counts.mean():.2f}, SD={active_tool_counts.std():.2f}")

In [None]:
# Visualization: usage-profile pie chart (left) and active-tool-count bar chart (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Pie chart: Usage profiles.
# profile_counts comes from value_counts(), which orders entries by frequency,
# so a positional color list would attach colors to whichever profile happens
# to be most common. Pin a fixed profile order and look colors up by name so
# each profile always gets the same color.
profile_colors = {'Non-User': '#d62728', 'Single-Tool': '#ff7f0e', 'Multi-Tool': '#2ca02c'}
pie_counts = profile_counts.reindex(list(profile_colors), fill_value=0)
colors_profile = [profile_colors[profile] for profile in pie_counts.index]
ax1.pie(pie_counts, labels=pie_counts.index, autopct='%1.1f%%',
        colors=colors_profile, startangle=90, textprops={'fontsize': 11, 'fontweight': 'bold'})
ax1.set_title(f'Tool Usage Profiles (N={len(df)})', fontsize=14, fontweight='bold', pad=20)

# Bar chart: Number of tools distribution
num_tools_counts = df['Num_Tools_Active'].value_counts().sort_index()
ax2.bar(num_tools_counts.index, num_tools_counts.values, color='steelblue', edgecolor='black')
ax2.set_xlabel('Number of Tools Actively Used', fontsize=12, fontweight='bold')
ax2.set_ylabel('Number of Users', fontsize=12, fontweight='bold')
ax2.set_title('Distribution of Active Tool Count', fontsize=14, fontweight='bold', pad=20)
ax2.set_xticks(range(5))  # 0 through 4 tools
ax2.grid(axis='y', alpha=0.3)

# Add value labels slightly above each bar
for tool_count, n_users in num_tools_counts.items():
    ax2.text(tool_count, n_users + 5, str(n_users), ha='center', fontweight='bold')

plt.tight_layout()
plt.savefig('../results/plots/07_tool_usage_profiles.png', bbox_inches='tight', dpi=300)
plt.show()

print("\n✓ Figure saved: 07_tool_usage_profiles.png")

---
## 4. Tool Preferences by Context (Role, Education, Industry)

In [None]:
# Compare each tool's usage between students and professionals
print("\n=== Tool Usage by Role ===")
is_student = df['Role'] == 'Student'
is_professional = df['Role'] == 'Professional'

for var, label in zip(tool_vars, tool_labels):
    # Mean, SD, and n for every role value present in the data
    role_summary = df.groupby('Role')[var].agg(['mean', 'std', 'count'])
    print(f"\n{label}:")
    print(role_summary)

    # Independent-samples t-test on the non-missing responses of the two groups
    student_scores = df.loc[is_student, var].dropna()
    professional_scores = df.loc[is_professional, var].dropna()
    t_stat, p_val = stats.ttest_ind(student_scores, professional_scores)
    dof = len(student_scores) + len(professional_scores) - 2
    print(f"  t({dof}) = {t_stat:.3f}, p = {p_val:.4f}")

    if p_val < 0.05:
        if student_scores.mean() > professional_scores.mean():
            higher_group = 'Students'
        else:
            higher_group = 'Professionals'
        print(f"  ✓ {higher_group} use {label} significantly more (p<.05)")

In [None]:
# Visualization: grouped bar chart of mean tool usage per role
fig, ax = plt.subplots(figsize=(12, 6))

# Mean usage of every tool within each role (rows = roles, columns = tools)
role_usage = df.groupby('Role')[tool_vars].mean()
role_usage.columns = tool_labels

# Plot grouped bar chart
role_usage.plot(kind='bar', ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'],
                edgecolor='black', linewidth=0.8)

ax.set_xlabel('Role', fontsize=12, fontweight='bold')
ax.set_ylabel('Mean Usage Frequency (1-5 scale)', fontsize=12, fontweight='bold')
# Report the number of respondents that actually have a role, not a fixed constant
n_with_role = int(df['Role'].notna().sum())
ax.set_title(f'AI Tool Usage by Role: Students vs. Professionals (N={n_with_role})',
             fontsize=14, fontweight='bold', pad=20)

# Draw the threshold line BEFORE building the legend; a labelled artist added
# after ax.legend() is never picked up, so its label would not be shown.
ax.axhline(3, color='gray', linestyle='--', linewidth=1, alpha=0.5, label='Active Use Threshold')
ax.legend(title='AI Tool', bbox_to_anchor=(1.05, 1), loc='upper left')

ax.set_xticklabels(ax.get_xticklabels(), rotation=0)
ax.set_ylim(0, 5)
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('../results/plots/07_tool_usage_by_role.png', bbox_inches='tight', dpi=300)
plt.show()

print("\n✓ Figure saved: 07_tool_usage_by_role.png")

In [None]:
# Tool usage by Education level
if 'Education' in df.columns:
    print("\n=== Tool Usage by Education Level ===")

    # Map numeric education codes to readable labels (dict order = ordinal order)
    edu_labels = {1: 'High School', 2: 'Some College', 3: "Associate's",
                  4: "Bachelor's", 5: "Master's", 6: 'Doctoral'}
    df['Education_Label'] = df['Education'].map(edu_labels)

    # Labels that actually occur in the data, kept in ordinal order so the
    # summary table reads from lowest to highest level rather than alphabetically
    edu_order = [name for code, name in edu_labels.items()
                 if (df['Education'] == code).any()]

    for var, label in zip(tool_vars, tool_labels):
        edu_means = (df.groupby('Education_Label')[var]
                       .agg(['mean', 'count'])
                       .reindex(edu_order))
        print(f"\n{label}:")
        print(edu_means)

        # One-way ANOVA across education levels that have at least one response
        groups = [df.loc[df['Education'] == code, var].dropna() for code in edu_labels]
        groups = [g for g in groups if len(g) > 0]  # Remove empty groups
        # Two groups are enough for a one-way ANOVA, so only skip when there
        # are fewer than two groups with data
        if len(groups) >= 2:
            f_stat, p_val = stats.f_oneway(*groups)
            df_between = len(groups) - 1
            df_within = sum(len(g) for g in groups) - len(groups)
            print(f"  F({df_between}, {df_within}) = {f_stat:.3f}, p = {p_val:.4f}")
        else:
            print("  ANOVA skipped: fewer than two education groups with data")
else:
    print("\n⚠️ Education variable not found in dataset")

In [None]:
# Mean tool usage within the five most common industries
if 'Industry' not in df.columns:
    print("\n⚠️ Industry variable not found in dataset")
else:
    print("\n=== Tool Usage by Industry (Top 5 Industries) ===")

    # The five industries with the most respondents, and the rows belonging to them
    top_industries = df['Industry'].value_counts().index[:5]
    df_top_ind = df.loc[df['Industry'].isin(top_industries)]

    for var, label in zip(tool_vars, tool_labels):
        # Mean and n per industry, listed from highest to lowest mean usage
        industry_summary = df_top_ind.groupby('Industry')[var].agg(['mean', 'count'])
        industry_summary = industry_summary.sort_values('mean', ascending=False)
        print(f"\n{label}:")
        print(industry_summary)

---
## 5. Tool Usage Correlations with AIRS Constructs

In [None]:
# Correlations between tool usage and AIRS constructs
airs_constructs = ['PE2', 'EE1', 'SI1', 'FC1', 'HM2', 'PV2', 'HB2', 'VO1',
                   'TR2', 'EX1', 'ER2', 'AX1', 'BI']
construct_labels = ['PE', 'EE', 'SI', 'FC', 'HM', 'PV', 'HB', 'VO',
                    'TR', 'EX', 'ER', 'AX', 'BI']

# Build the BI composite from its four items when no BI column is present
if 'BI' not in df.columns and all(f'BI{i}' in df.columns for i in range(1, 5)):
    df['BI'] = df[['BI1', 'BI2', 'BI3', 'BI4']].mean(axis=1)
    print("✓ Created BI composite from BI1-BI4")


def _significance_stars(p):
    """Return the star suffix for a p-value: '***', '**', '*', or '' (p >= .05 or NaN)."""
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    return ''


# Only constructs that exist in the data get a column; absent ones would
# otherwise leave non-string NaN cells in a table of formatted strings.
available_constructs = [(var, label) for var, label in zip(airs_constructs, construct_labels)
                        if var in df.columns]
missing_constructs = [var for var in airs_constructs if var not in df.columns]
if missing_constructs:
    print(f"⚠️ Constructs not found in dataset (skipped): {', '.join(missing_constructs)}")

# Compute correlations (cells hold formatted strings such as "0.312***")
corr_matrix = pd.DataFrame(index=tool_labels,
                           columns=[label for _, label in available_constructs])

for tool_var, tool_label in zip(tool_vars, tool_labels):
    for airs_var, airs_label in available_constructs:
        # Pairwise-complete cases: drop a respondent only if EITHER value is
        # missing. Dropping NaNs from each column separately would pair up
        # values from different respondents (or fail on unequal lengths).
        pair = df[[tool_var, airs_var]].dropna()
        if len(pair) < 3:
            # Too few complete pairs for a meaningful correlation
            corr_matrix.loc[tool_label, airs_label] = 'nan'
            continue
        r, p = stats.pearsonr(pair[tool_var], pair[airs_var])
        corr_matrix.loc[tool_label, airs_label] = f"{r:.3f}{_significance_stars(p)}"

print("\n=== Tool Usage Correlations with AIRS Constructs ===")
print(corr_matrix)
print("\nNote: *p<.05, **p<.01, ***p<.001")

In [None]:
# Visualization: Heatmap of tool-usage x construct correlations
fig, ax = plt.subplots(figsize=(14, 6))

# Convert the formatted cells (e.g. "0.312***") back to numbers by stripping
# the trailing significance stars. Casting to str first and coercing on error
# means an empty/NaN cell becomes NaN (a blank heatmap cell) instead of raising,
# and avoids DataFrame.applymap, which newer pandas releases deprecate.
corr_numeric = corr_matrix.apply(
    lambda column: pd.to_numeric(column.astype(str).str.rstrip('*'), errors='coerce')
)

# Plot heatmap on a symmetric color scale centred on zero
sns.heatmap(corr_numeric, annot=True, fmt='.3f', cmap='RdBu_r',
            center=0, vmin=-0.5, vmax=0.5, cbar_kws={'label': 'Pearson r'},
            linewidths=0.5, ax=ax)

ax.set_xlabel('AIRS Constructs', fontsize=12, fontweight='bold')
ax.set_ylabel('AI Tool', fontsize=12, fontweight='bold')
# Sample size is taken from the loaded data rather than hard-coded
ax.set_title(f'Correlations: Tool Usage × AIRS Constructs (N={len(df)})',
             fontsize=14, fontweight='bold', pad=20)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

plt.tight_layout()
plt.savefig('../results/plots/07_tool_construct_correlations.png', bbox_inches='tight', dpi=300)
plt.show()

print("\n✓ Figure saved: 07_tool_construct_correlations.png")

---
## 6. Multi-Tool Users: AIRS Profile Comparison

In [None]:
# Mean of every available AIRS construct within each usage profile
print("\n=== AIRS Constructs by Usage Profile ===")

present_constructs = [(airs_var, airs_label)
                      for airs_var, airs_label in zip(airs_constructs, construct_labels)
                      if airs_var in df.columns]

profile_comparison = pd.DataFrame({
    airs_label: df.groupby('Usage_Profile')[airs_var].mean()
    for airs_var, airs_label in present_constructs
})

print(profile_comparison.round(3))

# One-way ANOVA per construct across the three usage profiles
print("\n=== ANOVA: Usage Profile Effects on AIRS Constructs ===")
profile_order = ['Non-User', 'Single-Tool', 'Multi-Tool']
for airs_var, airs_label in present_constructs:
    groups = [df.loc[df['Usage_Profile'] == prof, airs_var].dropna()
              for prof in profile_order]
    f_stat, p_val = stats.f_oneway(*groups)

    # Significance marker for the printed line
    if p_val < 0.001:
        sig = '***'
    elif p_val < 0.01:
        sig = '**'
    elif p_val < 0.05:
        sig = '*'
    else:
        sig = 'ns'

    df_within = sum(map(len, groups)) - 3  # total observations minus three groups
    print(f"{airs_label}: F(2, {df_within}) = {f_stat:.3f}, p = {p_val:.4f} {sig}")

In [None]:
# Visualization: Profile comparison radar chart
# (np.pi from the top-of-notebook numpy import replaces a mid-notebook
# `from math import pi`, keeping all imports in the first cell)

# Prepare data for radar chart: one axis per construct
categories = list(profile_comparison.columns)
N = len(categories)

# Evenly spaced angles around the circle; repeat the first to close the polygon
angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1]

# Initialize plot
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

# Plot each profile as a closed, lightly filled outline
colors = ['#d62728', '#ff7f0e', '#2ca02c']
for (profile, color) in zip(['Non-User', 'Single-Tool', 'Multi-Tool'], colors):
    values = profile_comparison.loc[profile].tolist()
    values += values[:1]  # close the polygon
    ax.plot(angles, values, 'o-', linewidth=2, label=profile, color=color)
    ax.fill(angles, values, alpha=0.15, color=color)

# Formatting
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, fontsize=11, fontweight='bold')
ax.set_ylim(0, 5)
ax.set_yticks([1, 2, 3, 4, 5])
ax.set_yticklabels(['1', '2', '3', '4', '5'], fontsize=9)
ax.grid(True, linestyle='--', alpha=0.5)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=11)
# Sample size is taken from the loaded data rather than hard-coded
ax.set_title(f'AIRS Construct Profiles by Tool Usage Pattern (N={len(df)})',
             fontsize=14, fontweight='bold', pad=30)

plt.tight_layout()
plt.savefig('../results/plots/07_usage_profile_radar.png', bbox_inches='tight', dpi=300)
plt.show()

print("\n✓ Figure saved: 07_usage_profile_radar.png")

---
## 7. Summary of Key Findings

In [None]:
# Generate summary statistics: a text recap of the results computed above
print("\n" + "="*70)
print("PHASE 7 SUMMARY: Tool Usage Patterns (RQ6)")
print("="*70)

# 1. Tools ranked by mean usage
print("\n1. TOOL POPULARITY RANKING:")
for i, row in enumerate(desc_stats_sorted.itertuples(), 1):
    print(f"   {i}. {row.Tool}: M={row.Mean:.2f}, SD={row.SD:.2f}")

# 2. Share of respondents in each usage profile
print("\n2. USER SEGMENTATION:")
print(f"   Non-Users: {profile_pcts['Non-User']:.1f}% (N={profile_counts['Non-User']})")
print(f"   Single-Tool Users: {profile_pcts['Single-Tool']:.1f}% (N={profile_counts['Single-Tool']})")
print(f"   Multi-Tool Users: {profile_pcts['Multi-Tool']:.1f}% (N={profile_counts['Multi-Tool']})")

# 3. Student-minus-professional difference in mean usage, per tool
print("\n3. ROLE DIFFERENCES (Students vs. Professionals):")
for var, label in zip(tool_vars, tool_labels):
    student_mean = df[df['Role'] == 'Student'][var].mean()
    prof_mean = df[df['Role'] == 'Professional'][var].mean()
    diff = student_mean - prof_mean
    direction = 'higher' if diff > 0 else 'lower'
    print(f"   {label}: Students {direction} by {abs(diff):.2f} points")

# 4. Tool-usage correlations with BI, strongest (by absolute value) first
print("\n4. STRONGEST CORRELATIONS WITH BI (Behavioral Intention):")
if 'BI' in df.columns:
    bi_corrs = []
    for var, label in zip(tool_vars, tool_labels):
        # Pairwise-complete cases: keep a respondent only when both values are
        # present, so the two inputs stay aligned row-for-row. Dropping NaNs
        # from each column separately would misalign or mismatch their lengths.
        pair = df[[var, 'BI']].dropna()
        if len(pair) < 3:
            continue  # too few complete pairs for a meaningful correlation
        bi_corrs.append((label, stats.pearsonr(pair[var], pair['BI'])[0]))
    bi_corrs_sorted = sorted(bi_corrs, key=lambda x: abs(x[1]), reverse=True)
    for tool, r in bi_corrs_sorted:
        print(f"   {tool}: r = {r:.3f}")

# 5. Constructs whose means differ across usage profiles (one-way ANOVA, p < .05)
print("\n5. MULTI-TOOL ADVANTAGE (ANOVA results):")
print("   Constructs showing significant differences across usage profiles:")
sig_constructs = []
for airs_var, airs_label in zip(airs_constructs, construct_labels):
    if airs_var in df.columns:
        groups = [df[df['Usage_Profile'] == prof][airs_var].dropna()
                  for prof in ['Non-User', 'Single-Tool', 'Multi-Tool']]
        f_stat, p_val = stats.f_oneway(*groups)
        if p_val < 0.05:
            sig_constructs.append((airs_label, p_val))
# List significant constructs from smallest to largest p-value
for construct, p in sorted(sig_constructs, key=lambda x: x[1]):
    print(f"   - {construct} (p = {p:.4f})")

print("\n" + "="*70)
print("Analysis complete. Figures saved to results/plots/")
print("="*70)

---
## Interpretation Notes

**Key Questions Answered**:
1. Which AI tools are most widely adopted? (Frequency rankings)
2. Do students and professionals prefer different tools? (Role comparisons)
3. Are multi-tool users more AI-ready than single-tool users? (Profile comparisons)
4. How does tool usage relate to AIRS constructs? (Correlation patterns)

**Expected Patterns**:
- **ChatGPT dominance**: Likely highest usage due to accessibility and familiarity
- **MS Copilot in workplace**: Professionals may prefer enterprise-integrated tools
- **Multi-tool advantage**: Users with diverse tool experience likely show higher BI, lower AX
- **Habit correlation**: Tool usage frequency should correlate strongly with HB (Habit)

**Limitations**:
- Cross-sectional data (cannot establish causality)
- Self-reported usage (potential recall bias)
- Tool landscape evolving rapidly (findings time-sensitive)

**Next Steps**:
- Integrate findings into Chapter 4 Results section
- Discuss implications for targeted interventions (tool-specific training)
- Proceed to Phase 7b: Qualitative Feedback Analysis (RQ10)