## 1. Setup and Data Loading

In [29]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
from sklearn.model_selection import train_test_split
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings that could point at
# data or library problems are hidden as well.
warnings.filterwarnings('ignore')

# Single seed shared by NumPy's global RNG and the later train/test split
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

for status_line in ("Libraries imported successfully", f"Random seed: {RANDOM_SEED}"):
    print(status_line)

Libraries imported successfully
Random seed: 42


In [30]:
# Read the cleaned survey export that every later cell works from
df = pd.read_csv('../data/AIRS_clean.csv')

# Rows are respondents, columns are variables
n_rows, n_cols = df.shape
leading_columns = list(df.columns[:10])

print(f"Total sample: N = {n_rows}")
print(f"Variables: {n_cols}")
print(f"\nFirst few columns: {leading_columns}")

Total sample: N = 318
Variables: 45

First few columns: ['Duration_minutes', 'PE1', 'PE2', 'EE1', 'EE2', 'SI1', 'SI2', 'FC1', 'FC2', 'HM1']


## 2. Pre-Split Distributions

Document baseline distributions before splitting:

In [31]:
rule = "=" * 70
n_total = len(df)

print(rule)
print("PRE-SPLIT DISTRIBUTIONS")
print(rule)

# Work Context: raw counts plus percentage of the full sample
print("\n1. Work Context:")
work_context_dist = df['Work_Context'].value_counts()
print(work_context_dist)
work_context_pct = (work_context_dist / n_total * 100).round(1)
print(f"Proportions: {work_context_pct.to_dict()}")

# AI Adoption: .get(code, 0) keeps the report working if a code never occurs
print("\n2. AI Adoption:")
ai_adoption_dist = df['AI_Adoption'].value_counts()
for group_label, code in (("Adopters", 1), ("Non-Adopters", 0)):
    n_group = ai_adoption_dist.get(code, 0)
    print(f"{group_label} ({code}): {n_group} ({n_group / n_total * 100:.1f}%)")

# Usage Intensity
print("\n3. Usage Intensity:")
usage_intensity_dist = df['Usage_Intensity'].value_counts()
print(usage_intensity_dist)

# Experience Level
print("\n4. Experience Level:")
experience_dist = df['Experience_Level'].value_counts()
print(experience_dist)

print("\n" + rule)

PRE-SPLIT DISTRIBUTIONS

1. Work Context:
Work_Context
Professional        168
Academic-Student    105
Academic-Faculty     45
Name: count, dtype: int64
Proportions: {'Professional': 52.8, 'Academic-Student': 33.0, 'Academic-Faculty': 14.2}

2. AI Adoption:
Adopters (1): 285 (89.6%)
Non-Adopters (0): 33 (10.4%)

3. Usage Intensity:
Usage_Intensity
Medium      102
Low          93
High         90
Non-User     33
Name: count, dtype: int64

4. Experience Level:
Experience_Level
Expert    110
Entry      59
Early      57
Mid        52
Senior     40
Name: count, dtype: int64



## 3. Create Stratified Split

Stratify by **Work Context × AI Adoption** to ensure balanced representation:

In [32]:
# One label per respondent of the form "<Work_Context>_<AI_Adoption>";
# the split cell stratifies on this column and the save cell drops it again.
context_labels = df['Work_Context'].astype(str)
adoption_labels = df['AI_Adoption'].astype(str)
df['stratify_key'] = context_labels.str.cat(adoption_labels, sep='_')

strata_sizes = df['stratify_key'].value_counts().sort_index()

print("Stratification groups (Work Context × AI Adoption):")
print(strata_sizes)
print(f"\nTotal unique strata: {df['stratify_key'].nunique()}")

Stratification groups (Work Context × AI Adoption):
stratify_key
Academic-Faculty_0      2
Academic-Faculty_1     43
Academic-Student_0      5
Academic-Student_1    100
Professional_0         26
Professional_1        142
Name: count, dtype: int64

Total unique strata: 6


In [33]:
# Halve the sample while keeping each Work Context × AI Adoption stratum
# proportionally represented in both halves.
strata = df['stratify_key']
df_dev, df_holdout = train_test_split(
    df, test_size=0.50, stratify=strata, random_state=RANDOM_SEED
)

rule = "=" * 70
n_total = len(df)
n_dev, n_holdout = len(df_dev), len(df_holdout)

print(rule)
print("SPLIT-SAMPLE CREATION COMPLETE")
print(rule)
print(f"\nDevelopment sample: N = {n_dev} ({n_dev/n_total*100:.1f}%)")
print(f"Holdout sample:     N = {n_holdout} ({n_holdout/n_total*100:.1f}%)")
print(f"Total:              N = {n_total}")
print("\n✓ Stratified by: Work Context × AI Adoption")
print(f"✓ Random seed: {RANDOM_SEED} (reproducible)")

SPLIT-SAMPLE CREATION COMPLETE

Development sample: N = 159 (50.0%)
Holdout sample:     N = 159 (50.0%)
Total:              N = 318

✓ Stratified by: Work Context × AI Adoption
✓ Random seed: 42 (reproducible)


## 4. Validation: Post-Split Balance

Verify stratification succeeded with chi-square tests:

In [34]:
def test_balance(variable_name, dev_sample, holdout_sample):
    """
    Test if variable distribution differs between development and holdout samples.
    H0: Distributions are equal (balanced split)
    """
    # Create proper contingency table
    dev_counts = dev_sample[variable_name].value_counts().sort_index()
    holdout_counts = holdout_sample[variable_name].value_counts().sort_index()
    
    contingency_table = pd.DataFrame({
        'Development': dev_counts,
        'Holdout': holdout_counts
    }).fillna(0)
    
    chi2, p_value, dof, expected = chi2_contingency(contingency_table)
    
    print(f"\n{variable_name}:")
    print(contingency_table)
    print(f"\nχ² = {chi2:.3f}, df = {dof}, p = {p_value:.4f}")
    
    if p_value > 0.05:
        print(f"✓ Balanced (p > 0.05): No significant difference between samples")
    else:
        print(f"⚠ Imbalanced (p ≤ 0.05): Significant difference detected")
    
    return p_value

rule = "=" * 70
print(rule, "POST-SPLIT BALANCE TESTS (Chi-Square)", rule, sep="\n")

# Both variables below are the ones combined into the stratification key,
# so these tests confirm the split honoured the strata.
p_work_context = test_balance(variable_name='Work_Context', dev_sample=df_dev, holdout_sample=df_holdout)
p_ai_adoption = test_balance(variable_name='AI_Adoption', dev_sample=df_dev, holdout_sample=df_holdout)

POST-SPLIT BALANCE TESTS (Chi-Square)

Work_Context:
                  Development  Holdout
Work_Context                          
Academic-Faculty           22       23
Academic-Student           53       52
Professional               84       84

χ² = 0.032, df = 2, p = 0.9843
✓ Balanced (p > 0.05): No significant difference between samples

AI_Adoption:
             Development  Holdout
AI_Adoption                      
0                     17       16
1                    142      143

χ² = 0.000, df = 1, p = 1.0000
✓ Balanced (p > 0.05): No significant difference between samples


## 5. Descriptive Statistics: Compare Samples

Compare Likert item means between the development and holdout samples to check for systematic differences:

In [35]:
# Survey items grouped by construct: twelve two-item constructs plus the
# four-item BI scale.
two_item_constructs = ['PE', 'EE', 'SI', 'FC', 'HM', 'PV', 'HB', 'VO', 'TR', 'EX', 'ER', 'AX']
construct_items = {code: [f'{code}1', f'{code}2'] for code in two_item_constructs}
construct_items['BI'] = [f'BI{i}' for i in range(1, 5)]

# Flatten to a single ordered list of item column names
all_items = []
for items in construct_items.values():
    all_items.extend(items)

rule = "=" * 70
print(rule)
print("LIKERT ITEM MEANS: Development vs. Holdout")
print(rule)

# Per-item mean and SD in each sample, plus the signed mean difference
dev_items = df_dev[all_items]
holdout_items = df_holdout[all_items]
dev_means = dev_items.mean()
holdout_means = holdout_items.mean()

comparison = pd.DataFrame({
    'Development_M': dev_means,
    'Development_SD': dev_items.std(),
    'Holdout_M': holdout_means,
    'Holdout_SD': holdout_items.std(),
    'Diff': dev_means - holdout_means
})
comparison['Abs_Diff'] = comparison['Diff'].abs()

print("\nSample of items (first 8):")
print(comparison.head(8).round(3))

abs_diff = comparison['Abs_Diff']
mean_abs_diff = abs_diff.mean()
print(f"\nMean absolute difference across all items: {mean_abs_diff:.3f}")
print(f"Max absolute difference: {abs_diff.max():.3f} ({abs_diff.idxmax()})")

# The verdict is based on the mean across items, so an individual item can
# still exceed a threshold while the overall verdict is favourable.
verdict_thresholds = (
    (0.10, "\n✓ Excellent balance: Mean difference < 0.10 scale points"),
    (0.20, "\n✓ Good balance: Mean difference < 0.20 scale points"),
)
for upper_bound, verdict in verdict_thresholds:
    if mean_abs_diff < upper_bound:
        print(verdict)
        break
else:
    print("\n⚠ Review: Mean difference ≥ 0.20 scale points")

LIKERT ITEM MEANS: Development vs. Holdout

Sample of items (first 8):
     Development_M  Development_SD  Holdout_M  Holdout_SD   Diff  Abs_Diff
PE1          3.597           1.131      3.616       1.152 -0.019     0.019
PE2          3.340           1.195      3.346       1.222 -0.006     0.006
EE1          3.748           0.968      3.786       1.040 -0.038     0.038
EE2          3.610           1.055      3.629       1.003 -0.019     0.019
SI1          3.063           1.178      3.138       1.214 -0.075     0.075
SI2          3.283           1.175      3.415       1.075 -0.132     0.132
FC1          3.270           1.221      3.195       1.193  0.075     0.075
FC2          3.434           1.040      3.440       1.083 -0.006     0.006

Mean absolute difference across all items: 0.062
Max absolute difference: 0.220 (AX1)

✓ Excellent balance: Mean difference < 0.10 scale points


## 6. Save Split Samples

In [36]:
# The stratification key was only needed to drive the split; strip it so the
# exported files carry the same columns as the input data.
df_dev_clean = df_dev.drop(labels='stratify_key', axis='columns')
df_holdout_clean = df_holdout.drop(labels='stratify_key', axis='columns')

exports = (
    ("Development sample", df_dev_clean, 'AIRS_clean_dev.csv'),
    ("Holdout sample", df_holdout_clean, 'AIRS_clean_holdout.csv'),
)

# Write both halves to CSV without the index column
for _, frame, file_name in exports:
    frame.to_csv(f'../data/{file_name}', index=False)

rule = "=" * 70
print(rule)
print("SAMPLES SAVED")
print(rule)
# The printed location omits the leading '../' used when writing the files
for label, frame, file_name in exports:
    print(f"\n✓ {label}: data/{file_name}")
    print(f"  N = {len(frame)}, Variables = {len(frame.columns)}")
print("\n✓ Stratification key removed from saved files")
print("\n" + rule)

SAMPLES SAVED

✓ Development sample: data/AIRS_clean_dev.csv
  N = 159, Variables = 45

✓ Holdout sample: data/AIRS_clean_holdout.csv
  N = 159, Variables = 45

✓ Stratification key removed from saved files



## 7. Summary and Quality Checklist

In [37]:
print("="*70)
print("SPLIT-SAMPLE VALIDATION SUMMARY")
print("="*70)

# Verify that the exported CSVs are on disk instead of assuming the save cell
# succeeded. This checks existence only; it does not validate file contents.
saved_files = [
    Path('../data/AIRS_clean_dev.csv'),
    Path('../data/AIRS_clean_holdout.csv'),
]
files_saved = all(path.is_file() for path in saved_files)

# Each entry pairs a human-readable description with the boolean outcome
# computed from the split, the balance tests, and the item comparison.
checks = [
    ("Sample sizes approximately equal", abs(len(df_dev) - len(df_holdout)) <= 2),
    ("Development sample ≥ 150 (adequate for EFA)", len(df_dev) >= 150),
    ("Holdout sample ≥ 150 (adequate for CFA)", len(df_holdout) >= 150),
    ("Work Context balanced (p > 0.05)", p_work_context > 0.05),
    ("AI Adoption balanced (p > 0.05)", p_ai_adoption > 0.05),
    ("Mean item difference < 0.20", comparison['Abs_Diff'].mean() < 0.20),
    ("Files saved successfully", files_saved)
]

print("\nQuality Checklist:")
for check, passed in checks:
    status = "✓" if passed else "✗"
    print(f"  {status} {check}")

all_passed = all(passed for _, passed in checks)

if all_passed:
    print("\n" + "="*70)
    print("✅ ALL VALIDATION CHECKS PASSED")
    print("="*70)
    print("\nReady for Phase 1: Exploratory Factor Analysis (EFA)")
    print("Next notebook: 01_EFA_Split_Sample_Development.ipynb")
else:
    print("\n" + "="*70)
    print("⚠ REVIEW REQUIRED")
    print("="*70)
    print("\nSome validation checks failed. Review results above.")

SPLIT-SAMPLE VALIDATION SUMMARY

Quality Checklist:
  ✓ Sample sizes approximately equal
  ✓ Development sample ≥ 150 (adequate for EFA)
  ✓ Holdout sample ≥ 150 (adequate for CFA)
  ✓ Work Context balanced (p > 0.05)
  ✓ AI Adoption balanced (p > 0.05)
  ✓ Mean item difference < 0.20
  ✓ Files saved successfully

✅ ALL VALIDATION CHECKS PASSED

Ready for Phase 1: Exploratory Factor Analysis (EFA)
Next notebook: 01_EFA_Split_Sample_Development.ipynb


---

## Notes

**Stratification Strategy**:
- Stratified by Work Context × AI Adoption (6 groups)
- Ensures balanced representation of key moderator variables
- Critical for moderation analysis (H4)

**Sample Size Adequacy**:
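- N = 159 per sample supports the 12-factor CFA: ≈ 6.6 cases per item for the 24 predictor items (159 / 24), or ≈ 5.7 cases per item when the four BI items are included (159 / 28)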
- Minimum recommended: 5:1 for stable solutions (Bentler & Chou, 1987)
- Both samples adequate for planned analyses

**Random Seed**: 42 (reproducible splits for dissertation transparency)

**Next Steps**:
1. Run EFA on development sample (polychoric correlations)
2. Select items based on loadings ≥ 0.50, cross-loadings < 0.30
3. Test measurement model with CFA on holdout sample
4. Proceed to structural modeling if fit indices acceptable

---