# WVS Wave 7 Data Analysis - Detailed EFA Run

This script performs a detailed Exploratory Factor Analysis (EFA) on the aggregated WVS data. It includes suitability tests (Bartlett's, KMO), diagnostic plots (Scree, Parallel Analysis), factor extraction, interpretation of loadings, factor score plotting (optional), and reporting variance explained for a specified EFA model.


In [101]:
# =============================================================================
# Imports
# =============================================================================
from __future__ import annotations
import argparse
import datetime as dt
import os
import sys
import warnings
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from matplotlib.ticker import MaxNLocator

In [None]:
# =============================================================================
# Configuration / Constants
# =============================================================================
# Both directories are relative paths, so they resolve against the current
# working directory of the process/notebook kernel.
DATA_DIR = pathlib.Path("./data")
# Root for results; each run writes into a date-stamped subfolder created
# beneath it by ensure_output_dir().
OUTPUT_DIR_ROOT = pathlib.Path("./output/efa_detailed")

# --- Input Data ---
# Alternative aggregated input, kept for reference (currently disabled):
# AGGREGATED_DATA_PATH = DATA_DIR / "wvs_wave7_aggregated_by_demographics.csv"
AGGREGATED_DATA_PATH = DATA_DIR / "new_median_wvs_wave7_aggregated_by_demographics.csv"
# CSV mapping variable codes to question text; load_data() reads its
# "Variable_Code" and "Question_Text" columns.
VARIABLE_INFO_PATH = DATA_DIR / "variable_info.csv"  # Keep in sync with the file used by the earlier scripts

In [None]:
# --- Default Parameters ---
# Seed handed to the parallel-analysis random number generator.
RANDOM_STATE_DEFAULT = 42
# Number of factors extracted by default.
DEFAULT_N_FACTORS = 5  # Chosen based on interpretation from comparative analysis
# Rotation passed to FactorAnalyzer by default.
DEFAULT_ROTATION = 'promax'  # Chosen from comparison
# A loading counts as "strong" in the interpretation report when its absolute
# value is at least this large.
DEFAULT_LOADING_THRESHOLD = 0.50
# Column of the aggregated data used to colour the factor-score scatter plot.
DEFAULT_DEMO_COLOR_COL = 'B_COUNTRY_ALPHA'

In [None]:
# =============================================================================
# Helper Functions
# =============================================================================

def ensure_output_dir(root: pathlib.Path, prefix: str = "") -> pathlib.Path:
    """Create (if necessary) and return a date-stamped folder under ``root``.

    The folder is named ``<prefix>YYYYMMDD`` from today's local date; with an
    empty ``prefix`` the name is just the date stamp. Missing parent
    directories are created and an already existing folder is reused.
    """
    stamp = dt.date.today().strftime("%Y%m%d")
    if prefix:
        folder_name = f"{prefix}{stamp}"
    else:
        folder_name = stamp
    target = root / folder_name
    target.mkdir(parents=True, exist_ok=True)
    print(f"Output directory created/ensured: {target}")
    return target

def create_scree_plot(eigenvalues: np.ndarray, out_path: pathlib.Path) -> None:
    """Plot eigenvalue against component number and save ``1_scree_plot.png``.

    The image is written into ``out_path``. A failure while saving is
    reported on stdout instead of being raised, and the figure is closed
    afterwards either way.
    """
    print("Generating Scree Plot...")
    component_numbers = range(1, len(eigenvalues) + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(component_numbers, eigenvalues, marker="o", linestyle='-', color='blue')
    ax.set_title("Scree Plot of Eigenvalues")
    ax.set_xlabel("Component Number")
    ax.set_ylabel("Eigenvalue")
    # Component numbers are whole numbers, so restrict x ticks to integers.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.grid(True, linestyle='--', alpha=0.6)
    plt.tight_layout()

    try:
        destination = out_path / "1_scree_plot.png"
        fig.savefig(destination, dpi=300)
        print(f"Saved Scree Plot to: {destination}")
    except Exception as e:
        print(f"Error saving Scree Plot: {e}")
    plt.close(fig)

def run_parallel_analysis(data: pd.DataFrame, n_iter: int = 100, random_state: int = 42) -> tuple[np.ndarray, np.ndarray]:
    """Compare observed correlation eigenvalues with those of random normal data.

    The eigenvalues of ``data``'s correlation matrix are computed, then
    ``n_iter`` data sets of standard-normal noise with the same number of rows
    are generated and the eigenvalues of their correlation matrices averaged
    position by position.

    Args:
        data: Observations in rows, variables in columns.
        n_iter: Number of random data sets to simulate; must be at least 1.
        random_state: Seed for ``np.random.default_rng``, making the simulated
            eigenvalues reproducible.

    Returns:
        ``(obs_eig, rand_eig_mean)``: two real-valued 1-D arrays, both sorted
        in descending order, holding the observed eigenvalues and the mean
        simulated eigenvalues respectively.

    Raises:
        ValueError: If ``n_iter`` is smaller than 1.
        numpy.linalg.LinAlgError: If the correlation matrix of ``data``
            contains NaN or infinite entries.
    """
    if n_iter < 1:
        # Averaging zero simulations would silently return NaNs.
        raise ValueError(f"n_iter must be at least 1, got {n_iter}")

    print(f"Running Parallel Analysis (n_iter={n_iter})...")

    # Observed eigenvalues from the data correlation matrix.
    corr_matrix = data.corr().values
    if not np.isfinite(corr_matrix).all():
        # eigvalsh does not validate its input, so fail loudly here with the
        # same exception type the general eigen-solver raises for such input.
        raise np.linalg.LinAlgError(
            "Correlation matrix contains NaN or infinite values; "
            "cannot compute eigenvalues for parallel analysis."
        )
    # A correlation matrix is symmetric: eigvalsh is the dedicated solver,
    # always returns real values (ascending) and skips the eigenvectors.
    obs_eig = np.linalg.eigvalsh(corr_matrix)[::-1]

    # Simulate random data sets of matching size and collect their eigenvalues.
    rng = np.random.default_rng(random_state)
    n_rows = data.shape[0]
    n_vars = corr_matrix.shape[0]
    rand_eigs_all = np.empty((n_iter, n_vars))
    for i in range(n_iter):
        random_data = rng.standard_normal(size=(n_rows, n_vars))
        # corrcoef collapses to a 0-d value for a single variable; keep it 2-D.
        rand_corr = np.atleast_2d(np.corrcoef(random_data, rowvar=False))
        rand_eigs_all[i, :] = np.linalg.eigvalsh(rand_corr)[::-1]

    rand_eig_mean = rand_eigs_all.mean(axis=0)
    print("Parallel Analysis complete.")
    return obs_eig, rand_eig_mean

def plot_parallel_analysis_results(obs_eig: np.ndarray, rand_eig: np.ndarray, out_path: pathlib.Path) -> None:
    """Plot observed vs. mean random eigenvalues and save ``2_parallel_analysis.png``.

    Both series are drawn against component number, together with a
    horizontal reference line at eigenvalue 1. The image is written into
    ``out_path``; a failure while saving is printed rather than raised, and
    the figure is closed afterwards either way.
    """
    print("Generating Parallel Analysis Plot...")
    component_numbers = range(1, len(obs_eig) + 1)
    # (values, marker, linestyle, color, legend label) for each curve.
    curves = (
        (obs_eig, "o", '-', 'blue', "Observed Eigenvalues"),
        (rand_eig, "^", '--', 'red', "Mean Random Eigenvalues"),
    )

    fig, ax = plt.subplots(figsize=(8, 5))
    for values, marker, linestyle, color, label in curves:
        ax.plot(component_numbers, values, marker=marker, linestyle=linestyle, color=color, label=label)
    # Reference line for the Kaiser criterion (eigenvalue = 1).
    ax.axhline(1, color='grey', linestyle=':', label='Kaiser Criterion (Eigenvalue=1)')

    ax.set_title("Parallel Analysis")
    ax.set_xlabel("Component Number")
    ax.set_ylabel("Eigenvalue")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend()
    plt.tight_layout()

    try:
        destination = out_path / "2_parallel_analysis.png"
        fig.savefig(destination, dpi=300)
        print(f"Saved Parallel Analysis Plot to: {destination}")
    except Exception as e:
        print(f"Error saving Parallel Analysis Plot: {e}")
    plt.close(fig)

def plot_factor_scores(scores: np.ndarray, color_data: pd.Series | None, factor_indices: tuple[int, int], out_path: pathlib.Path) -> None:
    """Scatter two factor-score columns against each other, coloured by category.

    Args:
        scores: Factor scores, one row per observation and one column per factor.
        color_data: Categorical values, one per row of ``scores``, used to
            colour the points. ``None`` skips the plot.
        factor_indices: Zero-based column indices of the factors shown on the
            x and y axes.
        out_path: Directory receiving
            ``5_factor_score_scatter_F<x>_vs_F<y>.png`` (one-based numbers).

    The plot is skipped with a printed warning when ``scores`` has too few
    columns, when ``color_data`` is ``None`` or when its length differs from
    the number of score rows. Errors while saving are printed, not raised.
    """
    x_idx, y_idx = factor_indices

    # Guard clauses: each one explains why no plot is produced.
    n_factors_available = scores.shape[1]
    if max(x_idx, y_idx) >= n_factors_available:
        print(f"Warning: Not enough factors ({n_factors_available}) to plot factors {x_idx+1} and {y_idx+1}. Skipping score plot.")
        return
    if color_data is None:
        print("Warning: No color data provided. Skipping factor score plot.")
        return
    n_score_rows = scores.shape[0]
    n_color_values = len(color_data)
    if n_color_values != n_score_rows:
        print(f"Warning: Length mismatch between scores ({n_score_rows}) and color data ({n_color_values}). Skipping score plot.")
        return

    print(f"Generating Factor Score Plot (Factor {x_idx+1} vs Factor {y_idx+1})...")
    fig, ax = plt.subplots(figsize=(8, 6))

    # Map each category to an integer code so it can drive the colormap.
    color_codes, uniques = pd.factorize(color_data)
    scatter = ax.scatter(scores[:, x_idx], scores[:, y_idx], c=color_codes, s=15, cmap='viridis', alpha=0.7)
    ax.set_title("Factor Score Scatter Plot")
    ax.set_xlabel(f"Factor {x_idx+1} Score")
    ax.set_ylabel(f"Factor {y_idx+1} Score")
    ax.grid(True, linestyle='--', alpha=0.6)

    # A legend is only drawn for a small number of categories (at most 15).
    if len(uniques) <= 15:
        legend_handles = []
        for code, label in enumerate(uniques):
            swatch = scatter.cmap(scatter.norm(code))
            legend_handles.append(
                plt.Line2D([0], [0], marker='o', color='w', label=label,
                           markerfacecolor=swatch, markersize=8)
            )
        ax.legend(handles=legend_handles, title=color_data.name, bbox_to_anchor=(1.05, 1), loc='upper left')

    # Reserve the right-hand margin for a legend placed outside the axes.
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    try:
        destination = out_path / f"5_factor_score_scatter_F{x_idx+1}_vs_F{y_idx+1}.png"
        fig.savefig(destination, dpi=300)
        print(f"Saved Factor Score Plot to: {destination}")
    except Exception as e:
        print(f"Error saving Factor Score Plot: {e}")
    plt.close(fig)

def load_data(aggregated_path: pathlib.Path, variable_info_path: pathlib.Path, demo_color_col: str | None) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series | None, dict]:
    """Load the aggregated data and variable info, and pick the survey columns.

    Args:
        aggregated_path: CSV with the aggregated data (required).
        variable_info_path: CSV with a ``Variable_Code`` column and, for the
            question-text mapping, a ``Question_Text`` column. Optional: a
            missing or unreadable file only produces a warning.
        demo_color_col: Name of the aggregated-data column to return for
            colouring plots, or ``None``/empty to skip it.

    Returns:
        ``(df_agg, df_efa_input, color_series, qmap_dict)`` where ``df_agg`` is
        the full aggregated table, ``df_efa_input`` holds only the survey
        columns, ``color_series`` is the requested colour column (``None`` if
        not requested or not present) and ``qmap_dict`` maps variable code to
        question text (empty if unavailable).

    Raises:
        FileNotFoundError: If ``aggregated_path`` does not exist.
        IOError: If the aggregated CSV exists but cannot be read.
        ValueError: If no survey columns can be identified.
    """
    print("--- Loading Data ---")

    # --- Aggregated data (mandatory) ---
    if not aggregated_path.exists():
        raise FileNotFoundError(f"Aggregated data file not found: {aggregated_path}")
    try:
        df_agg = pd.read_csv(aggregated_path)
    except Exception as e:
        # Chain the cause so the underlying parser/OS error stays visible.
        raise IOError(f"Error reading aggregated data file '{aggregated_path}': {e}") from e
    print(f"Loaded aggregated data: {df_agg.shape}")

    # --- Variable information (optional) ---
    var_info_df = pd.DataFrame()
    if variable_info_path.exists():
        try:
            var_info_df = pd.read_csv(variable_info_path)
            print(f"Loaded variable info: {var_info_df.shape}")
        except Exception as e:
            print(f"Warning: Could not load or process variable info file '{variable_info_path}': {e}")
            var_info_df = pd.DataFrame()
    else:
        print(f"Warning: Variable info file not found at '{variable_info_path}'. Question text mapping disabled.")

    has_codes = 'Variable_Code' in var_info_df.columns

    # The question-text map is built separately from column identification so
    # that a missing "Question_Text" column does not throw away the usable
    # list of variable codes.
    qmap_dict = {}
    if has_codes and "Question_Text" in var_info_df.columns:
        qmap_dict = var_info_df.set_index("Variable_Code")["Question_Text"].to_dict()
    elif not var_info_df.empty:
        print("Warning: Variable info lacks 'Variable_Code' and/or 'Question_Text' columns. Question text mapping disabled.")

    # --- Survey columns ---
    # Survey questions are the aggregated-data columns listed in the variable
    # info file; everything else is treated as demographic.
    if not var_info_df.empty and has_codes:
        known_vars = set(var_info_df['Variable_Code'])
        survey_cols = [col for col in df_agg.columns if col in known_vars]
        print(f"Identified {len(survey_cols)} survey columns based on variable info file.")
    else:
        # Fallback: without variable info, every numeric column is assumed to
        # be a survey question (this may include numeric demographics).
        survey_cols = df_agg.select_dtypes(include=np.number).columns.tolist()
        print(f"Warning: Variable info incomplete. Assuming all {len(survey_cols)} numeric columns are survey questions.")

    if not survey_cols:
        raise ValueError("Could not identify survey question columns for EFA.")

    df_efa_input = df_agg[survey_cols]

    # Aggregated data should ideally be complete; only warn, do not impute.
    if df_efa_input.isnull().any().any():
        print("Warning: Missing values found in aggregated data intended for EFA. EFA might fail or produce unreliable results.")

    # --- Optional colour column for the factor-score plot ---
    color_series = None
    if demo_color_col:
        if demo_color_col in df_agg.columns:
            color_series = df_agg[demo_color_col]
            print(f"Using column '{demo_color_col}' for scatter plot coloring.")
        else:
            print(f"Warning: Specified demo color column '{demo_color_col}' not found in aggregated data.")

    return df_agg, df_efa_input, color_series, qmap_dict

In [None]:
# =============================================================================
# Main EFA Pipeline Function
# =============================================================================

def run_efa_analysis(
    df_efa_input: pd.DataFrame, # Data for EFA (survey columns only)
    df_aggregated: pd.DataFrame, # Full aggregated data (for context/coloring)
    color_series: pd.Series | None, # Column for coloring plots
    qmap_dict: dict, # Variable_Code -> Question_Text mapping
    n_factors: int,
    rotation: str | None,
    loading_threshold: float,
    out_dir: pathlib.Path,
    random_state: int | None = None,
) -> None:
    """Run the complete EFA workflow and write all artefacts into ``out_dir``.

    Steps, each reporting its own errors on stdout instead of raising:

    1. Bartlett's sphericity and KMO suitability tests.
    2. Scree plot and parallel analysis (with a suggested factor count).
    3. Factor extraction/rotation; loadings saved as ``3_loadings_<rotation>.csv``.
    4. Listing of loadings whose absolute value reaches ``loading_threshold``,
       saved as ``4_strongest_loadings_interpretation.txt``.
    5. Scatter plot of the scores on the first two factors.
    6. Variance explained (``6_variance_explained.csv``) and, when the fitted
       model exposes one, the factor correlation matrix
       (``7_factor_correlations_phi.csv``).

    If step 3 fails the function returns early, since steps 4-6 need the
    fitted model.

    Args:
        df_efa_input: Survey columns only; the data the model is fitted on.
        df_aggregated: Full aggregated table. Currently unused; kept so
            existing callers keep working.
        color_series: Values used to colour the factor-score plot, or ``None``.
        qmap_dict: Mapping from variable code to question text.
        n_factors: Number of factors to extract.
        rotation: Rotation name passed to ``FactorAnalyzer`` (``None`` for an
            unrotated solution).
        loading_threshold: Minimum absolute loading reported in step 4.
        out_dir: Existing directory that receives all output files.
        random_state: Seed for the parallel analysis; ``None`` uses
            ``RANDOM_STATE_DEFAULT``.
    """
    print("\n--- Starting EFA Analysis ---")
    print(f"Parameters: n_factors={n_factors}, rotation='{rotation}', loading_threshold={loading_threshold}")

    seed = RANDOM_STATE_DEFAULT if random_state is None else random_state

    _run_suitability_tests(df_efa_input)
    _run_diagnostics(df_efa_input, n_factors, out_dir, seed)

    fitted = _extract_factors(df_efa_input, n_factors, rotation, out_dir)
    if fitted is None:
        return  # Steps 4-6 all need the fitted model.
    fa, pattern_matrix = fitted

    _report_strong_loadings(pattern_matrix, qmap_dict, loading_threshold, out_dir)
    _plot_first_two_factor_scores(fa, df_efa_input, color_series, out_dir)
    _report_variance_and_correlations(fa, n_factors, out_dir)

    print(f"\n✓ Detailed EFA analysis complete. Results saved in: {out_dir}")


def _count_leading_factors(obs_eig: np.ndarray, rand_eig: np.ndarray) -> int:
    """Count the leading observed eigenvalues that exceed their random counterpart."""
    exceeds = np.asarray(obs_eig) > np.asarray(rand_eig)
    if exceeds.all():
        return int(exceeds.size)
    # argmin on a boolean array yields the position of the first False, i.e.
    # the number of consecutive leading components that beat random data.
    return int(np.argmin(exceeds))


def _run_suitability_tests(df_efa_input: pd.DataFrame) -> None:
    """Step 1: print Bartlett's test and overall KMO, warning on poor values."""
    print("\n--- 1. Suitability Tests ---")
    try:
        chi_square_value, p_value = calculate_bartlett_sphericity(df_efa_input)
        print(f"Bartlett's Test: Chi-Square = {chi_square_value:.3f}, p-value = {p_value:.4g}")
        if p_value > 0.05:
            print("Warning: Bartlett's test is non-significant (p > 0.05), EFA may not be appropriate.")

        # Per-variable KMO values are returned as well but are not reported.
        _kmo_per_variable, kmo_overall = calculate_kmo(df_efa_input)
        print(f"Kaiser-Meyer-Olkin (KMO) Test: Overall KMO = {kmo_overall:.3f}")
        if kmo_overall < 0.6:
            print("Warning: Overall KMO < 0.6, indicating data suitability for EFA is questionable.")
    except Exception as e:
        print(f"Error during suitability tests: {e}")


def _run_diagnostics(df_efa_input: pd.DataFrame, n_factors: int, out_dir: pathlib.Path, random_state: int) -> None:
    """Step 2: save the scree and parallel-analysis plots and print the suggested factor count."""
    print("\n--- 2. Diagnostic Plots ---")
    try:
        # Unrotated fit with as many factors as variables, used only to read
        # the eigenvalues for the scree plot.
        fa_diag = FactorAnalyzer(n_factors=df_efa_input.shape[1], rotation=None)
        fa_diag.fit(df_efa_input)
        ev, _ = fa_diag.get_eigenvalues()
        create_scree_plot(ev, out_dir)

        obs_eig, rand_eig = run_parallel_analysis(df_efa_input, random_state=random_state)
        plot_parallel_analysis_results(obs_eig, rand_eig, out_dir)

        # Retain factors only up to the first component whose observed
        # eigenvalue no longer exceeds the random one; later components that
        # happen to exceed it again must not be counted.
        suggested_n = _count_leading_factors(obs_eig, rand_eig)
        print(f"Parallel Analysis suggests retaining {suggested_n} factors (Observed > Random).")
        if suggested_n != n_factors:
            print(f"Note: Suggested factors ({suggested_n}) differs from requested factors ({n_factors}).")
    except Exception as e:
        print(f"Error during diagnostic plot generation: {e}")


def _extract_factors(df_efa_input: pd.DataFrame, n_factors: int, rotation: str | None, out_dir: pathlib.Path):
    """Step 3: fit the model and save its loadings; return ``(fa, loadings_df)`` or ``None`` on failure."""
    print(f"\n--- 3. Factor Extraction (n_factors={n_factors}, rotation='{rotation}') ---")
    try:
        fa = FactorAnalyzer(
            n_factors=n_factors,
            rotation=rotation,
            method="principal", # Others: 'minres', 'ml'
            # NOTE(review): use_smc seeds starting communalities; confirm it
            # has any effect when method="principal".
            use_smc=True,
            rotation_kwargs={"max_iter": 1000} # For rotations like promax/oblimin
        )
        fa.fit(df_efa_input)
        print("Factor analysis fitting complete.")

        # Pattern matrix for oblique rotations, factor matrix for orthogonal ones.
        pattern_matrix = pd.DataFrame(
            fa.loadings_,
            index=df_efa_input.columns,
            columns=[f"Factor{i+1}" for i in range(n_factors)],
        )
        loadings_path = out_dir / f"3_loadings_{rotation}.csv"
        pattern_matrix.to_csv(loadings_path)
        print(f"Saved Factor Loadings to: {loadings_path}")
    except Exception as e:
        print(f"Error during factor extraction/rotation: {e}")
        return None
    return fa, pattern_matrix


def _report_strong_loadings(pattern_matrix: pd.DataFrame, qmap_dict: dict, loading_threshold: float, out_dir: pathlib.Path) -> None:
    """Step 4: print and save, per factor, the loadings reaching the threshold (strongest first)."""
    print("\n--- 4. Interpretation of Strongest Loadings ---")
    print(f"(Threshold for 'strong' loading: >= {loading_threshold:.2f})\n")

    interpretation_lines = []
    for factor_col in pattern_matrix.columns:
        column = pattern_matrix[factor_col]
        strong_loadings = column[column.abs() >= loading_threshold]
        strong_loadings = strong_loadings.sort_values(key=np.abs, ascending=False)

        header = f"--- {factor_col} ---"
        print(header)
        interpretation_lines.append(header)
        if strong_loadings.empty:
            print("(No loadings meeting threshold)")
            interpretation_lines.append("(No loadings meeting threshold)")
        else:
            for var_code, loading in strong_loadings.items():
                q_text = qmap_dict.get(var_code, "(Question text not found)")
                line = f"  - {loading:.3f} : {q_text} [{var_code}]"
                print(line)
                interpretation_lines.append(line)
        rule = "-" * (len(factor_col) + 6)
        print(rule)
        # The saved file gets an extra blank line after each factor.
        interpretation_lines.append(rule + "\n")

    try:
        interp_path = out_dir / "4_strongest_loadings_interpretation.txt"
        with open(interp_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(interpretation_lines))
        print(f"\nSaved interpretation summary to: {interp_path}")
    except Exception as e:
        print(f"Error saving interpretation summary: {e}")


def _plot_first_two_factor_scores(fa: "FactorAnalyzer", df_efa_input: pd.DataFrame, color_series: pd.Series | None, out_dir: pathlib.Path) -> None:
    """Step 5: compute factor scores and plot the first two factors against each other."""
    print("\n--- 5. Factor Scores Plot ---")
    try:
        factor_scores = fa.transform(df_efa_input)
        plot_factor_scores(factor_scores, color_series, (0, 1), out_dir)
    except Exception as e:
        print(f"Error generating or plotting factor scores: {e}")


def _report_variance_and_correlations(fa: "FactorAnalyzer", n_factors: int, out_dir: pathlib.Path) -> None:
    """Step 6: save and print variance explained, and save the factor correlations if available."""
    print("\n--- 6. Variance Explained and Factor Correlations ---")
    try:
        factor_names = [f"Factor{i+1}" for i in range(n_factors)]

        # get_factor_variance() returns (SSL, proportion, cumulative proportion).
        variance_info = fa.get_factor_variance()
        variance_df = pd.DataFrame({
            "Factor": factor_names,
            "Sum of Squared Loadings (SSL)": variance_info[0],
            "Proportion Variance Explained": variance_info[1],
            "Cumulative Variance Explained": variance_info[2],
        })
        var_path = out_dir / "6_variance_explained.csv"
        variance_df.to_csv(var_path, index=False)
        print(f"Saved Variance Explained to: {var_path}")
        print("\nVariance Summary:")
        print(variance_df.to_string(index=False, float_format="{:.3f}".format))
        print(f"\nTotal variance explained by {n_factors} factors: {variance_df['Proportion Variance Explained'].sum()*100:.2f}%")

        # phi_ (factor correlation matrix) is only populated for oblique rotations.
        if fa.phi_ is not None:
            phi_matrix = pd.DataFrame(fa.phi_, index=factor_names, columns=factor_names)
            phi_path = out_dir / "7_factor_correlations_phi.csv"
            phi_matrix.to_csv(phi_path)
            print(f"\nSaved Factor Correlation Matrix (Phi) to: {phi_path}")
        else:
            print("\nFactor Correlation Matrix (Phi): Not applicable (orthogonal rotation).")
    except Exception as e:
        print(f"Error calculating/saving variance or factor correlations: {e}")

In [None]:
# # =============================================================================
# # Entry Point
# # =============================================================================

# Saving this for future use, if needed.
# if __name__ == "__main__":
#     parser = argparse.ArgumentParser(description="Run a detailed Exploratory Factor Analysis (EFA) on aggregated data.")
#     parser.add_argument("--n_factors", type=int, default=DEFAULT_N_FACTORS,
#                         help=f"Number of factors to extract (default: {DEFAULT_N_FACTORS}). Diagnostics might suggest alternatives.")
#     parser.add_argument("--rotation", choices=["promax", "oblimin", "varimax", None], default=DEFAULT_ROTATION,
#                         help=f"Rotation method (default: {DEFAULT_ROTATION}). Use None for unrotated solution.")
#     parser.add_argument("--loading_threshold", type=float, default=DEFAULT_LOADING_THRESHOLD,
#                         help=f"Absolute loading threshold for interpretation (default: {DEFAULT_LOADING_THRESHOLD:.2f}).")
#     parser.add_argument("--random_state", type=int, default=RANDOM_STATE_DEFAULT,
#                         help=f"Random state for reproducibility (default: {RANDOM_STATE_DEFAULT}).")
#     parser.add_argument("--color_col", default=DEFAULT_DEMO_COLOR_COL,
#                         help=f"Column name from aggregated data for factor score plot coloring (default: {DEFAULT_DEMO_COLOR_COL}). Set to '' to disable.")
#     parser.add_argument("--agg_file", default=str(AGGREGATED_DATA_PATH),
#                         help=f"Path to the input aggregated CSV file (default: {AGGREGATED_DATA_PATH}).")
#     parser.add_argument("--var_file", default=str(VARIABLE_INFO_PATH),
#                         help=f"Path to the variable info CSV file (default: {VARIABLE_INFO_PATH}).")
#     parser.add_argument("--out_dir", default=str(OUTPUT_DIR_ROOT),
#                          help=f"Root directory for output (default: {OUTPUT_DIR_ROOT}). A date-stamped subfolder will be created.")
#     parser.add_argument("--out_prefix", default="01_",
#                          help="Optional prefix for the date-stamped output subfolder.")


#     args = parser.parse_args()

#     # Handle potential None rotation from argparse
#     rotation_arg = args.rotation if args.rotation != "None" else None
#     color_col_arg = args.color_col if args.color_col else None # Convert empty string to None

#     # --- Execute Pipeline ---
#     try:
#         # Create specific output directory for this run
#         output_directory = ensure_output_dir(pathlib.Path(args.out_dir), prefix=args.out_prefix)

#         # Load data
#         df_aggregated, df_efa_input, color_series, qmap_dict = load_data(
#             pathlib.Path(args.agg_file), pathlib.Path(args.var_file), color_col_arg
#         )

#         # Run EFA
#         run_efa_analysis(
#             df_efa_input=df_efa_input,
#             df_aggregated=df_aggregated,
#             color_series=color_series,
#             qmap_dict=qmap_dict,
#             n_factors=args.n_factors,
#             rotation=rotation_arg,
#             loading_threshold=args.loading_threshold,
#             out_dir=output_directory
#         )

#     except FileNotFoundError as e:
#         print(f"\nError: Input file not found. {e}", file=sys.stderr)
#         sys.exit(1)
#     except ValueError as e:
#         print(f"\nError: Data validation or configuration issue. {e}", file=sys.stderr)
#         sys.exit(1)
#     except Exception as e:
#         print(f"\nAn unexpected error occurred: {e}", file=sys.stderr)
#         # import traceback
#         # traceback.print_exc()
#         sys.exit(1)

In [None]:
# =============================================================================
# Entry Point
# =============================================================================
if __name__ == "__main__":
    # --- Run configuration: module defaults, edit here to change a run ---
    n_factors = DEFAULT_N_FACTORS
    rotation = DEFAULT_ROTATION
    loading_threshold = DEFAULT_LOADING_THRESHOLD
    random_state = RANDOM_STATE_DEFAULT
    color_col = DEFAULT_DEMO_COLOR_COL
    agg_file = str(AGGREGATED_DATA_PATH)
    var_file = str(VARIABLE_INFO_PATH)
    out_dir = str(OUTPUT_DIR_ROOT)
    out_prefix = "01_"

    # The literal string "None" selects an unrotated solution; an empty
    # colour column name disables plot colouring.
    rotation_arg = None if rotation == "None" else rotation
    color_col_arg = color_col or None

    # --- Execute Pipeline ---
    try:
        # Date-stamped output folder for this run.
        output_directory = ensure_output_dir(pathlib.Path(out_dir), prefix=out_prefix)

        df_aggregated, df_efa_input, color_series, qmap_dict = load_data(
            pathlib.Path(agg_file), pathlib.Path(var_file), color_col_arg
        )

        run_efa_analysis(
            df_efa_input=df_efa_input,
            df_aggregated=df_aggregated,
            color_series=color_series,
            qmap_dict=qmap_dict,
            n_factors=n_factors,
            rotation=rotation_arg,
            loading_threshold=loading_threshold,
            out_dir=output_directory
        )
    except Exception as e:
        # Pick the message by error category; every failure exits with status 1.
        if isinstance(e, FileNotFoundError):
            print(f"\nError: Input file not found. {e}", file=sys.stderr)
        elif isinstance(e, ValueError):
            print(f"\nError: Data validation or configuration issue. {e}", file=sys.stderr)
        else:
            print(f"\nAn unexpected error occurred: {e}", file=sys.stderr)
        sys.exit(1)


Output directory created/ensured: output\efa_detailed\01_20250506
--- Loading Data ---
Loaded aggregated data: (2250, 102)
Loaded variable info: (227, 4)
Identified 97 survey columns based on variable info file.
Using column 'B_COUNTRY_ALPHA' for scatter plot coloring.

--- Starting EFA Analysis ---
Parameters: n_factors=5, rotation='promax', loading_threshold=0.5

--- 1. Suitability Tests ---
Bartlett's Test: Chi-Square = 119363.633, p-value = 0
Kaiser-Meyer-Olkin (KMO) Test: Overall KMO = 0.943

--- 2. Diagnostic Plots ---




Generating Scree Plot...
Saved Scree Plot to: output\efa_detailed\01_20250506\1_scree_plot.png
Running Parallel Analysis (n_iter=100)...
Parallel Analysis complete.
Generating Parallel Analysis Plot...
Saved Parallel Analysis Plot to: output\efa_detailed\01_20250506\2_parallel_analysis.png
Parallel Analysis suggests retaining 16 factors (Observed > Random).
Note: Suggested factors (16) differs from requested factors (5).

--- 3. Factor Extraction (n_factors=5, rotation='promax') ---
Factor analysis fitting complete.
Saved Factor Loadings to: output\efa_detailed\01_20250506\3_loadings_promax.csv

--- 4. Interpretation of Strongest Loadings ---
(Threshold for 'strong' loading: >= 0.50)

--- Factor1 ---
  - 0.825 : Importance of God [Q164]
  - 0.756 : How often to you pray (+) [Q172P]
  - 0.734 : Important in life: Religion (+) [Q6P]
  - 0.682 : Religious person (+) [Q173P]
  - 0.669 : How often do you attend religious services (+) [Q171P]
  - -0.658 : Justifiable: Abortion [Q184]
  - 0.6

```python
Generating Scree Plot...
Saved Scree Plot to: output\efa_detailed\01_20250506\1_scree_plot.png
Running Parallel Analysis (n_iter=100)...
Parallel Analysis complete.
Generating Parallel Analysis Plot...
Saved Parallel Analysis Plot to: output\efa_detailed\01_20250506\2_parallel_analysis.png
Parallel Analysis suggests retaining 16 factors (Observed > Random).
Note: Suggested factors (16) differs from requested factors (5).

--- 3. Factor Extraction (n_factors=5, rotation='promax') ---
Factor analysis fitting complete.
Saved Factor Loadings to: output\efa_detailed\01_20250506\3_loadings_promax.csv

--- 4. Interpretation of Strongest Loadings ---
(Threshold for 'strong' loading: >= 0.50)

--- Factor1 ---
  - 0.825 : Importance of God [Q164]
  - 0.756 : How often to you pray (+) [Q172P]
  - 0.734 : Important in life: Religion (+) [Q6P]
  - 0.682 : Religious person (+) [Q173P]
  - 0.669 : How often do you attend religious services (+) [Q171P]
  - -0.658 : Justifiable: Abortion [Q184]
  - 0.653 : Believe in: God (+) [Q165P]
  - 0.648 : Confidence: Churches (+) [Q64P]
  - -0.632 : Justifiable: Euthanasia [Q188]
  - 0.567 : How proud of nationality (+) [Q254P]
  - -0.544 : Justifiable: Death penalty [Q195]
  - 0.522 : Important in life: Work (+) [Q5P]
  - 0.507 : One of main goals in life has been to make my parents proud (+) [Q27P]
-------------
--- Factor2 ---
  - 0.833 : Confidence: Parliament (+) [Q73P]
  - 0.800 : Confidence: The Political Parties (+) [Q72P]
  - 0.754 : Confidence: The Government (+) [Q71P]
  - 0.683 : Confidence: The Press (+) [Q66P]
  - 0.639 : Confidence: Major Companies (+) [Q77P]
  - 0.635 : Confidence: Labor Unions (+) [Q68P]
  - 0.534 : Confidence: Justice System/Courts (+) [Q70P]
-------------
--- Factor3 ---
  - 0.726 : Democracy: People choose their leaders in free elections [Q243]
  - 0.698 : Democracy: Civil rights protect people’s liberty against oppression [Q246]
  - 0.609 : Importance of democracy [Q250]
  - 0.561 : Democracy: Women have the same rights as men [Q249]
  - -0.531 : Justifiable: Cheating on taxes [Q180]
  - 0.523 : Political system: Having a democratic political system (+) [Q238P]
  - -0.518 : Competition good or harmful [Q109]
-------------
--- Factor4 ---
  - 0.776 : Jobs scarce: Men should have more right to a job than women (+) [Q33P]
  - 0.704 : Men make better political leaders than women do (+) [Q29P]
  - 0.682 : The only acceptable religion  is my religion (+) [Q170P]
  - 0.672 : Jobs scarce: Employers should give priority to (nation) people than immigrants (+) [Q34P]
  - 0.631 : Neighbors: Homosexuals (+) [Q22P]
  - 0.625 : Neighbors: Unmarried couples living together (+) [Q25P]
  - 0.619 : Duty towards society to have children (+) [Q37P]
  - 0.587 : It is child's duty to take care of ill parent (+) [Q38P]
  - 0.581 : Whenever science and religion conflict,  religion is always right (+) [Q169P]
  - -0.577 : Justifiable: Sex before marriage [Q186]
  - -0.561 : Justifiable: Homosexuality [Q182]
  - 0.537 : Government has the right: Monitor all e-mails and any other information exchanged on the Internet (+) [Q197P]
  - 0.519 : Political system: Having the army rule (+) [Q237P]
  - 0.500 : Work should come first even if it means less spare time (+) [Q41P]
-------------
--- Factor5 ---
  - 0.791 : Neighbors: People of a different race (+) [Q19P]
  - 0.765 : Neighbors: People who speak a different language (+) [Q26P]
  - 0.734 : Neighbors: Immigrants/foreign workers (+) [Q21P]
  - 0.631 : Neighbors: People of a different religion (+) [Q23P]
-------------

Saved interpretation summary to: output\efa_detailed\01_20250506\4_strongest_loadings_interpretation.txt

--- 5. Factor Scores Plot ---
Generating Factor Score Plot (Factor 1 vs Factor 2)...
Saved Factor Score Plot to: output\efa_detailed\01_20250506\5_factor_score_scatter_F1_vs_F2.png

--- 6. Variance Explained and Factor Correlations ---
Saved Variance Explained to: output\efa_detailed\01_20250506\6_variance_explained.csv

Variance Summary:
 Factor  Sum of Squared Loadings (SSL)  Proportion Variance Explained  Cumulative Variance Explained
Factor1                         10.139                          0.105                          0.105
Factor2                          6.011                          0.062                          0.166
Factor3                          4.916                          0.051                          0.217
Factor4                         10.084                          0.104                          0.321
Factor5                          3.018                          0.031                          0.352

Total variance explained by 5 factors: 35.22%

Saved Factor Correlation Matrix (Phi) to: output\efa_detailed\01_20250506\7_factor_correlations_phi.csv

✓ Detailed EFA analysis complete. Results saved in: output\efa_detailed\01_20250506
```