In [1]:
# Assumptions of Bartlett's test
#
# Normality: The data in each group must be normally distributed. Bartlett's test is sensitive to deviations from univariate normality.

# Independence: The samples for each group must be independent of one another.

# Random Sampling: Each group should be a random sample from its respective population.

# Scale: Data must be interval or ratio (i.e., numeric data with meaningful differences, like temperature or height). This is because the test relies on calculating variance, which isn't valid for categorical or ranked (ordinal) data.

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity


In [3]:
# Load the Iris dataset bundle and wrap its feature matrix in a DataFrame,
# labelling the columns with the dataset's own feature names.
iris = load_iris()
df = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])

In [4]:
# Display the column labels to confirm which features were loaded into `df`.
df.columns


Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)'],
      dtype='object')

In [5]:

# Keep only complete rows so the test run later in the notebook sees no
# missing values. Rebinding `df` instead of using inplace=True avoids mutating
# an object that an earlier cell has already displayed.
df = df.dropna()

In [6]:
# Bartlett's test of sphericity (a pre-check for factor analysis)
#
# Null hypothesis (H0): the correlation matrix is an identity matrix, i.e. the
# variables are completely uncorrelated, which would make a factor analysis
# pointless. The test asks whether the observed correlation matrix differs
# significantly from that identity matrix.
#
# Reading the result: the p-value is the probability of observing data at
# least this extreme if H0 were true. Compare it with a significance level
# chosen in advance (alpha, or α, typically 0.05). The lower the p-value, the
# stronger the evidence against "no correlation"; p < α rejects H0.
bartlett_result = calculate_bartlett_sphericity(df)
chi_square_value, p_value = bartlett_result

# Last expression of the cell: show the statistic and its p-value together.
chi_square_value, p_value

(706.9592430234756, 1.9226796044143468e-149)