In [1]:
# Dependencies
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Generate sample data
np.random.seed(42)
num_samples = 1000

# Latent variables (unobserved)
latent1 = np.random.normal(0, 1, num_samples)
latent2 = np.random.normal(0, 1, num_samples)
latent3 = np.random.normal(0, 1, num_samples)

# Observed variables (indicators)
observed1 = 0.5 * latent1 + np.random.normal(0, 0.5, num_samples)
observed2 = 0.7 * latent1 + np.random.normal(0, 0.7, num_samples)

observed3 = 0.3 * latent2 + np.random.normal(0, 0.3, num_samples)
observed4 = 0.6 * latent2 + np.random.normal(0, 0.6, num_samples)

observed5 = 0.4 * latent3 + np.random.normal(0, 0.4, num_samples)
observed6 = 0.8 * latent3 + np.random.normal(0, 0.8, num_samples)

# Create a DataFrame
data = pd.DataFrame({
    'Observed1': observed1,
    'Observed2': observed2,
    'Observed3': observed3,
    'Observed4': observed4,
    'Observed5': observed5,
    'Observed6': observed6,
})

In [3]:
# Build the SEM model
model = sm.OLS(endog=data[['Observed1', 'Observed2', 'Observed3', 'Observed4', 'Observed5', 'Observed6']],
               exog=sm.add_constant(data[['Observed1', 'Observed3', 'Observed5']]))  # Define the structural paths here
result = model.fit()

In [4]:
# Print the model summary
print(result)

<statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x000001B870DC7F10>


### Example 2

In [None]:
# pip install semopy

In [5]:
import pandas as pd
import numpy as np
import semopy


In [6]:
# Generate sample data
np.random.seed(42)
num_samples = 1000

# Latent variables (unobserved)
latent1 = np.random.normal(0, 1, num_samples)
latent2 = np.random.normal(0, 1, num_samples)
latent3 = np.random.normal(0, 1, num_samples)

# Observed variables (indicators)
observed1 = 0.5 * latent1 + np.random.normal(0, 0.5, num_samples)
observed2 = 0.7 * latent1 + np.random.normal(0, 0.7, num_samples)

observed3 = 0.3 * latent2 + np.random.normal(0, 0.3, num_samples)
observed4 = 0.6 * latent2 + np.random.normal(0, 0.6, num_samples)

observed5 = 0.4 * latent3 + np.random.normal(0, 0.4, num_samples)
observed6 = 0.8 * latent3 + np.random.normal(0, 0.8, num_samples)

# Create a DataFrame
data = pd.DataFrame({
    'Observed1': observed1,
    'Observed2': observed2,
    'Observed3': observed3,
    'Observed4': observed4,
    'Observed5': observed5,
    'Observed6': observed6,
})

In [7]:
# Build the SEM model
model_spec = '''
    Latent1 =~ Observed1 + Observed2 + Observed3 + Observed4 + Observed5 + Observed6
    Latent2 =~ Observed3 + Observed4
    Latent3 =~ Observed5 + Observed6
'''

model = semopy.Model(model_spec)
model.fit(data)

# Print the model summary
# print(model.summary())


SolverResult(fun=0.0027734972288557813, success=True, n_it=46, x=array([ 1.93643179e+00,  3.62816674e-01,  2.97151410e-01,  9.99684524e-01,
       -8.91823543e-02, -1.54303338e-01,  9.74353051e-01,  2.03989552e-20,
        2.89189042e-01,  5.47874117e-01,  9.77539293e-01,  3.27492937e-01,
        9.58517847e-20,  2.14946761e-01, -5.17959098e-05, -7.22776360e-02,
        3.19870004e-01,  3.36758548e-02,  1.68607028e-01]), message='Optimization terminated successfully', name_method='SLSQP', name_obj='MLW')

In [8]:
from semopy import Optimizer
opt2 = Optimizer(model)

obj_func_val = opt2.optimize()

In [9]:
from semopy.inspector import inspect
inspect(opt2)



Unnamed: 0,lval,op,rval,Estimate,Std. Err,z-value,p-value
0,Observed1,~,Latent1,1.0,-,-,-
1,Observed2,~,Latent1,1.936437,0.828016,2.338647,0.019354
2,Observed3,~,Latent1,0.3628071,0.18072,2.007565,0.04469
3,Observed3,~,Latent2,1.0,-,-,-
4,Observed4,~,Latent1,0.2971633,0.213386,1.392608,0.163738
5,Observed4,~,Latent2,0.9996858,0.909842,1.098746,0.271879
6,Observed5,~,Latent1,-0.08917426,0.091362,-0.976055,0.329037
7,Observed5,~,Latent3,1.0,-,-,-
8,Observed6,~,Latent1,-0.1543112,0.115964,-1.330678,0.183295
9,Observed6,~,Latent3,0.9743502,0.930875,1.046704,0.295236


In [10]:
from semopy import gather_statistics

stats = gather_statistics(opt2)
print (stats)



SEMStatistics(dof=2, ml=0.0027733837961863017, fun=0.0027733837961863017, chi2=(2.7733837961863017, 0.24990063550852315), dof_baseline=15, chi2_baseline=838.4568319508445, rmsea=0.019674322785555316, cfi=0.999060808331198, gfi=0.9966922759878604, agfi=0.9751920699089527, nfi=0.9966922759878604, tli=0.9929560624839853, aic=37.99445323240763, bic=131.24180353306824, params=[ParametersStatistics(value=1.936436542629728, se=0.8280158217721154, zscore=2.3386467887570523, pvalue=0.01935372116962708), ParametersStatistics(value=0.3628070739621391, se=0.1807199640375913, zscore=2.007564996442034, pvalue=0.044689537328554074), ParametersStatistics(value=0.29716328761637895, se=0.21338623656281688, zscore=1.3926075664561761, pvalue=0.16373849369233628), ParametersStatistics(value=0.9996858386517502, se=0.9098424218581179, zscore=1.0987461285977977, pvalue=0.2718788159084691), ParametersStatistics(value=-0.08917425920316367, se=0.09136191080918461, zscore=-0.9760551023110051, pvalue=0.32903715890