# SEM Quick Start

In [1]:
import semopy

In [2]:
import semopy
import pandas as pd
desc = semopy.examples.political_democracy.get_model()
print(desc)

# measurement model
ind60 =~ x1 + x2 + x3
dem60 =~ y1 + y2 + y3 + y4
dem65 =~ y5 + y6 + y7 + y8
# regressions
dem60 ~ ind60
dem65 ~ ind60 + dem60
# residual correlations
y1 ~~ y5
y2 ~~ y4 + y6
y3 ~~ y7
y4 ~~ y8
y6 ~~ y8


In [3]:
data = semopy.examples.political_democracy.get_data()
print(data.head())

      y1        y2        y3        y4        y5        y6        y7  \
1   2.50  0.000000  3.333333  0.000000  1.250000  0.000000  3.726360   
2   1.25  0.000000  3.333333  0.000000  6.250000  1.100000  6.666666   
3   7.50  8.800000  9.999998  9.199991  8.750000  8.094061  9.999998   
4   8.90  8.800000  9.999998  9.199991  8.907948  8.127979  9.999998   
5  10.00  3.333333  9.999998  6.666666  7.500000  3.333333  9.999998   

         y8        x1        x2        x3  
1  3.333333  4.442651  3.637586  2.557615  
2  0.736999  5.384495  5.062595  3.568079  
3  8.211809  5.961005  6.255750  5.224433  
4  4.615086  6.285998  7.567863  6.267495  
5  6.666666  5.863631  6.818924  4.573679  


In [4]:
mod = semopy.Model(desc)
res = mod.fit(data)
print(res)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.508
Number of iterations: 52
Params: 2.180 1.819 1.257 1.058 1.265 1.186 1.280 1.266 1.482 0.572 0.838 0.624 1.893 1.320 2.156 7.385 0.793 5.067 0.347 3.148 1.357 4.954 0.172 0.082 2.352 3.430 0.120 0.467 3.256 3.951 0.448


In [5]:
ins = mod.inspect()
print(ins)

     lval  op   rval  Estimate  Std. Err    z-value   p-value
0   dem60   ~  ind60  1.482379  0.399024   3.715017  0.000203
1   dem65   ~  ind60  0.571912  0.221383   2.583364  0.009784
2   dem65   ~  dem60  0.837574  0.098446   8.507992       0.0
3      x1   ~  ind60  1.000000         -          -         -
4      x2   ~  ind60  2.180494  0.138565  15.736254       0.0
5      x3   ~  ind60  1.818546  0.151993   11.96465       0.0
6      y1   ~  dem60  1.000000         -          -         -
7      y2   ~  dem60  1.256819  0.182687   6.879647       0.0
8      y3   ~  dem60  1.058174  0.151521   6.983699       0.0
9      y4   ~  dem60  1.265186  0.145151   8.716344       0.0
10     y5   ~  dem65  1.000000         -          -         -
11     y6   ~  dem65  1.185743  0.168908   7.020032       0.0
12     y7   ~  dem65  1.279717  0.159996    7.99841       0.0
13     y8   ~  dem65  1.266084  0.158238   8.001141       0.0
14  dem65  ~~  dem65  0.172210  0.214861   0.801494  0.422846
15  dem6

# Model Class

In [6]:
from semopy import Model
model = Model(desc)
opt_res = model.fit(data)
estimates = model.inspect()

# Displaying results

In [7]:
# Built-in examples
# Univariate regression

from semopy import Model
from semopy.examples import univariate_regression

desc = univariate_regression.get_model()
data = univariate_regression.get_data()
print(desc)

y ~ x


In [13]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.000
Number of iterations: 11
Params: -1.221 0.670


In [15]:
# Printing parameter estimates by print(estimates):\
print(estimates)

  lval  op rval  Estimate  Std. Err    z-value       p-value
0    y   ~    x -1.221069  0.083165 -14.682538  0.000000e+00
1    y  ~~    y  0.670367  0.094804   7.071068  1.537437e-12


# Univariate regression with multiple regressors

In [16]:
# import model description and data:
from semopy import Model
from semopy.examples import univariate_regression_many

desc = univariate_regression_many.get_model()
data = univariate_regression_many.get_data()
print(desc)

y ~ x1 + x2 + x3


In [17]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()

In [18]:
# Inspecting optimization information by print(res_opt):
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.000
Number of iterations: 13
Params: 1.400 0.451 1.190 0.878


In [19]:
# Printing parameter estimates by print(estimates):
print(estimates)

  lval  op rval  Estimate  Std. Err    z-value       p-value
0    y   ~   x1  1.399551  0.091138  15.356385  0.000000e+00
1    y   ~   x2  0.450561  0.097883   4.603051  4.163465e-06
2    y   ~   x3  1.190470  0.086499  13.762839  0.000000e+00
3    y  ~~    y  0.878486  0.124237   7.071068  1.537437e-12


# Multivariate regression

In [20]:
# Import model description and data:
from semopy import Model
from semopy.examples import multivariate_regression

desc = multivariate_regression.get_model()
data = multivariate_regression.get_data()
print(desc)

y1, y2, y3 ~ x1 + x2 + x3


In [21]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()

In [22]:
# Inspecting optimization information by print(res_opt):
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.068
Number of iterations: 17
Params: -1.390 -1.138 -0.318 -0.746 1.074 -1.131 0.703 1.235 -0.920 0.489 1.136 0.638


In [23]:
# Printing parameter estimates by print(estimates):
print(estimates)

   lval  op rval  Estimate  Std. Err    z-value       p-value
0    y1   ~   x1 -1.389754  0.073417 -18.929470  0.000000e+00
1    y1   ~   x2 -1.138405  0.087966 -12.941462  0.000000e+00
2    y1   ~   x3 -0.317893  0.072576  -4.380132  1.186073e-05
3    y2   ~   x1 -0.745837  0.097974  -7.612623  2.686740e-14
4    y2   ~   x2  1.074436  0.117388   9.152855  0.000000e+00
5    y2   ~   x3 -1.130890  0.096851 -11.676597  0.000000e+00
6    y3   ~   x1  0.702778  0.064270  10.934755  0.000000e+00
7    y3   ~   x2  1.235044  0.077006  16.038334  0.000000e+00
8    y3   ~   x3 -0.920469  0.063534 -14.487925  0.000000e+00
9    y3  ~~   y3  0.488735  0.069118   7.071068  1.537437e-12
10   y2  ~~   y2  1.135729  0.160616   7.071068  1.537437e-12
11   y1  ~~   y1  0.637755  0.090192   7.071068  1.537437e-12


# A classic Hozlinger-Swineford dataset and CFA model.

In [24]:
# Import model description and data:
from semopy import Model
from semopy.examples import holzinger39

desc = holzinger39.get_model()
data = holzinger39.get_data()
print(desc)

visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9


In [25]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()

In [26]:
# Inspecting optimization information by print(res_opt):
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.283
Number of iterations: 28
Params: 0.554 0.731 1.113 0.926 1.180 1.083 0.550 0.356 0.800 0.488 1.133 0.844 0.446 0.566 0.371 0.383 0.262 0.174 0.808 0.408 0.980


In [27]:
# Printing parameter estimates by print(estimates):
print(estimates)

       lval  op     rval  Estimate  Std. Err    z-value   p-value
0        x1   ~   visual  1.000000         -          -         -
1        x2   ~   visual  0.554421  0.099727   5.559413       0.0
2        x3   ~   visual  0.730526   0.10918   6.691009       0.0
3        x4   ~  textual  1.000000         -          -         -
4        x5   ~  textual  1.113076  0.065392  17.021522       0.0
5        x6   ~  textual  0.926120  0.055425  16.709493       0.0
6        x7   ~    speed  1.000000         -          -         -
7        x8   ~    speed  1.179980  0.165045   7.149459       0.0
8        x9   ~    speed  1.082517  0.151354   7.152197       0.0
9     speed  ~~    speed  0.383377  0.086171   4.449045  0.000009
10    speed  ~~   visual  0.262135  0.056252   4.659977  0.000003
11    speed  ~~  textual  0.173603  0.049316   3.520223  0.000431
12   visual  ~~   visual  0.808310  0.145287   5.563548       0.0
13   visual  ~~  textual  0.408277  0.073527    5.55273       0.0
14  textua

# Bollen's Data on Industrialization and Political Democracy is a common benchmark amongst SEM tools.

In [28]:
# Import model description and data:
from semopy import Model
from semopy.examples import political_democracy

desc = political_democracy.get_model()
data = political_democracy.get_data()
print(desc)

# measurement model
ind60 =~ x1 + x2 + x3
dem60 =~ y1 + y2 + y3 + y4
dem65 =~ y5 + y6 + y7 + y8
# regressions
dem60 ~ ind60
dem65 ~ ind60 + dem60
# residual correlations
y1 ~~ y5
y2 ~~ y4 + y6
y3 ~~ y7
y4 ~~ y8
y6 ~~ y8


In [29]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()

In [30]:
# Inspecting optimization information by print(res_opt):
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.508
Number of iterations: 52
Params: 2.180 1.819 1.257 1.058 1.265 1.186 1.280 1.266 1.482 0.572 0.838 0.624 1.893 1.320 2.156 7.385 0.793 5.067 0.347 3.148 1.357 4.954 0.172 0.082 2.352 3.430 0.120 0.467 3.256 3.951 0.448


In [31]:
# Printing parameter estimates by print(estimates):
print(estimates)

     lval  op   rval  Estimate  Std. Err    z-value   p-value
0   dem60   ~  ind60  1.482379  0.399024   3.715017  0.000203
1   dem65   ~  ind60  0.571912  0.221383   2.583364  0.009784
2   dem65   ~  dem60  0.837574  0.098446   8.507992       0.0
3      x1   ~  ind60  1.000000         -          -         -
4      x2   ~  ind60  2.180494  0.138565  15.736254       0.0
5      x3   ~  ind60  1.818546  0.151993   11.96465       0.0
6      y1   ~  dem60  1.000000         -          -         -
7      y2   ~  dem60  1.256819  0.182687   6.879647       0.0
8      y3   ~  dem60  1.058174  0.151521   6.983699       0.0
9      y4   ~  dem60  1.265186  0.145151   8.716344       0.0
10     y5   ~  dem65  1.000000         -          -         -
11     y6   ~  dem65  1.185743  0.168908   7.020032       0.0
12     y7   ~  dem65  1.279717  0.159996    7.99841       0.0
13     y8   ~  dem65  1.266084  0.158238   8.001141       0.0
14  dem65  ~~  dem65  0.172210  0.214861   0.801494  0.422846
15  dem6

# SEM example model

In [51]:
# Import model description and data:
from semopy import model
from semopy.examples import example_model

desc = examples_model.get_model()
data = examples_model.get_data()

ImportError: cannot import name 'example_model' from 'semopy.examples' (C:\Users\SAMUEL\anaconda3\lib\site-packages\semopy\examples\__init__.py)

In [48]:
# Fit model to data:
mod = Model(desc)
res_opt = mod.fit(data)
estimates = mod.inspect()

In [49]:
# Inspecting optimization information by print(res_opt):
print(res_opt)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.508
Number of iterations: 52
Params: 2.180 1.819 1.257 1.058 1.265 1.186 1.280 1.266 1.482 0.572 0.838 0.624 1.893 1.320 2.156 7.385 0.793 5.067 0.347 3.148 1.357 4.954 0.172 0.082 2.352 3.430 0.120 0.467 3.256 3.951 0.448


In [50]:
# Printing parameter estimates by print(estimates):
print(estimates)

     lval  op   rval  Estimate  Std. Err    z-value   p-value
0   dem60   ~  ind60  1.482379  0.399024   3.715017  0.000203
1   dem65   ~  ind60  0.571912  0.221383   2.583364  0.009784
2   dem65   ~  dem60  0.837574  0.098446   8.507992       0.0
3      x1   ~  ind60  1.000000         -          -         -
4      x2   ~  ind60  2.180494  0.138565  15.736254       0.0
5      x3   ~  ind60  1.818546  0.151993   11.96465       0.0
6      y1   ~  dem60  1.000000         -          -         -
7      y2   ~  dem60  1.256819  0.182687   6.879647       0.0
8      y3   ~  dem60  1.058174  0.151521   6.983699       0.0
9      y4   ~  dem60  1.265186  0.145151   8.716344       0.0
10     y5   ~  dem65  1.000000         -          -         -
11     y6   ~  dem65  1.185743  0.168908   7.020032       0.0
12     y7   ~  dem65  1.279717  0.159996    7.99841       0.0
13     y8   ~  dem65  1.266084  0.158238   8.001141       0.0
14  dem65  ~~  dem65  0.172210  0.214861   0.801494  0.422846
15  dem6

# Syntax and Constraints
Naming and fixing parameters

# Operations and constraints

In [69]:
# Parameters can be fixed to a constant value by specifying a float multiplier:
from semopy import Model
from semopy.examples import \
     multivariate_regression

desc = '''y ~ x1 + 6*x2 + x3'''
data = multivariate_regression.get_data()
mod = Model(desc)
mod.fit(data)
print(mod.inspect())

KeyError: 'Variables y are missing from data.'

# Prediction and imputation

In [70]:
from semopy.examples import political_democracy
from semopy import ModelMeans
import numpy as np

desc = political_democracy.get_model()
data = political_democracy.get_data()

i, v = 0, 'x1'
x = data[v].values[i]

data[v].values[i] = float('nan')
model = ModelMeans(desc)
model.fit(data, )
preds = model.predict(data)
diff = np.abs((x - preds[v].values[i])/x)
print('{:.2f}%'.format(diff * 100))

2.17%


# Factor scores

In [71]:
from semopy.examples import political_democracy
from semopy import Model

desc = political_democracy.get_model()
data = political_democracy.get_data()

model = Model(desc)
model.fit(data)
factors = model.predict_factors(data)
print(factors.head())

      dem60     dem65     ind60
0 -2.539528 -2.368729 -0.536216
1 -2.345362 -1.513466  0.151093
2  2.925690  2.998107  0.740218
3  2.883632  3.021921  1.244115
4  2.428383  2.422393  0.831255
