In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

In [2]:
### Data for regression model
# All four input tables sit in the same folder, so the folder is named once;
# change DATA_DIR to run the notebook against a different location.
DATA_DIR = '/Users/yvette/Desktop/data/Final'

# Row-level cosine-similarity tables (they carry a 'Cosine_Similarity' column
# that the regression cells select).
democracy_dimensions = pd.read_csv(f'{DATA_DIR}/cos_sim_democracy.csv')
dimensions_dimensions = pd.read_csv(f'{DATA_DIR}/cos_sim_dimensions.csv')
# Aggregated tables (they carry a 'mean' column that the OLS cells use as the
# response; presumably the bootstrap mean, going by the variable names).
democracy_dimensions_bootstrap_mean = pd.read_csv(f'{DATA_DIR}/agg_df_democracy.csv')
dimensions_dimensions_bootstrap_mean = pd.read_csv(f'{DATA_DIR}/agg_df_dimension.csv')

In [3]:
### Dependent variable: Cosine Similarity between democracy and dimensions
# Journal and decade are parsed from the Subcorpus label: a label containing
# 'American' is coded American, anything else British; the decade key is the
# first four characters of the part before the first '_' followed by 's'.
democracy_dimensions['Journal'] = democracy_dimensions['Subcorpus'].apply(lambda x: 'American' if 'American' in x else 'British')
democracy_dimensions['Decade'] = democracy_dimensions['Subcorpus'].apply(lambda x: x.split('_')[0][:4] + 's')
# Drop the 'Democracy  vs.  Dictatorship' rows. The explicit .copy() makes the
# filtered frame independent of the original, so the column assignments below
# never write into a slice (which is what raises SettingWithCopyWarning).
democracy_dimensions = democracy_dimensions[democracy_dimensions['Dimension'] != 'Democracy  vs.  Dictatorship'].copy()
democracy_dimensions['Dimension'] = democracy_dimensions['Dimension'].astype('category')
democracy_dimensions['Journal_dummy'] = democracy_dimensions['Journal'].map({'American': 0, 'British': 1})
# Decade keys -> consecutive integers, so the decade can be used as a single
# numeric regressor. Keys that are not listed here map to NaN.
decade_mapping = {
    '1971s': 1,
    '1981s': 2,
    '1991s': 3,
    '2001s': 4,
    '2011s': 5,
    '2021s': 6
}

democracy_dimensions['Decade_linear'] = democracy_dimensions['Decade'].map(decade_mapping)
# Keep only the columns the models need; .copy() decouples the regression frame.
democracy_dimensions_regression = democracy_dimensions[['Subcorpus','Dimension', 'Journal_dummy', 'Decade_linear', 'Cosine_Similarity']].copy()
# Fix the category order so 'Electoral' comes first (the first category is the
# baseline under dummy coding). reorder_categories raises a ValueError unless
# the column's categories are exactly these five.
democracy_dimensions_regression['Dimension'] = democracy_dimensions_regression['Dimension'].cat.reorder_categories(
    ['Electoral',
     'Liberal',
     'Deliberative',
     'Participatory',
     'Egalitarian'],
    ordered=False
)

In [4]:
### Dependent variable: Cosine Similarity between democracy and dimensions but with bootstrap mean
# Journal and decade are parsed from the Subcorpus label: a label containing
# 'American' is coded American, anything else British; the decade key is the
# first four characters of the part before the first '_' followed by 's'.
democracy_dimensions_bootstrap_mean['Journal'] = democracy_dimensions_bootstrap_mean['Subcorpus'].apply(lambda x: 'American' if 'American' in x else 'British')
democracy_dimensions_bootstrap_mean['Decade'] = democracy_dimensions_bootstrap_mean['Subcorpus'].apply(lambda x: x.split('_')[0][:4] + 's')
# Drop the 'Democracy  vs.  Dictatorship' rows. The explicit .copy() makes the
# filtered frame independent of the original, so the column assignments below
# never write into a slice (which is what raises SettingWithCopyWarning).
democracy_dimensions_bootstrap_mean = democracy_dimensions_bootstrap_mean[
    democracy_dimensions_bootstrap_mean['Dimension'] != 'Democracy  vs.  Dictatorship'
].copy()
democracy_dimensions_bootstrap_mean['Dimension'] = democracy_dimensions_bootstrap_mean['Dimension'].astype('category')
democracy_dimensions_bootstrap_mean['Journal_dummy'] = democracy_dimensions_bootstrap_mean['Journal'].map({'American': 0, 'British': 1})
# decade_mapping is the module-level dict defined in the first preprocessing
# cell; decade keys missing from it map to NaN.
democracy_dimensions_bootstrap_mean['Decade_linear'] = democracy_dimensions_bootstrap_mean['Decade'].map(decade_mapping)
# Keep only the columns the OLS models need ('mean' is the response).
democracy_dimensions_bootstrap_mean_regression = democracy_dimensions_bootstrap_mean[['Dimension', 'Journal_dummy', 'Decade_linear', 'mean']].copy()
# Fix the category order so 'Electoral' comes first (the first category is the
# one dropped by get_dummies(drop_first=True)). reorder_categories raises a
# ValueError unless the column's categories are exactly these five.
democracy_dimensions_bootstrap_mean_regression['Dimension'] = democracy_dimensions_bootstrap_mean_regression['Dimension'].cat.reorder_categories(
    ['Electoral',
     'Liberal',
     'Deliberative',
     'Participatory',
     'Egalitarian'],
    ordered=False
)

In [5]:
### Dependent variable: Cosine Similarity between dimensions and dimensions
# Journal and decade are parsed from the Subcorpus label: a label containing
# 'American' is coded American, anything else British; the decade key is the
# first four characters of the part before the first '_' followed by 's'.
dimensions_dimensions['Journal'] = dimensions_dimensions['Subcorpus'].apply(lambda x: 'American' if 'American' in x else 'British')
dimensions_dimensions['Decade'] = dimensions_dimensions['Subcorpus'].apply(lambda x: x.split('_')[0][:4] + 's')
# Keep only the pairs whose first member is 'Democracy  vs.  Dictatorship'.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignments below never write into a slice (which is what
# raises SettingWithCopyWarning).
dimensions_dimensions = dimensions_dimensions[dimensions_dimensions['Dimension 1'] == 'Democracy  vs.  Dictatorship'].copy()
dimensions_dimensions['Journal_dummy'] = dimensions_dimensions['Journal'].map({'American': 0, 'British': 1})
# decade_mapping is the module-level dict defined in the first preprocessing
# cell; decade keys missing from it map to NaN.
dimensions_dimensions['Decade_linear'] = dimensions_dimensions['Decade'].map(decade_mapping)
# Single label per pair, e.g. 'Democracy  vs.  Dictatorship & Electoral'.
dimensions_dimensions['Dimension_Pair'] = dimensions_dimensions['Dimension 1'] + ' & ' + dimensions_dimensions['Dimension 2']
# Keep only the columns the models need; .copy() decouples the regression frame.
dimensions_dimensions_regression = dimensions_dimensions[['Subcorpus','Dimension_Pair', 'Journal_dummy', 'Decade_linear', 'Cosine_Similarity']].copy()
dimensions_dimensions_regression['Dimension_Pair'] = dimensions_dimensions_regression['Dimension_Pair'].astype('category')
# Fix the category order so the '... & Electoral' pair comes first (the first
# category is the baseline under dummy coding). reorder_categories raises a
# ValueError unless the column's categories are exactly these five.
dimensions_dimensions_regression['Dimension_Pair'] = dimensions_dimensions_regression['Dimension_Pair'].cat.reorder_categories(
    ['Democracy  vs.  Dictatorship & Electoral',
     'Democracy  vs.  Dictatorship & Liberal',
     'Democracy  vs.  Dictatorship & Deliberative',
     'Democracy  vs.  Dictatorship & Participatory',
     'Democracy  vs.  Dictatorship & Egalitarian'],
    ordered=False
)

In [6]:
### Dependent variable: Cosine Similarity between dimensions and dimensions but with bootstrap mean
# Journal and decade are parsed from the Subcorpus label: a label containing
# 'American' is coded American, anything else British; the decade key is the
# first four characters of the part before the first '_' followed by 's'.
dimensions_dimensions_bootstrap_mean['Journal'] = dimensions_dimensions_bootstrap_mean['Subcorpus'].apply(lambda x: 'American' if 'American' in x else 'British')
dimensions_dimensions_bootstrap_mean['Decade'] = dimensions_dimensions_bootstrap_mean['Subcorpus'].apply(lambda x: x.split('_')[0][:4] + 's')
# Keep the pairs that mention 'Democracy  vs.  Dictatorship'.
#  - regex=False: the label is matched as plain text; as a regular expression
#    each '.' would match any character.
#  - .copy(): the filtered frame becomes independent of the original, so the
#    column assignments below never write into a slice (which is what raises
#    SettingWithCopyWarning).
dimensions_dimensions_bootstrap_mean = dimensions_dimensions_bootstrap_mean[
    dimensions_dimensions_bootstrap_mean['Dimension_Pair'].str.contains('Democracy  vs.  Dictatorship', regex=False)
].copy()
dimensions_dimensions_bootstrap_mean['Journal_dummy'] = dimensions_dimensions_bootstrap_mean['Journal'].map({'American': 0, 'British': 1})
# decade_mapping is the module-level dict defined in the first preprocessing
# cell; decade keys missing from it map to NaN.
dimensions_dimensions_bootstrap_mean['Decade_linear'] = dimensions_dimensions_bootstrap_mean['Decade'].map(decade_mapping)
# Keep only the columns the OLS models need ('mean' is the response).
dimensions_dimensions_bootstrap_mean_regression = dimensions_dimensions_bootstrap_mean[['Dimension_Pair', 'Journal_dummy', 'Decade_linear', 'mean']].copy()
dimensions_dimensions_bootstrap_mean_regression['Dimension_Pair'] = dimensions_dimensions_bootstrap_mean_regression['Dimension_Pair'].astype('category')
# Fix the category order so the '... & Electoral' pair comes first (the first
# category is the one dropped by get_dummies(drop_first=True)).
# reorder_categories raises a ValueError unless the surviving labels are
# exactly these five, i.e. the filter above must not let through a pair that
# lists 'Democracy  vs.  Dictatorship' second.
dimensions_dimensions_bootstrap_mean_regression['Dimension_Pair'] = dimensions_dimensions_bootstrap_mean_regression['Dimension_Pair'].cat.reorder_categories(
    ['Democracy  vs.  Dictatorship & Electoral',
     'Democracy  vs.  Dictatorship & Liberal',
     'Democracy  vs.  Dictatorship & Deliberative',
     'Democracy  vs.  Dictatorship & Participatory',
     'Democracy  vs.  Dictatorship & Egalitarian'],
    ordered=False
)

In [None]:
### ols regression model with Dependent variable: Cosine Similarity between democracy and dimensions bootstrap mean all 
def _fit_ols(response, design, title=None):
    """Fit an OLS model with an intercept and print its summary.

    Parameters
    ----------
    response : pandas.Series
        Dependent variable, passed to ``sm.OLS`` as the first argument.
    design : pandas.DataFrame
        Regressors without a constant column; the constant is added here.
    title : str, optional
        Line printed immediately before the summary table.

    Returns
    -------
    tuple
        ``(design_with_constant, fitted_results)``.
    """
    design = sm.add_constant(design)
    fitted = sm.OLS(response, design).fit()
    if title is not None:
        print(title)
    print(fitted.summary())
    return design, fitted


def _add_interactions(design, dummy_columns, by, suffix):
    """Return a copy of ``design`` with one product column per dummy appended.

    Each new column is ``design[dummy] * design[by]`` and is named
    ``'<dummy>_x_<suffix>'``; columns are appended in ``dummy_columns`` order.
    """
    expanded = design.copy()
    for dummy in dummy_columns:
        expanded[f'{dummy}_x_{suffix}'] = expanded[dummy] * expanded[by]
    return expanded


# Dimension dummies, computed once and shared by every model in this cell.
# drop_first=True omits the first category, which therefore acts as baseline.
dimension_dummies = pd.get_dummies(democracy_dimensions_bootstrap_mean_regression['Dimension'], drop_first=True).astype('int64')
y = democracy_dimensions_bootstrap_mean_regression['mean']

# Full model: journal + decade + dimension, plus dimension x decade and
# dimension x journal interactions (decade interactions are added first, so
# the column order of the summary table is unchanged).
X = democracy_dimensions_bootstrap_mean_regression[['Journal_dummy', 'Decade_linear']].join(dimension_dummies)
X = _add_interactions(X, dimension_dummies.columns, 'Decade_linear', 'Decade')
X = _add_interactions(X, dimension_dummies.columns, 'Journal_dummy', 'Journal')
# Ensure all columns in X are numeric (non-numeric entries become NaN)
X = X.apply(pd.to_numeric, errors='coerce')
X, ols_model = _fit_ols(y, X)

# dimension only
X1_1, ols_model1_1 = _fit_ols(y, dimension_dummies.copy())

# decade only
X1_2, ols_model1_2 = _fit_ols(y, democracy_dimensions_bootstrap_mean_regression[['Decade_linear']].copy())

# journal only
X1_3, ols_model1_3 = _fit_ols(y, democracy_dimensions_bootstrap_mean_regression[['Journal_dummy']].copy())

# decade x dimension
X1_4 = democracy_dimensions_bootstrap_mean_regression[['Decade_linear']].join(dimension_dummies)
X1_4 = _add_interactions(X1_4, dimension_dummies.columns, 'Decade_linear', 'Decade')
X1_4, ols_model1_4 = _fit_ols(y, X1_4)

# Intercept-only model, left disabled as in the original notebook:
# X1_5 = pd.DataFrame({'const': 1}, index=democracy_dimensions_bootstrap_mean_regression.index)
# ols_model1_5 = sm.OLS(y, X1_5).fit()
# print(ols_model1_5.summary())

# journal x dimension: each interaction column is the product of the journal
# dummy (0 or 1) and a dimension dummy (0 or 1).
X_journal_dim_interaction = democracy_dimensions_bootstrap_mean_regression[['Journal_dummy']].join(dimension_dummies)
X_journal_dim_interaction = _add_interactions(X_journal_dim_interaction, dimension_dummies.columns, 'Journal_dummy', 'Journal')
X_journal_dim_interaction, ols_model_journal_dim_interaction = _fit_ols(
    y,
    X_journal_dim_interaction,
    title="\n--- OLS Model (New): Journal, Dimensions, and Journal x Dimension Interaction ---",
)


                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.691
Model:                            OLS   Adj. R-squared:                  0.595
Method:                 Least Squares   F-statistic:                     7.187
Date:                Mon, 21 Apr 2025   Prob (F-statistic):           1.74e-07
Time:                        11:31:04   Log-Likelihood:                 130.68
No. Observations:                  60   AIC:                            -231.4
Df Residuals:                      45   BIC:                            -199.9
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                     

In [None]:
### OLS regression models with dependent variable: bootstrap-mean cosine similarity between dimension pairs
# One dummy per Dimension_Pair level; drop_first=True omits the first category.
dimension_pair_dummies = pd.get_dummies(
    dimensions_dimensions_bootstrap_mean_regression['Dimension_Pair'],
    drop_first=True,
).astype('int64')
y = dimensions_dimensions_bootstrap_mean_regression['mean']

# Full model: journal + decade + pair dummies, then pair x decade columns
# followed by pair x journal columns.
X = dimensions_dimensions_bootstrap_mean_regression[['Journal_dummy', 'Decade_linear']].join(dimension_pair_dummies)
for suffix, moderator in (('Decade', 'Decade_linear'), ('Journal', 'Journal_dummy')):
    for dimension_pair in dimension_pair_dummies.columns:
        X[f'{dimension_pair}_x_{suffix}'] = X[dimension_pair] * X[moderator]
# Coerce every column to numeric, then prepend the intercept column.
X = sm.add_constant(X.apply(pd.to_numeric, errors='coerce'))
ols_model2 = sm.OLS(y, X).fit()
print(ols_model2.summary())

# Dimension pair only
X2_1 = sm.add_constant(dimension_pair_dummies.copy())
ols_model2_1 = sm.OLS(y, X2_1).fit()
print(ols_model2_1.summary())

# Decade only
X2_2 = sm.add_constant(dimensions_dimensions_bootstrap_mean_regression[['Decade_linear']].copy())
ols_model2_2 = sm.OLS(y, X2_2).fit()
print(ols_model2_2.summary())

# Journal only
X2_3 = sm.add_constant(dimensions_dimensions_bootstrap_mean_regression[['Journal_dummy']].copy())
ols_model2_3 = sm.OLS(y, X2_3).fit()
print(ols_model2_3.summary())

# Decade x dimension pair (reuses dimension_pair_dummies from the top of this cell)
X2_4 = dimensions_dimensions_bootstrap_mean_regression[['Decade_linear']].join(dimension_pair_dummies)
for dimension_pair in dimension_pair_dummies.columns:
    X2_4[f'{dimension_pair}_x_Decade'] = X2_4[dimension_pair] * X2_4['Decade_linear']
X2_4 = sm.add_constant(X2_4)
ols_model2_4 = sm.OLS(y, X2_4).fit()
print(ols_model2_4.summary())

# Intercept-only model, left disabled:
#X2_5 = pd.DataFrame({'const': 1}, index=dimensions_dimensions_bootstrap_mean_regression.index)
#ols_model2_5 = sm.OLS(y, X2_5).fit()
#print(ols_model2_5.summary())

# Journal x dimension pair. Note that X is rebound here and ends the cell as
# this design WITHOUT the constant; the version with the constant is
# X_journal_dim_interaction.
X = dimensions_dimensions_bootstrap_mean_regression[['Journal_dummy']].join(dimension_pair_dummies)
for dimension_pair in dimension_pair_dummies.columns:
    X[f'{dimension_pair}_x_Journal'] = X[dimension_pair] * X['Journal_dummy']
X_journal_dim_interaction = sm.add_constant(X)
ols_model_journal_dim_interaction = sm.OLS(y, X_journal_dim_interaction).fit()

print("\n--- OLS Model (New): Journal, Dimensions, and Journal x Dimension Interaction ---")
print(ols_model_journal_dim_interaction.summary())


                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.146
Model:                            OLS   Adj. R-squared:                 -0.120
Method:                 Least Squares   F-statistic:                    0.5493
Date:                Mon, 21 Apr 2025   Prob (F-statistic):              0.889
Time:                        11:58:11   Log-Likelihood:                 111.31
No. Observations:                  60   AIC:                            -192.6
Df Residuals:                      45   BIC:                            -161.2
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                                             coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------

In [10]:
import statsmodels.formula.api as smf
import pandas as pd

# Requires democracy_dimensions_regression from the preprocessing cell, with
# the columns Cosine_Similarity, Decade_linear, Journal_dummy, Dimension and
# Subcorpus.

def _fit_democracy_ml(formula, label):
    """Fit one random-intercept MixedLM on democracy_dimensions_regression and report it.

    The model is grouped by the 'Subcorpus' column and fitted with
    ``reml=False`` (maximum likelihood rather than REML; presumably so that
    AIC/BIC can be compared across models with different fixed effects).

    Parameters
    ----------
    formula : str
        Patsy formula for the fixed effects.
    label : str
        Model name used in the printed header and on the AIC/BIC lines.

    Returns
    -------
    tuple
        ``(model, fitted_results)``.
    """
    spec = smf.mixedlm(formula,
                       data=democracy_dimensions_regression,
                       groups=democracy_dimensions_regression["Subcorpus"])
    fitted = spec.fit(reml=False)
    print(f"\n{label} Summary (ML):")
    print(fitted.summary())
    # The MixedLM summary table does not list AIC/BIC, so print them explicitly.
    print(f"{label} (ML) AIC: {fitted.aic}")
    print(f"{label} (ML) BIC: {fitted.bic}")
    return spec, fitted


print("--- Models for Cosine Similarity between Democracy and Dimensions (ML) ---")

# Full model: dimension x decade and dimension x journal
model, result_ml = _fit_democracy_ml(
    "Cosine_Similarity ~ C(Dimension) * Decade_linear + C(Dimension) * Journal_dummy",
    "Full Model")

# Dimension only
model1_1, result1_1_ml = _fit_democracy_ml("Cosine_Similarity ~ C(Dimension)", "Dimension-Only Model")

# Decade only
model1_2, result1_2_ml = _fit_democracy_ml("Cosine_Similarity ~ Decade_linear", "Decade-Only Model")

# Journal only
model1_3, result1_3_ml = _fit_democracy_ml("Cosine_Similarity ~ Journal_dummy", "Journal-Only Model")

# Dimension x decade (no journal terms)
model1_4, result1_4_ml = _fit_democracy_ml(
    "Cosine_Similarity ~ C(Dimension) * Decade_linear",
    "Full Model (Without Journal)")

# Intercept only
model1_5, result1_5_ml = _fit_democracy_ml("Cosine_Similarity ~ 1", "Intercept-Only Model")

# Dimension x journal (no decade terms). This model used to print under a bare
# " Model" header and kept its unfitted model in result1_6_ml; it now has its
# own label and the model is kept in model1_6. result1_6_ml still ends up
# holding the fitted results.
model1_6, result1_6_ml = _fit_democracy_ml(
    "Cosine_Similarity ~ C(Dimension) * Journal_dummy",
    "Full Model (Without Decade)")


--- Models for Cosine Similarity between Democracy and Dimensions (ML) ---

Full Model Summary (ML):
                         Mixed Linear Model Regression Results
Model:                    MixedLM         Dependent Variable:         Cosine_Similarity
No. Observations:         1494            Method:                     ML               
No. Groups:               12              Scale:                      0.0025           
Min. group size:          119             Log-Likelihood:             2340.6530        
Max. group size:          125             Converged:                  Yes              
Mean group size:          124.5                                                        
---------------------------------------------------------------------------------------
                                            Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------------------
Intercept                                   



                         Mixed Linear Model Regression Results
Model:                    MixedLM         Dependent Variable:         Cosine_Similarity
No. Observations:         1494            Method:                     ML               
No. Groups:               12              Scale:                      0.0026           
Min. group size:          119             Log-Likelihood:             2322.9180        
Max. group size:          125             Converged:                  Yes              
Mean group size:          124.5                                                        
---------------------------------------------------------------------------------------
                                            Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------------------
Intercept                                    0.092    0.012   7.450 0.000  0.068  0.116
C(Dimension)[T.Liberal]                     -0.145    0.0



In [16]:
import statsmodels.formula.api as smf
import pandas as pd

# Requires dimensions_dimensions_regression from the preprocessing cell, with
# the columns Cosine_Similarity, Decade_linear, Journal_dummy, Dimension_Pair
# and Subcorpus.

def _fit_dimension_pair_ml(formula, label):
    """Fit one random-intercept MixedLM on dimensions_dimensions_regression and report it.

    The model is grouped by the 'Subcorpus' column and fitted with
    ``reml=False`` (maximum likelihood rather than REML; presumably so that
    AIC/BIC can be compared across models with different fixed effects).

    Parameters
    ----------
    formula : str
        Patsy formula for the fixed effects.
    label : str
        Model name used in the printed header and on the AIC/BIC lines.

    Returns
    -------
    tuple
        ``(model, fitted_results)``.
    """
    spec = smf.mixedlm(formula,
                       data=dimensions_dimensions_regression,
                       groups=dimensions_dimensions_regression["Subcorpus"])
    fitted = spec.fit(reml=False)
    print(f"\n{label} Summary (ML):")
    print(fitted.summary())
    # The MixedLM summary table does not list AIC/BIC, so print them explicitly.
    print(f"{label} (ML) AIC: {fitted.aic}")
    print(f"{label} (ML) BIC: {fitted.bic}")
    return spec, fitted


print("--- Models for Cosine Similarity between Dimensions and Dimensions (ML) ---")

# Intercept-only model: left disabled, as in the original cell (the reason is
# not recorded in the notebook).
# model2_5, result2_5_ml = _fit_dimension_pair_ml("Cosine_Similarity ~ 1", "Intercept-Only Model")

# Decade only
model2_2, result2_2_ml = _fit_dimension_pair_ml("Cosine_Similarity ~ Decade_linear", "Time-Only Model")

# Journal only
model2_3, result2_3_ml = _fit_dimension_pair_ml("Cosine_Similarity ~ Journal_dummy", "Journal-Only Model")

# Dimension pair only
model2_1, result2_1_ml = _fit_dimension_pair_ml("Cosine_Similarity ~ C(Dimension_Pair)", "Principles-Only Model")

# Dimension pair x decade (no journal terms)
model2_4, result2_4_ml = _fit_dimension_pair_ml(
    "Cosine_Similarity ~ C(Dimension_Pair) * Decade_linear",
    "Full Model (Without Journal)")

# Dimension pair x journal (no decade terms). The printed label used to be a
# copy of the previous model's "Full Model (Without Journal)", which made the
# two outputs indistinguishable and described this model incorrectly.
model2_6, result2_6_ml = _fit_dimension_pair_ml(
    "Cosine_Similarity ~ C(Dimension_Pair) * Journal_dummy",
    "Full Model (Without Decade)")

# Full model: dimension pair x decade and dimension pair x journal
model2, result2_ml = _fit_dimension_pair_ml(
    "Cosine_Similarity ~ C(Dimension_Pair) * Decade_linear + C(Dimension_Pair) * Journal_dummy",
    "Full Model (With Journal & Interactions)")

--- Models for Cosine Similarity between Dimensions and Dimensions (ML) ---

Time-Only Model Summary (ML):
             Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Cosine_Similarity
No. Observations: 1494    Method:             ML               
No. Groups:       12      Scale:              0.0040           
Min. group size:  119     Log-Likelihood:     1981.7919        
Max. group size:  125     Converged:          Yes              
Mean group size:  124.5                                        
----------------------------------------------------------------
                   Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
----------------------------------------------------------------
Intercept          0.058     0.015  3.897  0.000   0.029   0.088
Decade_linear      0.002     0.004  0.481  0.630  -0.006   0.009
Group Var          0.000     0.003                              

Time-Only Model (ML) AIC: -3955.583855535295
Time-Only Model (ML) B




Full Model (Without Journal) Summary (ML):
                                          Mixed Linear Model Regression Results
Model:                                 MixedLM                    Dependent Variable:                    Cosine_Similarity
No. Observations:                      1494                       Method:                                ML               
No. Groups:                            12                         Scale:                                 0.0039           
Min. group size:                       119                        Log-Likelihood:                        2007.5981        
Max. group size:                       125                        Converged:                             Yes              
Mean group size:                       124.5                                                                              
--------------------------------------------------------------------------------------------------------------------------
               

