In [1]:
from semopy import Model, report
from semopy import semplot
import pandas as pd

from tools.preprocessing_data import encode_data_to_numeric, get_data_since_date, load_data, min_max_scale_data, fill_nan_individually
from tools.add_external_data import add_external_data

In [62]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Behavior =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Behavior ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

bins = [0, 4, 6]  # Bins for 1-3, 4, and 5-6
labels = ['1', '2']
data['Group'] = pd.cut(data['F7g'], bins=bins, labels=labels, right=True)

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'Group']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)

i = 1

educations = sorted(data['Group'].unique())

for edu in educations:
    # Filter the data for the current income class
    class_data = data[data['Group'] == edu]
    mod = Model(model)
    class_data = min_max_scale_data(class_data)

    class_data.drop('Group', axis=1, inplace=True)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_education/model_" + str(i) + ".png")
    report(mod, "../results/sem_education/model_" + str(i) + "_report")
    i = i + 1

df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_education/all_estimates', index=False)
df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_education/all_p', index=False)


print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


                  lval op                     rval  p-value1  p-value2
0  Investment_Behavior  ~    Information_Awareness  0.000002    0.6703
1  Investment_Behavior  ~  Energy_Crisis_Sentiment  0.000018  0.185507
2  Investment_Behavior  ~        Ukraine_Sentiment  0.000022  0.164405
3  Investment_Behavior  ~         Corona_Sentiment  0.000621    0.5504
4   Economical_Indices  ~  Energy_Crisis_Sentiment       0.0       0.0


In [45]:
data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

print(data['F7g'].value_counts())

bins = [0, 4, 6]  # Bins for 1-3, 4, and 5-6
labels = ['1', '2']
data['Group'] = pd.cut(data['F7g'], bins=bins, labels=labels, right=True)

print(data['Group'].value_counts())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


F7g
6.0    10014
4.0     8309
5.0     3113
3.0     2295
7.0      411
2.0      156
1.0       26
Name: count, dtype: int64
Group
2    13127
1    10786
Name: count, dtype: int64


In [60]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Behavior =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Behavior ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

cutoff_date = pd.Timestamp('2024-04-06')

# Current year
current_year = 2024

# Update F7cA1 values where I_START is on or after the cutoff date
data.loc[data['i_START'] >= cutoff_date, 'F7cA1'] = (current_year - data.loc[data['i_START'] >= cutoff_date, 'F7cA1'])

# Calculate quantiles for splitting
q1, q2 = data['F7cA1'].quantile([1/3, 2/3])

# Assign groups based on the quantiles
data['Group'] = pd.cut(
    data['F7cA1'], 
    bins=[-float('inf'), q1, q2, float('inf')], 
    labels=['1', '2', '3']
)

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'Group']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)

i = 1

ages = sorted(data['Group'].unique())

for age in ages:
    # Filter the data for the current income class
    class_data = data[data['Group'] == age]
    mod = Model(model)
    class_data = min_max_scale_data(class_data)

    class_data.drop('Group', axis=1, inplace=True)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_age/model_" + str(i) + ".png")
    report(mod, "../results/sem_age/model_" + str(i) + "_report")
    i = i + 1

df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_age/all_estimates', index=False)
df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_age/all_p', index=False)

print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


                  lval op                     rval  p-value1  p-value2  \
0  Investment_Behavior  ~    Information_Awareness  0.982174  0.000052   
1  Investment_Behavior  ~  Energy_Crisis_Sentiment  0.995521  0.031622   
2  Investment_Behavior  ~        Ukraine_Sentiment  0.990624  0.032751   
3  Investment_Behavior  ~         Corona_Sentiment  0.970046  0.001006   
4   Economical_Indices  ~  Energy_Crisis_Sentiment       0.0   0.00017   

   p-value3  
0  0.156466  
1  0.384318  
2  0.873303  
3  0.447735  
4  0.392975  


In [63]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Behavior =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Behavior ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'state']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

data = fill_nan_individually(data)

states_new = ['Sachsen-Anhalt', 'Thüringen', 'Sachsen', 'Mecklenburg-Vorpommern', 'Brandenburg']

states_old = ['Bayern', 'Baden-Württemberg', 'Nordrhein-Westfalen', 'Niedersachsen', 'Schleswig-Holstein', 'Berlin',
              'Rheinland-Pfalz', 'Bremen', 'Hessen', 'Saarland', 'Hamburg']

data['old_new'] = data['state'].apply(lambda x: 'new' if x in states_new else 'old' if x in states_old else 'unknown')

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1',
                    'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1',
                    'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1',
                    'old_new']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

states = sorted(data['old_new'].unique())

i = 1

for state in states:
    print(state)
    # Filter the data for the current income class
    class_data = data[data['old_new'] == state]
    class_data.drop('old_new', inplace=True, axis=1)
    mod = Model(model)
    class_data = min_max_scale_data(class_data)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_states/model_" + state + ".png")
    report(mod, "../results/sem_states/model_" + state + "_report")
    i = i + 1

df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_states/all_estimates', index=False)
df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_states/all_p', index=False)

print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed
  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed
  data.fillna({col: data[col].mode()[0]}, inplace=True)


new


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('old_new', inplace=True, axis=1)


old


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('old_new', inplace=True, axis=1)


                  lval op                     rval  p-value1  p-value2
0  Investment_Behavior  ~    Information_Awareness  0.003909  0.318949
1  Investment_Behavior  ~  Energy_Crisis_Sentiment  0.028176  0.074495
2  Investment_Behavior  ~        Ukraine_Sentiment  0.105211  0.038695
3  Investment_Behavior  ~         Corona_Sentiment  0.323425  0.083185
4   Economical_Indices  ~  Energy_Crisis_Sentiment  0.252466       0.0


In [8]:
for state in states:
    print(data[data['old_new'] == state].shape, state)

(21097, 26) old
(3227, 26) new


In [59]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Behavior =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Behavior ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'einkommen']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)

income_classes = sorted(data['einkommen'].unique())

# Define custom group boundaries
group1 = income_classes[:4]   
group2 = income_classes[4:7]   
group3 = income_classes[7:]

# Create a mapping for each income class to its group
new_income_classes = {}
for income in group1:
    new_income_classes[income] = 1
for income in group2:
    new_income_classes[income] = 2
for income in group3:
    new_income_classes[income] = 3
    
data['einkommen'] = data['einkommen'].map(new_income_classes)

income_classes = sorted(data['einkommen'].unique())

print(income_classes)

i = 1

for income_class in income_classes:
    # Filter the data for the current income class
    class_data = data[data['einkommen'] == income_class]
    mod = Model(model)
    class_data = min_max_scale_data(class_data)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_income/model" + str(i) + ".png")
    report(mod, "../results/sem_income/model" + str(i) + "_report")
    i = i + 1

df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_income/all_p', index=False)
df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_income/all_estimates', index=False)

print(df_p.head())
print(df_est.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


[np.int64(1), np.int64(2), np.int64(3)]




                  lval op                     rval  p-value1  p-value2  \
0  Investment_Behavior  ~    Information_Awareness   0.00634  0.000001   
1  Investment_Behavior  ~  Energy_Crisis_Sentiment   0.04554  0.936174   
2  Investment_Behavior  ~        Ukraine_Sentiment  0.000445  0.514583   
3  Investment_Behavior  ~         Corona_Sentiment  0.176968  0.002195   
4   Economical_Indices  ~  Energy_Crisis_Sentiment       0.0  0.934902   

   p-value3  
0  0.420264  
1  0.333222  
2  0.421846  
3  0.454917  
4       0.0  
                  lval op                     rval  Estimate1  Estimate2  \
0  Investment_Behavior  ~    Information_Awareness   0.746189   0.464573   
1  Investment_Behavior  ~  Energy_Crisis_Sentiment  -0.446417  -0.020812   
2  Investment_Behavior  ~        Ukraine_Sentiment  -0.178666  -0.066953   
3  Investment_Behavior  ~         Corona_Sentiment  -1.000215  -1.393165   
4   Economical_Indices  ~  Energy_Crisis_Sentiment   0.701055  -0.012217   

   Estimate3  

In [53]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Behavior =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Behavior ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)
data = min_max_scale_data(data)

mod = Model(model)


r=mod.fit(data)
    

param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])


#g = semplot(mod, "../results/sem/model" + str(i) + ".png")
#report(mod, "../results/sem/model" + str(i) + "_report")

df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem/all_p', index=False)
df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem/all_estimates', index=False)
df_est.head()

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,lval,op,rval,Estimate
0,Investment_Behavior,~,Information_Awareness,0.514206
1,Investment_Behavior,~,Energy_Crisis_Sentiment,-1.110628
2,Investment_Behavior,~,Ukraine_Sentiment,-0.495249
3,Investment_Behavior,~,Corona_Sentiment,-1.499596
4,Economical_Indices,~,Energy_Crisis_Sentiment,0.595679
