In [26]:
from semopy import Model, report, calc_stats
from semopy import semplot
import pandas as pd

from tools.preprocessing_data import encode_data_to_numeric, get_data_since_date, load_data, min_max_scale_data, fill_nan_individually
from tools.add_external_data import add_external_data

In [45]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Opinion =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Opinion ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

bins = [0, 4, 6]  # Bins for 1-3, 4-6
labels = ['1', '2']
data['Group'] = pd.cut(data['F7g'], bins=bins, labels=labels, right=True)

#delete rows in data where column 'Group' is NaN
data = data.dropna(subset=['Group'])

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1','Group'] # 'F7g']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = min_max_scale_data(data)

i = 1

educations = sorted(data['Group'].unique())
#educations = sorted(data['F7g'].unique())

print(educations)

for edu in educations:
    # Filter the data for the current income class
    class_data = data[data['Group'] == edu]
    class_data = fill_nan_individually(class_data)

    #class_data = data[data['F7g'] == edu]
    mod = Model(model)

    class_data.drop('Group', axis=1, inplace=True)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    #g = semplot(mod, "../results/sem_education/model_" + str(i) + ".png")
    #report(mod, "../results/sem_education/model_" + str(i) + "_report")
    i = i + 1

#df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_education/all_estimates', index=False)
#df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_education/all_p', index=False)


print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


[np.float64(-1.103195634597876), np.float64(0.9064575390243532)]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.fillna({col: data[col].mode()[0]}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('Group', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.fillna({col: data[col].mode()[0]}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.dr

                 lval op                     rval  p-value1  p-value2
0  Investment_Opinion  ~    Information_Awareness  0.000134  0.905898
1  Investment_Opinion  ~  Energy_Crisis_Sentiment   0.00006  0.860546
2  Investment_Opinion  ~        Ukraine_Sentiment  0.000143   0.83297
3  Investment_Opinion  ~         Corona_Sentiment  0.003982  0.904497
4  Economical_Indices  ~  Energy_Crisis_Sentiment       0.0       0.0


In [18]:
data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

print(data['F7g'].value_counts())

bins = [0, 4, 6]  # Bins for 1-3, 4, and 5-6
labels = ['1', '2']
data['Group'] = pd.cut(data['F7g'], bins=bins, labels=labels, right=True)

print(data['Group'].value_counts())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


F7g
6.0    10014
4.0     8309
5.0     3113
3.0     2295
7.0      411
2.0      156
1.0       26
Name: count, dtype: int64
Group
2    13127
1    10786
Name: count, dtype: int64


In [30]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Opinion =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Opinion ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

cutoff_date = pd.Timestamp('2024-04-06')

# Current year
current_year = 2024

# Update F7cA1 values where I_START is on or after the cutoff date
data.loc[data['i_START'] >= cutoff_date, 'F7cA1'] = (current_year - data.loc[data['i_START'] >= cutoff_date, 'F7cA1'])

# Calculate quantiles for splitting
q1, q2 = data['F7cA1'].quantile([1/3, 2/3])

# Assign groups based on the quantiles
data['Group'] = pd.cut(
    data['F7cA1'], 
    bins=[-float('inf'), q1, q2, float('inf')], 
    labels=['1', '2', '3']
)

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'Group']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)
data = min_max_scale_data(data)

i = 1

ages = sorted(data['Group'].unique())

for age in ages:
    # Filter the data for the current income class
    class_data = data[data['Group'] == age]
    mod = Model(model)

    class_data.drop('Group', axis=1, inplace=True)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_age/model_" + str(i) + ".png")
    report(mod, "../results/sem_age/model_" + str(i) + "_report")
    i = i + 1

df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_age/all_estimates', index=False)
df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_age/all_p', index=False)

print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('Group', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('Group', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  class_data.drop('Group', axis=1, inplace=True)


                 lval op                     rval  p-value1  p-value2  \
0  Investment_Opinion  ~    Information_Awareness       0.0  0.000109   
1  Investment_Opinion  ~  Energy_Crisis_Sentiment  0.000468  0.034404   
2  Investment_Opinion  ~        Ukraine_Sentiment  0.279348  0.043507   
3  Investment_Opinion  ~         Corona_Sentiment  0.368951   0.00079   
4  Economical_Indices  ~  Energy_Crisis_Sentiment  0.000174   0.00021   

   p-value3  
0  0.610128  
1  0.980196  
2  0.962479  
3  0.923702  
4  0.981514  


In [31]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Opinion =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Opinion ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'state']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')

data = fill_nan_individually(data)

states_new = ['Sachsen-Anhalt', 'Thüringen', 'Sachsen', 'Mecklenburg-Vorpommern', 'Brandenburg']

states_old = ['Bayern', 'Baden-Württemberg', 'Nordrhein-Westfalen', 'Niedersachsen', 'Schleswig-Holstein', 'Berlin',
              'Rheinland-Pfalz', 'Bremen', 'Hessen', 'Saarland', 'Hamburg']

data['old_new'] = data['state'].apply(lambda x: 'new' if x in states_new else 'old' if x in states_old else 'unknown')

data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1',
                    'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1',
                    'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1',
                    'old_new']

data = data[relevant_columns]

data = min_max_scale_data(data)

data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

states = sorted(data['old_new'].unique())

i = 1

for state in states:
    print(state)
    # Filter the data for the current income class
    class_data = data[data['old_new'] == state]
    class_data.drop('old_new', inplace=True, axis=1)
    mod = Model(model)

    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_states/model_" + state + ".png")
    report(mod, "../results/sem_states/model_" + state + "_report")
    i = i + 1

df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_states/all_estimates', index=False)
df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_states/all_p', index=False)

print(df_p.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed
  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed
  data.fillna({col: data[col].mode()[0]}, inplace=True)


ValueError: could not convert string to float: 'old'

In [21]:
for state in states:
    print(data[data['old_new'] == state].shape, state)

(3227, 26) new
(21097, 26) old


In [34]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Opinion =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Opinion ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1', 'einkommen']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)
data = min_max_scale_data(data)

income_classes = sorted(data['einkommen'].unique())

# Define custom group boundaries
group1 = income_classes[:4]   
group2 = income_classes[4:7]   
group3 = income_classes[7:]

# Create a mapping for each income class to its group
new_income_classes = {}
for income in group1:
    new_income_classes[income] = 1
for income in group2:
    new_income_classes[income] = 2
for income in group3:
    new_income_classes[income] = 3
    
data['einkommen'] = data['einkommen'].map(new_income_classes)

income_classes = sorted(data['einkommen'].unique())

i = 1

for income_class in income_classes:
    # Filter the data for the current income class
    class_data = data[data['einkommen'] == income_class]
    mod = Model(model)
    
    r=mod.fit(class_data)
    
    if i == 1:
        param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
        df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
        df_est.rename(columns={'Estimate': 'Estimate1'}, inplace=True)

        param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
        df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])
        df_p.rename(columns={'p-value': 'p-value1'}, inplace=True)
    else:
        new = mod.inspect()[['Estimate', 'op']]
        new_df_est = pd.DataFrame(new[new['op'] != '~~'])
        df_est['Estimate' + str(i)] = new_df_est['Estimate']
        
        new = mod.inspect()[['p-value', 'op']]
        new_df_p = pd.DataFrame(new[new['op'] != '~~'])
        df_p['p-value' + str(i)] = new_df_p['p-value']

    g = semplot(mod, "../results/sem_income/model" + str(i) + ".png")
    report(mod, "../results/sem_income/model" + str(i) + "_report")
    i = i + 1

df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_income/all_p', index=False)
df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem_income/all_estimates', index=False)

print(df_p.head())
print(df_est.head())

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


                 lval op                     rval  p-value1  p-value2  \
0  Investment_Opinion  ~    Information_Awareness  0.000374  0.107547   
1  Investment_Opinion  ~  Energy_Crisis_Sentiment   0.00421  0.744468   
2  Investment_Opinion  ~        Ukraine_Sentiment  0.000279  0.583787   
3  Investment_Opinion  ~         Corona_Sentiment  0.115044  0.889758   
4  Economical_Indices  ~  Energy_Crisis_Sentiment       0.0  0.719908   

   p-value3  
0  0.219913  
1  0.146996  
2  0.232751  
3   0.27319  
4       0.0  
                 lval op                     rval  Estimate1  Estimate2  \
0  Investment_Opinion  ~    Information_Awareness   0.618859   0.281369   
1  Investment_Opinion  ~  Energy_Crisis_Sentiment  -0.512049  -0.037828   
2  Investment_Opinion  ~        Ukraine_Sentiment  -0.182579   0.033887   
3  Investment_Opinion  ~         Corona_Sentiment  -0.752399  -0.116049   
4  Economical_Indices  ~  Energy_Crisis_Sentiment   0.713867  -0.029526   

   Estimate3  
0   1.49559

In [23]:
model = """
    # Latent Variables:
    Information_Awareness =~ F5aA1_1 + F5aA2_1 + F5aA3_1 + F5bA1_1 + F5bA2_1 + F5bA3_1 + F5bA4_1 + F5bA5_1 + F5A10_1 + F5A11_1 + F5A12_1 + F5A13_1 + F5A14_1

    Investment_Opinion =~ F3A21_1 + F5A10_2
    
    Energy_Crisis_Sentiment =~ F1A13_1 + F1A14_1
    
    Economical_Indices =~ inflation_rate + interest_rate + dax_points + MSCI_world

    Ukraine_Sentiment =~ F2A14 + F2A6

    Corona_Sentiment =~ F3A16_1 + F3A17_1
        
    # Regression:
    Investment_Opinion ~ Information_Awareness + Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
    Economical_Indices ~ Energy_Crisis_Sentiment + Ukraine_Sentiment + Corona_Sentiment
   
    # Correlations:
    inflation_rate ~~ dax_points
    inflation_rate ~~ interest_rate
    dax_points ~~ interest_rate
    MSCI_world ~~ dax_points
    MSCI_world ~~ interest_rate
    MSCI_world ~~ inflation_rate
    F3A21_1 ~~ F5A10_2
    F5bA4_1 ~~ F5aA1_1
    F5bA2_1 ~~ F5aA3_1
    F5bA3_1 ~~ F5bA2_1
    F5A11_1 ~~ F5A10_1
    F5A10_1 ~~ F5A13_1
    F5A13_1 ~~ F5A11_1
    F3A17_1 ~~ F3A16_1
    F1A13_1 ~~ F1A14_1
    
"""

data = load_data()
data = add_external_data(data)
data = get_data_since_date(data, '2023-04-05')
data = encode_data_to_numeric(data)

relevant_columns = ['inflation_rate', 'interest_rate', 'dax_points', 'MSCI_world', 'F3A21_1', 'F5A10_2', 'F5aA1_1', 'F5aA2_1', 'F5aA3_1', 'F5bA1_1', 'F5bA2_1', 'F5bA3_1', 'F5bA4_1', 'F5bA5_1', 'F5A10_1', 'F5A11_1', 'F5A12_1', 'F5A13_1', 'F5A14_1', 'F3A16_1', 'F3A17_1', 'F2A6', 'F2A14', 'F1A13_1', 'F1A14_1']

data = data[relevant_columns]
data['inflation_rate'] = -data['inflation_rate']
data['F1A14_1'] = -data['F1A14_1']

data = fill_nan_individually(data)
data = min_max_scale_data(data)

mod = Model(model)


r=mod.fit(data)
print(r)
    

param_est = mod.inspect()[['lval', 'op', 'rval', 'Estimate']]
param_p = mod.inspect()[['lval', 'op', 'rval', 'p-value']]
df_est = pd.DataFrame(param_est[param_est['op'] != '~~'])
df_p = pd.DataFrame(param_p[param_p['op'] != '~~'])


g = semplot(mod, "../results/sem/model.png")

report(mod, "../results/sem/model_report")

df_p.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem/all_p', index=False)
df_est.to_csv('/Users/inagege/Documents/00_Uni/SeminarSocialSentimentInTimesOfCrisis/results/sem/all_estimates', index=False)
df_est.head()

  return pd.read_csv("../Data/data_sample_700_SOSEC_dataset_germany.csv")
[*********************100%***********************]  1 of 1 completed


Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 1.974
Number of iterations: 114
Params: 1.033 0.229 0.837 0.293 0.362 0.941 0.797 1.328 1.183 1.306 1.374 0.477 1.595 0.862 0.662 1.057 1.104 1.451 1.587 0.514 -1.110 -0.495 -1.499 0.596 0.423 0.865 0.096 0.435 0.416 -0.073 0.347 0.251 0.098 0.195 0.288 0.123 0.719 0.421 0.802 0.342 0.837 0.426 0.971 0.416 0.687 0.283 0.606 0.287 0.578 0.389 0.924 0.400 0.874 0.030 0.549 0.126 0.007 -0.073 -0.178 0.907 0.594 0.145 0.970 0.287 0.619 0.949 0.777 0.762 0.988 0.843 0.858 0.223 0.048 0.045 0.129 0.406 -0.030 0.743




Unnamed: 0,lval,op,rval,Estimate
0,Investment_Opinion,~,Information_Awareness,0.514286
1,Investment_Opinion,~,Energy_Crisis_Sentiment,-1.110308
2,Investment_Opinion,~,Ukraine_Sentiment,-0.495112
3,Investment_Opinion,~,Corona_Sentiment,-1.499473
4,Economical_Indices,~,Energy_Crisis_Sentiment,0.595665
