# import

In [27]:
import pandas as pd
import numpy as np
from semopy import Model
from semopy import calc_stats
from sklearn.preprocessing import StandardScaler

# Read_Data

In [35]:
hotel = pd.read_csv("../final_data/hotel_sentiment.csv")
amazon = pd.read_csv("../final_data/amazon_sentiment.csv")
coursera = pd.read_csv("../final_data/coursera_sentiment.csv")
audible = pd.read_csv("../final_data/audible_sentiment.csv")

amazon.dtypes

Product_Name                  object
Num_of_Ratings                 int64
Rating                         int64
Review_Title                  object
Review_Text                   object
Helpfulness                    int64
Is_Photo                       int64
Price                        float64
Time_Lapsed                    int64
Average_Rating               float64
Title_Length                   int64
Text_Length                    int64
Deviation_Of_Star_Ratings    float64
FOG_Index                    float64
Flesch_Reading_Ease          float64
Depth                        float64
Breadth                      float64
Valence                      float64
Arousal                      float64
Interaction                  float64
dtype: object

# Scaling

In [29]:
# 'Helpfulness' 컬럼을 제외한 수치형 컬럼 선택
hotel_numeric_cols = hotel.select_dtypes(include=['float64', 'int64']).drop(columns=['Helpfulness'], errors='ignore').dropna()
amazon_numeric_cols = amazon.select_dtypes(include=['float64', 'int64']).drop(columns=['Helpfulness'], errors='ignore').dropna()
coursera_numeric_cols = coursera.select_dtypes(include=['float64', 'int64']).drop(columns=['Helpfulness'], errors='ignore').dropna()
audible_numeric_cols = audible.select_dtypes(include=['float64', 'int64']).drop(columns=['Helpfulness'], errors='ignore').dropna()

# StandardScaler로 스케일링 적용
scaler = StandardScaler()
hotel_scaled = hotel.copy()
amazon_scaled = amazon.copy()
coursera_scaled = coursera.copy()
audible_scaled = audible.copy()

# 'Helpfulness' 제외하고 스케일링 적용
hotel_scaled[hotel_numeric_cols.columns] = scaler.fit_transform(hotel_numeric_cols)
amazon_scaled[amazon_numeric_cols.columns] = scaler.fit_transform(amazon_numeric_cols)
coursera_scaled[coursera_numeric_cols.columns] = scaler.fit_transform(coursera_numeric_cols)
audible_scaled[audible_numeric_cols.columns] = scaler.fit_transform(audible_numeric_cols)

# Covariance-Based SEM

## Hotel

In [30]:
# SEM 모델 정의
desc = """
# Latent variables
Heuristic =~ Rating + Average_Rating + Is_Photo + Text_Length + Time_Lapsed + Deviation_Of_Star_Ratings 
Systematic =~ FOG_Index + Depth + Breadth + Interaction

# Structural relationships
Helpfulness ~ Heuristic + Systematic
"""

# 모델 생성 및 피팅
model = Model(desc)
model.fit(hotel_scaled)  

# 결과 출력
params = model.inspect()
print(params)

# 🔹 모델 적합도 지표 계산
fit_stats = calc_stats(model)

# 🔹 적합도 지표 출력
print("\n모델 적합도 지표:")
print(fit_stats)

                         lval  op                       rval      Estimate  \
0                      Rating   ~                  Heuristic  1.000000e+00   
1              Average_Rating   ~                  Heuristic  2.422978e-01   
2                    Is_Photo   ~                  Heuristic  1.867192e-02   
3                 Text_Length   ~                  Heuristic -2.068267e-01   
4                 Time_Lapsed   ~                  Heuristic  2.166463e-02   
5   Deviation_Of_Star_Ratings   ~                  Heuristic -5.647456e-01   
6                   FOG_Index   ~                 Systematic  1.000000e+00   
7                       Depth   ~                 Systematic -1.169532e+02   
8                     Breadth   ~                 Systematic  8.459015e+01   
9                 Interaction   ~                 Systematic -1.147844e+01   
10                Helpfulness   ~                  Heuristic -4.492274e-02   
11                Helpfulness   ~                 Systematic -2.

## amazon

In [31]:
# SEM 모델 정의
desc = """
# Latent variables
Heuristic =~ Rating + Average_Rating + Is_Photo + Text_Length + Time_Lapsed + Deviation_Of_Star_Ratings 
Systematic =~ FOG_Index + Depth + Breadth + Interaction

# Structural relationships
Helpfulness ~ Heuristic + Systematic
"""

# 모델 생성 및 피팅
model = Model(desc)
model.fit(amazon_scaled)  

# 결과 출력
params = model.inspect()
print(params)

# 🔹 모델 적합도 지표 계산
fit_stats = calc_stats(model)

# 🔹 적합도 지표 출력
print("\n모델 적합도 지표:")
print(fit_stats)

                         lval  op                       rval  Estimate  \
0                      Rating   ~                  Heuristic  1.000000   
1              Average_Rating   ~                  Heuristic  0.248084   
2                    Is_Photo   ~                  Heuristic -0.024270   
3                 Text_Length   ~                  Heuristic -0.135524   
4                 Time_Lapsed   ~                  Heuristic  0.055307   
5   Deviation_Of_Star_Ratings   ~                  Heuristic -1.020218   
6                   FOG_Index   ~                 Systematic  1.000000   
7                       Depth   ~                 Systematic  3.176920   
8                     Breadth   ~                 Systematic -5.055512   
9                 Interaction   ~                 Systematic  0.100507   
10                Helpfulness   ~                  Heuristic -0.094245   
11                Helpfulness   ~                 Systematic  0.733895   
12                  Heuristic  ~~     

## coursera

In [32]:
# SEM 모델 정의
desc = """
# Latent variables
Heuristic =~ Rating + Average_Rating + Is_Photo + Text_Length + Time_Lapsed + Deviation_Of_Star_Ratings 
Systematic =~ FOG_Index + Depth + Breadth + Interaction

# Structural relationships
Helpfulness ~ Heuristic + Systematic
"""

# 모델 생성 및 피팅
model = Model(desc)
model.fit(coursera_scaled)  

# 결과 출력
params = model.inspect()
print(params)

# 🔹 모델 적합도 지표 계산
fit_stats = calc_stats(model)

# 🔹 적합도 지표 출력
print("\n모델 적합도 지표:")
print(fit_stats)

KeyError: 'Variables FOG_Index, Is_Photo are missing from data.'

## audible

In [33]:
# SEM 모델 정의
desc = """
# Latent variables
Heuristic =~ Rating + Average_Rating + Is_Photo + Text_Length + Time_Lapsed + Deviation_Of_Star_Ratings 
Systematic =~ FOG_Index + Depth + Breadth + Interaction

# Structural relationships
Helpfulness ~ Heuristic + Systematic
"""

# 모델 생성 및 피팅
model = Model(desc)
model.fit(audible_scaled)  

# 결과 출력
params = model.inspect()
print(params)

# 🔹 모델 적합도 지표 계산
fit_stats = calc_stats(model)

# 🔹 적합도 지표 출력
print("\n모델 적합도 지표:")
print(fit_stats)

KeyError: 'Variables FOG_Index, Breadth, Is_Photo, Depth, Time_Lapsed, Deviation_Of_Star_Ratings, Text_Length are missing from data.'