In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm
import seaborn as sns
import mne
import openpyxl as op
import matplotlib.cm as cm

# Supplementary Table 1

In [None]:
# Load the magnesium protocol sheet and split the rows by protocol type (1 vs 2).
magnesium = pd.read_excel(
    r"D:\주성\documents\2025\Mg comma\clinical features2.xlsx",
    sheet_name='Table2',
)
protocol_type = magnesium['Mg Protocol Type']
magnesium1 = magnesium.loc[protocol_type == 1]
magnesium2 = magnesium.loc[protocol_type == 2]

# Descriptive statistics, one table per protocol group.
for protocol_group in (magnesium1, magnesium2):
    print(protocol_group.describe())

# Mann-Whitney U test (SciPy defaults) comparing the two protocols on each dosing variable.
from scipy.stats import mannwhitneyu as mann
dosing_columns = [
    'Mg Starting Date', 'Mg Infusion Day', 'Loading Mg',
    'Maintenance Mg', 'Total Mg', 'Max concentration',
]
for col in dosing_columns:
    s, p = mann(magnesium1[col], magnesium2[col])
    print(col, s, p)

# Figure 4

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm  # For colormap

# Workbook location and the two sheets used for this figure.
file_path = r"D:\주성\documents\2025\Mg comma\차트리뷰\clinical features9.xlsx"
sheet_name_power = 'pharmacoEEG'
sheet_name_feature = 'clinical_data'

# Per-patient response flag, renamed so it can be joined on the 'ID' key of the power sheet.
response = (
    pd.read_excel(file_path, sheet_name=sheet_name_feature)
    .loc[:, ['Patient No.', 'response_10']]
    .rename(columns={"Patient No.": "ID", "response_10": 'response'})
)

# Left join: every power row is kept; rows without a matching patient get a missing response.
df = pd.read_excel(file_path, sheet_name=sheet_name_power).merge(response, on='ID', how='left')

# Extract unique bands
bands = df["band"].unique()

# Colormaps: blue scale for response == True, red scale for response == False.
# cm.get_cmap() was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap() is the supported lookup and returns the same colormap objects.
cmap_true = plt.get_cmap("Blues")
cmap_false = plt.get_cmap("Reds")

# Percentile rank of |post_change|, computed separately inside each
# (band, response) group. Rows whose response is missing keep NaN in both columns.
df["rank_true"] = np.nan
df["rank_false"] = np.nan

for band in bands:
    band_data = df[df["band"] == band]

    for response_flag, rank_col in ((True, "rank_true"), (False, "rank_false")):
        group = band_data[band_data["response"] == response_flag]
        if group.empty:
            continue
        # method="min" gives tied values the lowest rank of the tie; pct=True scales ranks to (0, 1].
        df.loc[group.index, rank_col] = group["post_change"].abs().rank(method="min", pct=True)

# Plot change rate trends for each band (one figure per band, one line per row/patient).
time_labels = ["pre", "post", "day1"]

for band in bands:
    fig, ax = plt.subplots(figsize=(12, 6))

    # Filter data for the current band
    band_data = df[df["band"] == band]

    for _, row in band_data.iterrows():
        response_status = row["response"]

        if pd.isna(response_status):
            # The left merge leaves `response` missing for unmatched patients.
            # NaN is truthy, so without this guard such rows would be drawn
            # as responders; draw them as thin neutral lines instead.
            norm_value = 0.0
            color = "gray"
        elif response_status:
            # Fall back to the mid-scale colour when no rank is available.
            norm_value = row["rank_true"] if not pd.isna(row["rank_true"]) else 0.5
            color = cmap_true(norm_value)
        else:
            norm_value = row["rank_false"] if not pd.isna(row["rank_false"]) else 0.5
            color = cmap_false(norm_value)

        # Line width grows linearly with the percentile rank: 1 (rank 0) to 5 (rank 1).
        line_width = 1 + (norm_value * 4)
        marker_size = 6  # constant marker size

        # The pre time point is the reference, so its change is 0 by definition.
        ax.plot(time_labels, [0, row["post_change"], row["day1_change"]],
                marker='o', linestyle='-', color=color, linewidth=line_width,
                markersize=marker_size, alpha=0.8)

    # Set plot title and labels
    ax.set_title(f"Change Rate Trend - {band} Band (Rank-based Coloring)", fontsize=14)
    ax.set_xlabel("Time", fontsize=12)
    ax.set_ylabel("Change Rate", fontsize=12)
    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.grid(True)

    # One colorbar per response group, both spanning the percentile range [0, 1].
    sm_true = cm.ScalarMappable(cmap=cmap_true, norm=plt.Normalize(vmin=0, vmax=1))
    sm_false = cm.ScalarMappable(cmap=cmap_false, norm=plt.Normalize(vmin=0, vmax=1))

    cbar_true = plt.colorbar(sm_true, ax=ax, fraction=0.03, pad=0.02)
    cbar_true.set_label("Percentile Rank (Response: True)")

    cbar_false = plt.colorbar(sm_false, ax=ax, fraction=0.03, pad=0.06)
    cbar_false.set_label("Percentile Rank (Response: False)")

    plt.show()


In [None]:
response

In [None]:
# Reload the 'pharmacoEEG' sheet; the cells below read its 'band', 'ID',
# 'post_change' and 'day1_change' columns.
file_path = r"D:\주성\documents\2025\Mg comma\차트리뷰\clinical features9.xlsx"
sheet_name = 'pharmacoEEG'

df = pd.read_excel(file_path, sheet_name = sheet_name)

# Split the data by band
bands = df['band'].unique()  # all distinct band values

# Global plot style settings
plt.style.use('ggplot')
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.size'] = 10

def capitalize_band(band_name):
    """Return the band name with its first character upper-cased and the rest lower-cased."""
    capitalized = band_name.capitalize()
    return capitalized

# Build the 3x2 grid of panels and flatten it so panels can be addressed by one index.
fig, axes = plt.subplots(3, 2, figsize=(18, 15))
axes = axes.flatten()

# Numeric x positions are used instead of string categories so the tick order
# is always pre -> post -> day1, even when a patient lacks one time point.
time_points = ['pre', 'post', 'day1']
x_positions = np.arange(len(time_points))

# One panel per band (first five bands only; the sixth panel is reserved for the summary).
for i, band in enumerate(bands[:5]):
    ax = axes[i]
    band_data = df[df['band'] == band].copy()
    patient_ids = band_data['ID'].unique()

    for patient_id in patient_ids:
        patient_data = band_data[band_data['ID'] == patient_id]

        # 'pre' is the reference point, so its change is 0; the other two values
        # come from the patient's first row and may be NaN.
        changes = np.array(
            [0.0, patient_data['post_change'].iloc[0], patient_data['day1_change'].iloc[0]],
            dtype=float,
        )
        valid_indices = ~np.isnan(changes)

        # Matplotlib breaks a line at NaN, so only segments whose two endpoints
        # are both measured are drawn solid.
        ax.plot(x_positions, changes, marker='o', label=f'ID: {patient_id}')

        # Bridge each gap with a dashed gray segment between the nearest measured
        # neighbours (a segment with a NaN endpoint would not be rendered at all).
        valid_positions = np.flatnonzero(valid_indices)
        for left, right in zip(valid_positions[:-1], valid_positions[1:]):
            if right - left > 1:
                ax.plot(
                    [left, right],
                    [changes[left], changes[right]],
                    linestyle='--',
                    alpha=0.5,
                    color='gray'
                )

    # Panel decoration
    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)  # zero reference line
    ax.set_title(f'{capitalize_band(band)} Band Power Change Rate by Patient ID')
    ax.set_xlabel('Time Point')
    ax.set_ylabel('Relative Change in Power (%)')
    ax.set_xticks(x_positions)
    ax.set_xticklabels(time_points)

    # Only show a legend when it stays readable (at most 10 patients).
    if len(patient_ids) <= 10:
        ax.legend(fontsize='small')

# Sixth panel: box plots of the post and day-1 change rates, grouped by band.
# Reshape to long format (one row per band / time point / value) and relabel for display.
box_data = df.melt(
    id_vars=['band'],
    value_vars=['post_change', 'day1_change'],
    var_name='time_point',
    value_name='change_rate',
).assign(
    time_point=lambda long_df: long_df['time_point'].map({'post_change': 'Post', 'day1_change': 'Day 1'}),
    band=lambda long_df: long_df['band'].apply(capitalize_band),
)

summary_ax = axes[5]
sns.boxplot(data=box_data, x='band', y='change_rate', hue='time_point', ax=summary_ax)
summary_ax.set_title('Change Rate by Band and Time Point')
summary_ax.set_xlabel('Band')
summary_ax.set_ylabel('Relative Change in Power')
summary_ax.axhline(y=0, color='red', linestyle='--')
summary_ax.legend(title='Time Point')

# Final layout, export to PNG in the working directory, and display.
plt.tight_layout()
plt.savefig('all_bands_change_rate.png', dpi=300)
plt.show()

# 이전 버전의 post의 relative power에 대한 오름차순 막대그래프

In [None]:
# Legacy view: per-band bar chart of the post-infusion relative power change, sorted ascending.
df = pd.read_csv(r"E:\주성\documents\2025\Mg comma\data2.csv")
data = df  # second name for the same frame

bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']

fig = plt.figure(figsize=(15, 10))
for idx, band in enumerate(bands, 1):
    ax = fig.add_subplot(2, 3, idx)

    # Rows belonging to this band only
    band_data = data[data['band'] == band].copy()

    # Relative change: (post - pre) / pre
    pre_power = band_data['mean_pre_infusion']
    change = (band_data['mean_post_infusion'] - pre_power) / pre_power

    # Ascending order
    sorted_change = np.sort(change)

    ax.bar(range(len(sorted_change)), sorted_change)
    ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    ax.set_title(f'{band.capitalize()} Band')
    ax.set_ylim(-1, 2)  # fixed y range shared by all panels

fig.suptitle('Power Change by Frequency Band', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

# Table 3

비율 만드는 코드

In [None]:
# Clinical features. The last two rows of the sheet are dropped; .copy() makes the
# result an independent frame so the column assignments below do not write to a
# slice of the original (avoids SettingWithCopyWarning / chained-assignment issues).
feature = pd.read_excel(r"E:\주성\documents\2025\Mg comma\clinical features5.xlsx", sheet_name = 'Sheet1')
feature = feature.iloc[:-2, :].copy()

# Derived variables
feature['Immunotherapy'] = ~feature['Immunotherapy Medications'].isna()
feature['CIVADs'] = ~feature['CIVAD Medications'].isna()
feature['STESS_5'] = feature['STESS'] >= 5
feature['mRS_4'] = feature['mRS_premorbid'] >= 4
# Counts upper-case characters in the medication string (0 when missing).
# NOTE(review): presumably each drug is encoded by one capital letter - confirm against the sheet.
feature['CIVADs_num'] = feature['CIVAD Medications'].apply(lambda x: sum(1 for c in str(x) if c.isupper()) if pd.notna(x) else 0)
feature['Age_65'] = feature['Age'] >=65
feature['Mg_change'] = feature['max_Mg_conc'] - feature['Mg_baseline']
feature['Mg_8.5'] = feature['max_Mg_conc'] >= 8.5
feature['mRS_Discharge_4'] = feature['mRS_Discharge'] >= 4

# Sum of the response column (number of responders when it is coded 0/1 or bool)
total_subject = feature['response'].sum()
print("total response subject:", total_subject)

# Categorical variables: cross-tabulate each one against response
feature_cat = feature[['Class_SRSE', 'CIVADs', 'Sex', 'Immunotherapy',
    'Class_NORSE', 'Mg type', 'STESS_5', 'mRS_4', 'Age_65', 'Etiology', 
    'Sz Type', 'Mg_8.5', 'mRS_Discharge_4', 'Inhospital_Mortality'
]]

for col in feature_cat.columns:
    cross = pd.crosstab(feature_cat[col], feature['response'])
    print(cross)

# Continuous variables: descriptive statistics per response group
feature_num = feature[['CIVADs_num', 'AED_number', 'Age', 'STESS',  'mRS_premorbid', 'max_Mg_conc', 'Mg_change', 'response', 'mRS_Discharge', 'Hospital_days', 'SE_Duration']]
print("responder", "\n", feature_num.loc[feature_num['response']==1].describe())
print("non-responder", "\n", feature_num.loc[feature_num['response']==0].describe())


통계값 구하기

In [None]:
# Clinical features. The last two rows of the sheet are dropped; .copy() makes the
# result an independent frame so the column assignments below do not write to a
# slice of the original (avoids SettingWithCopyWarning / chained-assignment issues).
feature = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features8.xlsx", sheet_name = 'Sheet1')
feature = feature.iloc[:-2, :].copy()

# Derived variables
feature['Immunotherapy'] = ~feature['Immunotherapy Medications'].isna()
feature['CIVADs'] = ~feature['CIVAD Medications'].isna()
feature['STESS_5'] = feature['STESS'] >= 5
feature['mRS_4'] = feature['mRS_premorbid'] >= 4
# Counts upper-case characters in the medication string (0 when missing).
# NOTE(review): presumably each drug is encoded by one capital letter - confirm against the sheet.
feature['CIVADs_num'] = feature['CIVAD Medications'].apply(lambda x: sum(1 for c in str(x) if c.isupper()) if pd.notna(x) else 0)
feature['Age_65'] = feature['Age'] >=65
feature['Mg_change'] = feature['max_Mg_conc'] - feature['Mg_baseline']
feature['Mg_8.5'] = feature['max_Mg_conc'] >= 8.5
feature['mRS_Discharge_4'] = feature['mRS_Discharge'] >= 4

def fisher(table, alpha=0.05):
    """Return the p-value of Fisher's exact test for a contingency table.

    Parameters
    ----------
    table : array-like
        Contingency table passed unchanged to ``scipy.stats.fisher_exact``.
    alpha : float, optional
        Accepted for signature compatibility; the function body does not use it.

    Returns
    -------
    float
        The test's p-value (the odds ratio is discarded).
    """
    test_result = stats.fisher_exact(table)
    return test_result[1]

def chisquare (table, alpha=0.05):
    """Return the p-value of the chi-square test of independence for a contingency table.

    ``table`` is passed unchanged to ``scipy.stats.chi2_contingency`` (SciPy defaults).
    ``alpha`` is accepted for signature compatibility; the function body does not use it.
    """
    test_result = stats.chi2_contingency(table)
    return test_result[1]

# Column that defines the two groups being compared.
response_col = 'response_10'

# Two-level categorical variables -> Fisher's exact test.
feature_cat = feature[['Class_SRSE', 'CIVADs', 'Sex', 'Immunotherapy',
    'Class_NORSE', 'Mg type', 'STESS_5', 'mRS_4', 'Age_65', 'Mg_8.5', 'mRS_Discharge_4', 'Inhospital_Mortality']]

# Multi-level categorical variables -> chi-square test.
feature_mulvar = feature[['Etiology', 'Sz Type']]

# Continuous / ordinal variables -> Mann-Whitney U test.
feature_num = feature[['CIVADs_num', 'AED_number', 
'Age', 'STESS',  'mRS_premorbid', 'max_Mg_conc', 'Mg_change', 'response_10', 'mRS_Discharge', 'Hospital_days', 'SE_Duration']]

results = {}
for col in feature_cat.columns:
    table = pd.crosstab(feature_cat[col], feature[response_col])
    p_value = fisher(table)
    results[col] = {'p-value': p_value}

for col in feature_mulvar.columns:
    table = pd.crosstab(feature_mulvar[col], feature[response_col])
    p_value = chisquare(table)
    results[col] = {'p-value': p_value}

is_responder = feature_num[response_col] == True
is_non_responder = feature_num[response_col] == False
for col in feature_num.columns:
    if col == response_col:
        # The grouping variable is not an outcome; comparing it with itself
        # would add a meaningless row to the results table.
        continue
    # Drop missing values per variable: mannwhitneyu propagates NaN into the
    # p-value by default, and only this variable's own gaps should be excluded.
    x = feature_num.loc[is_responder, col].dropna()
    y = feature_num.loc[is_non_responder, col].dropna()
    statistics, p_value = stats.mannwhitneyu(x, y)
    results[col] = {'p-value': p_value}

# One row per variable, single 'p-value' column.
result_df = pd.DataFrame(results).T

In [None]:
result_df

yes-no 처리방법에 따른 차이

In [None]:
# Clinical features. The last two rows of the sheet are dropped; .copy() makes the
# result an independent frame so the column assignments below do not write to a
# slice of the original (avoids SettingWithCopyWarning / chained-assignment issues).
feature = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features6.xlsx", sheet_name = 'Sheet1')
feature = feature.iloc[:-2, :].copy()

# Derived variables
feature['Immunotherapy'] = ~feature['Immunotherapy Medications'].isna()
feature['CIVADs'] = ~feature['CIVAD Medications'].isna()
feature['STESS_5'] = feature['STESS'] >= 5
feature['mRS_4'] = feature['mRS_premorbid'] >= 4
# Counts upper-case characters in the medication string (0 when missing).
# NOTE(review): presumably each drug is encoded by one capital letter - confirm against the sheet.
feature['CIVADs_num'] = feature['CIVAD Medications'].apply(lambda x: sum(1 for c in str(x) if c.isupper()) if pd.notna(x) else 0)
feature['Age_65'] = feature['Age'] >=65
feature['Mg_change'] = feature['max_Mg_conc'] - feature['Mg_baseline']
feature['Mg_8.5'] = feature['max_Mg_conc'] >= 8.5
feature['mRS_Discharge_4'] = feature['mRS_Discharge'] >= 4

# NOTE(review): a function with this name and body is also defined in an earlier cell.
def fisher(table, alpha=0.05):
    """Run Fisher's exact test on ``table`` and return only its p-value.

    ``alpha`` is part of the signature but is not used by the function body.
    """
    _, p_value = stats.fisher_exact(table)
    return p_value

# NOTE(review): a function with this name and body is also defined in an earlier cell.
def chisquare (table, alpha=0.05):
    """Run the chi-square test of independence on ``table`` and return only its p-value.

    ``alpha`` is part of the signature but is not used by the function body.
    """
    outcome = stats.chi2_contingency(table)
    p_value = outcome[1]
    return p_value

# Two-level categorical variables -> Fisher's exact test.
feature_cat = feature[['Class_SRSE', 'CIVADs', 'Sex', 'Immunotherapy',
    'Class_NORSE', 'Mg type', 'STESS_5', 'mRS_4', 'Age_65', 'Mg_8.5', 'mRS_Discharge_4', 'Inhospital_Mortality']]

# Multi-level categorical variables -> chi-square test.
feature_mulvar = feature[['Etiology', 'Sz Type']]

# Continuous / ordinal variables -> Mann-Whitney U test.
feature_num = feature[['CIVADs_num', 'AED_number', 
'Age', 'STESS',  'mRS_premorbid', 'max_Mg_conc', 'Mg_change', 'mRS_Discharge', 'Hospital_days', 'SE_Duration']]


def _univariate_p_values(response_col):
    """Test every clinical variable against one response definition.

    Parameters
    ----------
    response_col : str
        Column of ``feature`` holding the response label; the numeric
        comparison uses the rows labelled "y" and "n".

    Returns
    -------
    pandas.DataFrame
        One row per variable with a single 'p-value' column.
    """
    p_values = {}
    for col in feature_cat.columns:
        table = pd.crosstab(feature_cat[col], feature[response_col])
        p_values[col] = {'p-value': fisher(table)}

    for col in feature_mulvar.columns:
        table = pd.crosstab(feature_mulvar[col], feature[response_col])
        p_values[col] = {'p-value': chisquare(table)}

    is_yes = feature[response_col] == "y"
    is_no = feature[response_col] == "n"
    for col in feature_num.columns:
        # Remove missing values per variable. Dropping every row that has a gap
        # in ANY numeric column would shrink the sample for reasons unrelated
        # to the variable under test.
        x = feature_num.loc[is_yes, col].dropna()
        y = feature_num.loc[is_no, col].dropna()
        _, p_value = stats.mannwhitneyu(x, y)
        p_values[col] = {'p-value': p_value}

    return pd.DataFrame(p_values).T


# Same analysis under the two response columns, then side by side.
# NOTE(review): if 'response_yesnoout' holds a third label besides "y"/"n", the
# crosstabs above are no longer 2x2 - confirm fisher() is appropriate there.
result_df1 = _univariate_p_values('response_yesnoyes')
result_df2 = _univariate_p_values('response_yesnoout')

result_df = result_df1.join(result_df2, how='inner', lsuffix='_yes_no_to_yes', rsuffix='_yes_no_exclude')

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
from scipy.stats import chi2_contingency, fisher_exact

# Clinical features. The last two rows of the sheet are dropped; .copy() makes the
# result an independent frame so the column assignments below do not write to a
# slice of the original (avoids SettingWithCopyWarning / chained-assignment issues).
feature = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features6.xlsx", sheet_name = 'Sheet1')
feature = feature.iloc[:-2, :].copy()

# Derived variables
feature['Immunotherapy'] = ~feature['Immunotherapy Medications'].isna()
feature['CIVADs'] = ~feature['CIVAD Medications'].isna()
feature['STESS_5'] = feature['STESS'] >= 5
feature['mRS_4'] = feature['mRS_premorbid'] >= 4
# Counts upper-case characters in the medication string (0 when missing).
# NOTE(review): presumably each drug is encoded by one capital letter - confirm against the sheet.
feature['CIVADs_num'] = feature['CIVAD Medications'].apply(lambda x: sum(1 for c in str(x) if c.isupper()) if pd.notna(x) else 0)
feature['Age_65'] = feature['Age'] >=65
feature['Mg_change'] = feature['max_Mg_conc'] - feature['Mg_baseline']
feature['Mg_8.5'] = feature['max_Mg_conc'] >= 8.5
feature['mRS_Discharge_4'] = feature['mRS_Discharge'] >= 4

# Split the variables by type
feature_cat = feature[['Class_SRSE', 'CIVADs', 'Sex', 'Immunotherapy',
    'Class_NORSE', 'Mg type', 'STESS_5', 'mRS_4', 'Age_65', 'Mg_8.5', 'mRS_Discharge_4', 'Inhospital_Mortality']]

feature_mulvar = feature[['Etiology', 'Sz Type']]

feature_num = feature[['CIVADs_num', 'AED_number', 
'Age', 'STESS',  'mRS_premorbid', 'max_Mg_conc', 'Mg_change', 'mRS_Discharge', 'Hospital_days', 'SE_Duration']]

# Odds ratio and its confidence interval for STESS >= 5 versus response.
cross = pd.crosstab(feature_cat['STESS_5'], feature['response'])
table2x2 = sm.stats.Table2x2(cross)

odds_ratio = table2x2.oddsratio
odds_ratio_ci = table2x2.oddsratio_confint()

# Display the contingency table as the cell output.
cross

In [None]:
import numpy as np
import pandas as pd

# Example 2x2 contingency table laid out as [[a, b], [c, d]]
observed_table = np.array([[2, 10],
                           [5, 3]])

def compute_odds_ratio(table):
    """Return the odds ratio (a*d)/(b*c) of a 2x2 table laid out as [[a, b], [c, d]].

    Returns NaN when b*c equals zero. No continuity correction is applied;
    adding a small value such as 0.5 to the cells is one way to handle zeros.
    """
    (a, b), (c, d) = table[0], table[1]
    denominator = b * c
    if denominator == 0:
        return np.nan
    return (a * d) / denominator

def bootstrap_odds_ratio(observed_table, n_bootstrap=10000, seed=None):
    """Percentile-bootstrap estimate of the odds ratio of a 2x2 table.

    Each row of the table is expanded into individual binary observations
    (1 for the first column, 0 for the second) and resampled with replacement
    independently of the other row.

    Parameters
    ----------
    observed_table : array-like
        2x2 table of counts laid out as [[a, b], [c, d]].
    n_bootstrap : int, optional
        Number of bootstrap replicates to draw.
    seed : int or None, optional
        Seed for the random generator; pass an integer for reproducible
        results. ``None`` (default) draws fresh entropy on every call.

    Returns
    -------
    tuple
        ``(median, (lower, upper))`` with the 2.5th and 97.5th percentiles of
        the bootstrap odds ratios. All values are NaN when no replicate
        yields a finite odds ratio.

    Notes
    -----
    Replicates in which b or c is zero are discarded, so the interval is
    conditional on a finite odds ratio.
    """
    (a, b), (c, d) = observed_table[0], observed_table[1]

    # Rebuild the individual observations behind each row of counts.
    group1 = np.array([1] * int(a) + [0] * int(b))
    group2 = np.array([1] * int(c) + [0] * int(d))
    n1, n2 = len(group1), len(group2)

    no_estimate = (np.nan, (np.nan, np.nan))
    if n1 == 0 or n2 == 0:
        return no_estimate

    rng = np.random.default_rng(seed)
    boot_odds = []
    for _ in range(n_bootstrap):
        a_bs = int(rng.choice(group1, size=n1, replace=True).sum())
        c_bs = int(rng.choice(group2, size=n2, replace=True).sum())
        b_bs = n1 - a_bs
        d_bs = n2 - c_bs
        # The odds ratio is undefined when the denominator b*c is zero.
        if b_bs == 0 or c_bs == 0:
            continue
        boot_odds.append((a_bs * d_bs) / (b_bs * c_bs))

    # np.percentile cannot summarise an empty sample.
    if not boot_odds:
        return no_estimate

    boot_odds = np.array(boot_odds)
    lower = np.percentile(boot_odds, 2.5)
    upper = np.percentile(boot_odds, 97.5)
    return np.median(boot_odds), (lower, upper)

# Run the bootstrap on the example table and print the median odds ratio and the percentile interval
median_or, ci = bootstrap_odds_ratio(observed_table, n_bootstrap=10000)
print(f"부트스트랩 기반 오즈비 중앙값: {median_or:.4f}")
print(f"부트스트랩 기반 95% 신뢰구간: {ci}")

In [None]:
# Sanity check of each categorical variable: name, distinct values, dtype,
# and its cross-tabulation against the response column.
for col, column_values in feature_cat.items():
    print(col)
    print(column_values.unique())
    print(column_values.dtype)
    print(pd.crosstab(column_values, feature['response']))


In [None]:
# NOTE(review): `df` must already hold 'mRS_3mo_text' and 'response_10'; the `df`
# created by the preceding cells of this notebook is built from other sheets -
# confirm which frame is intended before a fresh Restart & Run All.

# Convert the free-text 3-month mRS to numbers (non-numeric text becomes NaN) and drop those rows.
df['mRS_3mo'] = pd.to_numeric(df['mRS_3mo_text'], errors='coerce')
df = df.dropna(subset=['mRS_3mo'])

# Split the 3-month mRS by response group
responder_mask = df['response_10'] == True
non_responder_mask = df['response_10'] == False
group1 = df.loc[responder_mask, 'mRS_3mo'].values
group2 = df.loc[non_responder_mask, 'mRS_3mo'].values

# Mann-Whitney U test between the two groups
s, p = stats.mannwhitneyu(group1, group2)

print(f"통계량: {s}, p-값: {p}")

# Dichotomised outcome (mRS >= 4, then mRS >= 3) versus response: Fisher's exact test
for cutoff in (4, 3):
    flag_col = f'mRS_3mo_{cutoff}'
    df[flag_col] = df['mRS_3mo'] >= cutoff
    cross = pd.crosstab(df[flag_col], df['response_10'])
    s, p = stats.fisher_exact(cross)
    print(f'통계량: {s},  p-값: {p}')

# 타임 포인트에 대한 RM ANOVA (개인 수준의 분석) at Results

정규성 검정을 일단 하고, ANOVA 및 partial eta-squared와 omega squared를 구해보자

In [None]:
# Subject labels to analyse. They must be string literals: written as bare
# identifiers they are parsed as variable names and raise NameError.
# NOTE(review): these look like personal names; consider replacing them with
# pseudonymised IDs before the notebook is shared.
name_list = [
    '고정실', '김가람', '김득실', '김영현', '김충연', '민병춘1', '박주연1',
    '벌', '안중훈', '윤병시', '이미우', '임석봉', '전창희',
    '정금례', '정용태', '이귀임', '정복연', '김정한', '정광훈1', '조진욱']



In [None]:
def partial_eta_squared(f_value, df_effect, df_error):
    """
    Compute partial eta squared from an F statistic and its degrees of freedom.

    Parameters:
    -----------
    f_value : float
        F statistic
    df_effect : int
        Degrees of freedom of the effect (treatment)
    df_error : int
        Degrees of freedom of the error term

    Returns:
    --------
    float
        (F * df_effect) / (F * df_effect + df_error); lies between 0 and 1
        for non-negative inputs
    """
    effect_term = f_value * df_effect
    return effect_term / (effect_term + df_error)

def cohens_d(group1, group2):
    """Return Cohen's d for two samples: mean(group1) - mean(group2) over the pooled SD.

    The pooled standard deviation weights each sample variance (ddof=1) by its
    degrees of freedom, n - 1.
    """
    n1, n2 = len(group1), len(group2)
    var1 = np.std(group1, ddof=1) ** 2
    var2 = np.std(group2, ddof=1) ** 2
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
    mean_difference = np.mean(group1) - np.mean(group2)
    return mean_difference / pooled_std



In [None]:
def remove_artifacts(before_mean, after_mean, large_artifact, sfreq=200, split_sec=3600):
    """Drop the samples covered by large-artifact intervals from both segments.

    Artifact times are given in seconds on the timeline of the whole recording.
    An interval before ``split_sec`` maps to the same sample positions; an
    interval after it is shifted back by ``split_sec``; an interval that
    straddles ``split_sec`` is split into the tail of the first segment and the
    head of the second. The resulting set of sample positions is removed from
    BOTH arrays, whichever segment the artifact occurred in.

    Parameters
    ----------
    before_mean, after_mean : array-like
        Time series of the first and second segment.
    large_artifact : iterable of (t_start, t_end)
        Artifact intervals in seconds; may be empty.
    sfreq : int, optional
        Samples per second used to convert seconds to sample positions.
    split_sec : int, optional
        Time in seconds at which the recording is split into the two segments.

    Returns
    -------
    tuple of numpy.ndarray
        ``(before_clean, after_clean)``: flattened copies without the artifact samples.

    Notes
    -----
    Sample positions beyond the end of an array are ignored. Without this
    clipping, an interval that reaches ``split_sec`` raises IndexError when the
    segment is shorter than ``split_sec * sfreq`` samples.
    """
    # Translate every artifact interval into within-segment spans (in seconds).
    spans = []
    for t_start, t_end in large_artifact:
        if t_start < split_sec and t_end < split_sec:
            spans.append((t_start, t_end))
        elif t_start < split_sec:
            spans.append((t_start, split_sec))
            spans.append((0, t_end - split_sec))
        else:
            spans.append((t_start - split_sec, t_end - split_sec))

    def _drop_spans(values):
        """Return a flattened copy of ``values`` without the samples inside ``spans``."""
        flat = np.asarray(values).ravel()
        drop = np.zeros(flat.size, dtype=bool)
        for lo_sec, hi_sec in spans:
            # Clip to the array bounds; empty or inverted spans select nothing.
            lo = max(int(lo_sec * sfreq), 0)
            hi = min(int(hi_sec * sfreq), flat.size)
            if lo < hi:
                drop[lo:hi] = True
        return flat[~drop]

    return _drop_spans(before_mean), _drop_spans(after_mean)

def process_band_data(tfr_data, tfr_data_day1, large_artifact):
    """Collapse TFR power to one time series per segment and remove large artifacts.

    ``tfr_data`` is sliced along its last axis into a "before" window
    (samples 0 .. 3599*200) and an "after" window (samples 3600*200 .. 7199*200);
    ``tfr_data_day1`` is used whole. Each array is averaged over its first two
    axes, then ``remove_artifacts`` is applied to the before/after pair.
    The day-1 series is returned as an unmodified copy.

    NOTE(review): the arrays are presumably (channels, freqs, times) sampled at
    200 Hz - confirm against the TFR files.

    Returns:
        tuple: ``(before_clean, after_clean, day1_clean)``.
    """
    samples_per_sec = 200
    before_window = slice(0, 3599 * samples_per_sec)
    after_window = slice(3600 * samples_per_sec, 7199 * samples_per_sec)

    def _collapse(power):
        # Mean over axis 0, then over the new axis 0: only the last axis remains.
        return power.mean(axis=0).mean(axis=0)

    before_mean = _collapse(tfr_data[:, :, before_window])
    after_mean = _collapse(tfr_data[:, :, after_window])
    day1_mean = _collapse(tfr_data_day1)

    # Artifact removal is applied to the infusion recording only.
    before_clean, after_clean = remove_artifacts(before_mean, after_mean, large_artifact)
    day1_clean = day1_mean.copy()
    return before_clean, after_clean, day1_clean

def main():
    """Interactively load a subject's TFR power and return artifact-cleaned series.

    Prompts for a comma-separated list of subject names, reads the large-artifact
    table, loads the per-band TFR files of the infusion recording and of the
    day-1 recording, merges Gamma1/Gamma2 along axis 1 and runs
    ``process_band_data`` on each band.

    Returns:
        tuple: ``(before_clean, after_clean, day1_clean)`` of the LAST band
        processed ('Gamma') for the FIRST subject entered.

    Raises:
        ValueError: if no subject name was entered.
        KeyError: if an expected band is missing after loading.
    """
    import ast  # local import: only needed to parse the artifact coordinates

    # Get analysis targets; surrounding whitespace is stripped so "a, b" works like "a,b".
    raw_names = input("분석대상 이름을 입력하세요. (여러명일 경우 쉼표로 구분)").split(',')
    name_list = [entry.strip() for entry in raw_names if entry.strip()]
    print(f'분석대상은 {name_list}입니다.')
    if not name_list:
        raise ValueError("No subject name was entered.")

    # Table of large artifacts: one row per subject, 'Coordinates' holds the intervals.
    large_artifact_data = pd.read_csv(r"C:\Users\Brain_Science\Documents\GitHub\Mg_infusion_coma\large_artifact.csv", encoding='utf-8-sig')

    # Process each name
    for name in name_list:
        print(f'{name}의 데이터를 분석합니다.')

        # Get large artifacts ('skip' or a missing row means there are none).
        large_artifact = []
        if name in large_artifact_data['Name'].values:
            coordinates = large_artifact_data[large_artifact_data['Name'] == name]['Coordinates'].values[0]
            if coordinates != 'skip':
                # literal_eval only accepts Python literals, so text in the CSV
                # cannot execute arbitrary code the way eval() could.
                # NOTE(review): assumes the cell is a plain literal such as
                # "[(10, 20), (30, 40)]" - confirm no cell contains expressions.
                large_artifact = ast.literal_eval(coordinates)
        print(f"{name}의 large artifact: {large_artifact}")

        # Load band data for both recordings.
        tfr_data = {}
        tfr_data_day1 = {}
        for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma1', 'Gamma2']:
            file_path = rf'E:\Mg_EEG\tfr_files_gamma\{name}{band}_7200_tfr.h5'
            file_path_day1 = rf"E:\Mg_EEG\tfr_files_subacute\{name}_day1_{band}_tfr.h5"
            tfr = mne.time_frequency.read_tfrs(file_path)
            tfr_day1 = mne.time_frequency.read_tfrs(file_path_day1)
            tfr_data[band] = tfr.data
            tfr_data_day1[band] = tfr_day1.data
            del tfr, tfr_day1
        print(f"{name}의 데이터를 불러왔습니다.")

        # Combine the two gamma sub-bands along axis 1 and release the parts
        # (in both recordings, so the day-1 halves do not stay in memory).
        for band_store in (tfr_data, tfr_data_day1):
            if 'Gamma1' in band_store and 'Gamma2' in band_store:
                band_store['Gamma'] = np.concatenate(
                    [band_store.pop('Gamma1'), band_store.pop('Gamma2')], axis=1
                )
        print(f"{name}의 Gamma1과 Gamma2 데이터를 결합했습니다.")

        # Process each band
        for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']:
            if band not in tfr_data:
                # Raise instead of exit(): exit() would stop the whole notebook kernel.
                raise KeyError(f"Warning: {band} data not found")

            before_clean, after_clean, day1_clean = process_band_data(tfr_data[band], tfr_data_day1[band], large_artifact)
            print(f"{band} 데이터의 artifact를 제거했습니다.")

            del tfr_data[band]
            print(f"{band} 데이터를 삭제했습니다.")

        # NOTE(review): this return sits inside the subject loop, so only the first
        # subject is processed and only its last band ('Gamma') reaches the caller.
        # Kept as in the original because the downstream cells unpack exactly three
        # arrays; confirm whether per-subject / per-band results should be collected.
        return before_clean, after_clean, day1_clean
    ## Save results
    #wb.save(r"C:\Users\esin4\OneDrive\바탕 화면\Github\Mg_infusion_coma\Mg_infusion_data.xlsx")
    #print(f"{name_list}의 데이터 분석결과를 저장했습니다.")
    #print(f"{name_list}의 데이터 분석이 완료되었습니다.")

In [None]:
before_clean, after_clean, day1_clean = main()

In [None]:
day1_clean.shape

In [None]:
# Shapiro-Wilk normality test on each of the three cleaned series.
# NOTE(review): SciPy documents that the Shapiro-Wilk p-value may be inaccurate
# for samples larger than 5000 - check the series lengths.
for samples in (before_clean, after_clean, day1_clean):
    stat, p = stats.shapiro(samples)
    print(f"Shapiro-Wilk Test p-value: {p:.4f}")

In [None]:
# Levene's test for equality of variances across the three cleaned series.
stat, p = stats.levene(before_clean, after_clean, day1_clean)
print(f"Levene Test p-value: {p:.4f}")


In [None]:
from scipy import stats

# One-way ANOVA across the three cleaned series.
# NOTE(review): stats.f_oneway treats the samples as independent groups; the
# section heading mentions a repeated-measures ANOVA - confirm this is the intended test.
anova_result = stats.f_oneway(before_clean, after_clean, day1_clean)
f_stat, p_value = anova_result

print(f"F-statistic: {f_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision at the chosen significance level
alpha = 0.05
verdict = "유의한 차이가 있음 (귀무가설 기각)" if p_value < alpha else "그룹 간 평균 차이가 없음 (귀무가설 채택)"
print(verdict)


In [None]:
import pandas as pd
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Long-format table: one row per sample, labelled with the series it came from.
segments = {
    'before_clean': before_clean,
    'after_clean': after_clean,
    'day1_clean': day1_clean,
}
group_labels = []
for label, values in segments.items():
    group_labels.extend([label] * len(values))

data = pd.DataFrame({
    'score': np.concatenate(list(segments.values())),
    'group': group_labels,
})

# Tukey's HSD post-hoc comparison of all pairs of series
tukey_result = pairwise_tukeyhsd(data['score'], data['group'], alpha=0.05)
print(tukey_result)

# Table 2

In [None]:
df = pd.read_excel(r"E:\주성\documents\2025\Mg comma\merged_power_data.xlsx")

# Relative change versus the pre-infusion value, in percent
df['post_change'] = (df['post'] - df['pre']) / df['pre']*100
df['day1_change'] = (df['day1'] - df['pre']) / df['pre']*100

change_cols = ['post_change', 'day1_change']

# Summary statistics (describe() includes the median and Q1/Q3), overall and per band
print('total')
print(df[change_cols].describe())
for band in df['band'].unique():
    print(band)
    print(df.loc[df['band']==band][change_cols].describe().round(2))


def _print_decrease_summary(frame):
    """Print, per change column, how many values decreased (< 0) and their proportion.

    The proportion is taken over the non-missing values of ``frame`` rather
    than a fixed denominator, so it stays correct for any number of rows.
    """
    for col in change_cols:
        values = frame[col].dropna()
        n_decreased = int((values < 0).sum())
        proportion = n_decreased / len(values) if len(values) else float('nan')
        print(col, n_decreased, proportion)


# Number and proportion of decreases, overall and per band.
# (print() returns None, so its result must not be stored and divided.)
print('total')
_print_decrease_summary(df)

for band in df['band'].unique():
    print(band)
    _print_decrease_summary(df.loc[df['band'] == band])

# Supplementary Table 2

In [None]:
df = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features6.xlsx", sheet_name='Sheet1')
df = df.iloc[:-2, :]  # the last two rows of the sheet are dropped

# .copy() gives an independent frame, so the two columns added below are not
# written to a slice of `df` (avoids SettingWithCopyWarning).
df1 = df[['Mg type', 'mRS_Discharge', 'mRS_3mo_text', 'Inhospital_Mortality', 'SE_Duration', 'Hospital_days']].copy()
df1['mRS_Discharge_4'] = df1['mRS_Discharge'] >=4
df1['mRS_Discharge_5'] = df1['mRS_Discharge'] >=5

# Continuous outcomes by Mg type (1 vs 2): Mann-Whitney U p-value, then
# median, Q1, Q3 of type 1 followed by median, Q1, Q3 of type 2.
for col in ['mRS_Discharge', 'SE_Duration', 'Hospital_days']:
    x = df1.loc[df1['Mg type']==1][col]
    y = df1.loc[df1['Mg type']==2][col]
    s, p = stats.mannwhitneyu(x, y)
    print(col, p, x.median(), x.quantile(0.25), x.quantile(0.75), y.median(), y.quantile(0.25), y.quantile(0.75))

# Binary outcomes by Mg type: Fisher's exact test on the cross-tabulation
for col in ['mRS_Discharge_4', 'Inhospital_Mortality', 'mRS_Discharge_5']:
    cross = pd.crosstab(df1['Mg type'], df1[col])
    s, p = stats.fisher_exact(cross)
    print(col, cross, p)

In [None]:
import pandas as pd
import numpy as np
from scipy import stats

# NOTE(review): `df2` is not created anywhere in the visible notebook; it must
# provide 'time_until_Mg', the three continuous outcomes and the three binary
# outcomes used below - confirm its origin before a fresh Restart & Run All.
# NOTE(review): 20 thresholds x 6 outcomes are tested with no multiplicity
# adjustment; treat the p-values as exploratory.

# Collected results, one dict per threshold
results = []

# Scan thresholds 1..20 of 'time_until_Mg'
for time in range(1, 21):
    # Boolean indicator column for this threshold (added to df2)
    col_time = f'time_{time}'
    df2[col_time] = df2['time_until_Mg'] >= time

    res = {'threshold': time}

    # Continuous outcomes: Mann-Whitney U test between the two sides of the threshold
    for col in ['mRS_Discharge', 'SE_Duration', 'Hospital_days']:
        group_true = df2.loc[df2[col_time] == True, col]
        group_false = df2.loc[df2[col_time] == False, col]

        try:
            s, p_val = stats.mannwhitneyu(group_true, group_false)
        except ValueError:
            # Only invalid input (e.g. an empty group) yields NaN; any other
            # error is allowed to surface instead of being silently hidden.
            p_val = np.nan

        # p-value plus median and quartiles of each group
        res[f'p_{col}'] = p_val
        res[f'{col}_median_true'] = group_true.median()
        res[f'{col}_median_false'] = group_false.median()
        res[f'{col}_Q1_true'] = group_true.quantile(0.25)
        res[f'{col}_Q3_true'] = group_true.quantile(0.75)
        res[f'{col}_Q1_false'] = group_false.quantile(0.25)
        res[f'{col}_Q3_false'] = group_false.quantile(0.75)

    # Binary outcomes: Fisher's exact test
    for col in ['mRS_Discharge_4', 'Inhospital_Mortality', 'mRS_Discharge_5']:
        cross = pd.crosstab(df2[col_time], df2[col])
        try:
            s, p_cat = stats.fisher_exact(cross)
        except ValueError:
            # A table of unsupported shape (e.g. one side of the threshold is
            # empty, so the table is not 2x2) yields NaN.
            p_cat = np.nan
        res[f'p_{col}'] = p_cat

    results.append(res)

# One row per threshold
result_df = pd.DataFrame(results)
print(result_df)


In [None]:
result_df.to_csv(r"D:\주성\documents\2025\Mg comma\result_df.csv")

In [None]:
result_df.loc[result_df['threshold']==11].T