In [62]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm

# Table 1

In [None]:
magnesium = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features2.xlsx", sheet_name='Table2')
magnesium1 = magnesium.loc[magnesium['Mg Protocol Type']==1]
magnesium2 = magnesium.loc[magnesium['Mg Protocol Type']==2]
print(magnesium1.describe())
print(magnesium2.describe())

from scipy.stats import mannwhitneyu as mann
for col in ['Mg Starting Date', 'Mg Infusion Day', 'Loading Mg', 'Maintenance Mg', 'Total Mg', 'Max concentration']:
    s, p = mann(magnesium1[col], magnesium2[col])
    print(col, s, p)

# Figure 2

In [None]:
df = pd.read_csv(r"E:\주성\documents\2025\Mg comma\data2.csv")

# CSV 파일 읽기
data = df

# 밴드별로 데이터 나누기
bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']

plt.figure(figsize=(15, 10))
for idx, band in enumerate(bands, 1):
    plt.subplot(2, 3, idx)
    
    # 해당 밴드의 데이터만 추출
    band_data = data[data['band'] == band].copy()
    
    # 변화율 계산 ((post - pre) / pre)
    change = (band_data['mean_post_infusion'] - band_data['mean_pre_infusion']) / band_data['mean_pre_infusion']
    
    # 오름차순 정렬
    sorted_change = np.sort(change)
    
    # 바 차트 그리기
    plt.bar(range(len(sorted_change)), sorted_change)
    plt.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    plt.title(f'{band.capitalize()} Band')
    plt.ylim(-1, 2)  # y축 범위 수정

plt.suptitle('Power Change by Frequency Band', fontsize=14, y=1.02)    
plt.tight_layout()
plt.show()

# Table 3

In [58]:
# 결측값 확인
missing_values = feature.isna().sum()

# 결측값이 있는 컬럼만 출력
missing_columns = missing_values[missing_values > 0]
print("결측값이 있는 컬럼들:")
print(missing_columns)

# 결측값이 있는 행들을 확인
for col in missing_columns.index:
    print(f"\n'{col}' 컬럼에서 결측값이 있는 행:")
    print(feature[feature[col].isna()])

결측값이 있는 컬럼들:
Immunotherapy Medications    8
CIVAD Medications            7
dtype: int64

'Immunotherapy Medications' 컬럼에서 결측값이 있는 행:
    Patient No. Sex  Age  Premorbid mRS Score  STESS SRSE NORSE   Sz Type  \
2             3   F   79                    5      6   No    No      NCSE   
7             8   F   40                    0      0  Yes    No  Focal SE   
9            10   M   57                    2      4  Yes   Yes      NCSE   
10           11   M   77                    2      4   No    No      NCSE   
11           12   F   81                    5      6   No    No      NCSE   
14           15   F   37                    3      2  Yes    No  Focal SE   
16           17   F   88                    4      6   No    No      NCSE   
19           20   M   23                    1      3  Yes    No      NCSE   

                   Etiology  Number of AED Immunotherapy Medications  \
2   Remote vascular disease              3                       NaN   
7               Cryptogenic  

In [108]:
# response 여부 확인
power = pd.read_csv(r"D:\주성\documents\2025\Mg comma\data2.csv")
power = power.loc[power['band'].isin(['delta', 'theta'])]
power['diff'] = (power['mean_pre_infusion'] - power['mean_post_infusion'])>=0

response = power.groupby('name')['diff'].sum()==2

# clinical feature와의 연결
feature = pd.read_excel(r"D:\주성\documents\2025\Mg comma\clinical features2.xlsx", sheet_name = 'Table1')
feature['response'] = response.reset_index(drop=True)

# 파생변수 
feature['Immunotherapy'] = ~feature['Immunotherapy Medications'].isna()
feature['CIVADs'] = ~feature['CIVAD Medications'].isna()
feature['STESS_5'] = feature['STESS'] >= 5
feature['mRS_4'] = feature['Premorbid mRS Score'] >= 4
feature['CIVADs_num'] = feature['CIVAD Medications'].apply(lambda x: sum(1 for c in str(x) if c.isupper()) if pd.notna(x) else 0)
feature['Age_65'] = feature['Age'] >=65

# fisher exact test with 95% CI

def fisher_ci(table, alpha=0.05):
    # Fisher’s Exact Test 수행
    odds_ratio, p_value = stats.fisher_exact(table)
    
    # 신뢰구간 계산을 위한 데이터 추출
    a = table.iloc[0, 0]
    b = table.iloc[0, 1]
    c = table.iloc[1, 0]
    d = table.iloc[1, 1] 
    
    # 로그 오즈비와 표준 오차 계산
    log_or = np.log(odds_ratio)
    se_log_or = np.sqrt(1/a + 1/b + 1/c + 1/d)  # 표준 오차

    # 신뢰구간 계산 (Z 값: 1.96 for 95% 신뢰구간)
    z = stats.norm.ppf(1 - alpha / 2)
    ci_lower = np.exp(log_or - z * se_log_or)
    ci_upper = np.exp(log_or + z * se_log_or)

    return odds_ratio, (ci_lower, ci_upper), p_value 

# Logistic Resgression with Odds ratio and 95% CI
def logistic_ci (df, target, predictors):
    # Logistic regression 수행
    X = df[predictors]
    X = sm.add_constant(X)
    y = df[target]
    model = sm.Logit(y, X)
    result = model.fit()

    # 오즈비 계산
    odds_ratio = np.exp(result.params[1])

    # p값
    p_value = result.pvalues[1]

    # coefficeint
    coef = result.params[1]

    # 95% 신뢰구간 계산
    conf = result.conf_int().iloc[1, :]
    ci = np.exp(conf).values  # 신뢰구간도 exp 변환

    return odds_ratio, ci, p_value, coef

# chi square test
def chisquare (table, alpha=0.05):
    chi, p_value, dof, exp = stats.chi2_contingency(table)
    return p_value


# 검정 시행
def anlayze_clinical_features_with_response(df):
    results = {}

    for col in df.columns:
        if col in ['SRSE', 'CIVADs', 'Sex', 'Immunotherapy', 'NORSE', 'Mg Protocol', 'STESS_5', 'mRS_4', 'Age_65']:
            table = pd.crosstab(feature[col], feature['response'])
            odds_ratio, ci, p_value = fisher_ci(table)
            results[col] = {'Odds Ratio': odds_ratio, 'CI': ci, 'p-value': p_value}

        elif col in ['CIVADs_num', 'Number of AED', 'Age', 'STESS',  'Premorbid mRS Score']:
            odds_ratio, ci, p_value, coef = logistic_ci(feature, 'response', col)
            results[col] = {'Odds Ratio': odds_ratio, 'CI': ci, 'p-value': p_value, 'Coefficient': coef}

        elif col in ['Sz Type', 'Etiology']:
            table = pd.crosstab(feature[col], feature['response'])
            p_value = chisquare(table)
            results[col] = {'p-value': p_value}
        else:
            pass

    result_df = pd.DataFrame(results).T

    return result_df

result_df = anlayze_clinical_features_with_response(feature)

Optimization terminated successfully.
         Current function value: 0.595755
         Iterations 6
Optimization terminated successfully.
         Current function value: 0.601632
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.602469
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.634731
         Iterations 5
Optimization terminated successfully.
         Current function value: 0.584098
         Iterations 6


  odds_ratio = np.exp(result.params[1])
  p_value = result.pvalues[1]
  coef = result.params[1]
  odds_ratio = np.exp(result.params[1])
  p_value = result.pvalues[1]
  coef = result.params[1]
  odds_ratio = np.exp(result.params[1])
  p_value = result.pvalues[1]
  coef = result.params[1]
  odds_ratio = np.exp(result.params[1])
  p_value = result.pvalues[1]
  coef = result.params[1]
  odds_ratio = np.exp(result.params[1])
  p_value = result.pvalues[1]
  coef = result.params[1]


In [111]:
from statsmodels.stats.multitest import multipletests
p_values = result_df['p-value']

rejected, pval_corrected, _, _ = multipletests(p_values, method='fdr_bh')

result_df['p_corrected'] = pval_corrected

In [112]:
result_df

Unnamed: 0,Odds Ratio,CI,p-value,Coefficient,p_corrected
Sex,4.0,"(0.549900494796587, 29.096173128410328)",0.349845,,0.559752
Age,0.964733,"[0.9149839875793558, 1.0171863396963075]",0.1838,-0.035904,0.371167
Premorbid mRS Score,0.694331,"[0.40264599879275387, 1.1973173698501371]",0.189451,-0.364807,0.371167
STESS,0.719911,"[0.43124979838234545, 1.201791046220174]",0.208782,-0.328627,0.371167
SRSE,4.444444,"(0.6159591405394889, 32.06882586798259)",0.173581,,0.371167
NORSE,0.875,"(0.13730874453778677, 5.575937662071502)",1.0,,1.0
Sz Type,,,0.64432,,0.736365
Etiology,,,0.500017,,0.666689
Number of AED,1.092451,"[0.8522763860191405, 1.400306561045909]",0.485138,0.088423,0.666689
Mg Protocol,0.4,"(0.05543632264855401, 2.886194328118434)",0.612616,,0.736365
