In [1]:
import pandas as pd
import numpy as np

# Colab-specific upload helper; this cell only runs inside a Google Colab runtime.
from google.colab import files
# Opens the browser upload prompt. The recorded cell output shows the chosen
# workbook (202501.xlsx) being saved under the same name.
# NOTE(review): `uploaded` is not referenced again in the visible cells.
uploaded = files.upload()

Saving 202501.xlsx to 202501.xlsx


In [2]:
# File name of the workbook to analyze; it matches the name reported by the
# upload cell's output.
excel_path = '202501.xlsx'
# Load the workbook into a DataFrame using read_excel's default options.
df = pd.read_excel(excel_path)

In [13]:
# Select the columns to analyze: every header that contains one of the
# concentration unit tags below (e.g. "Al(ng/m3)", "Conc(ug/m3)").
column_pattern = ['(ng/m3)', '(ug/m3)']
# Column labels read from Excel are not guaranteed to be strings (numeric or
# date header cells can come back as non-string labels), and `p in c` raises
# TypeError on those, so only string labels are tested against the patterns.
columns_to_analyze = [
    c for c in df.columns
    if isinstance(c, str) and any(p in c for p in column_pattern)
]

In [14]:
# Mode of a series; ties are resolved by taking the smallest value.
def safe_mode(series):
    """Return the most frequent non-missing value of ``series``.

    Missing values are dropped first. If several values share the highest
    count, the smallest of them is returned. NaN is returned when nothing
    remains after dropping missing values.
    """
    valid = series.dropna()
    if len(valid) == 0:
        return np.nan

    candidates = valid.mode()
    if candidates.empty:
        return np.nan
    return candidates.min()

In [15]:
# Percentile of a series, ignoring missing values.
def nanpercentile(series, q):
    """Return the ``q``-th percentile of ``series`` as a Python float.

    Missing values are dropped before the computation, and linear
    interpolation is used between data points. NaN is returned when no
    non-missing values are left.
    """
    values = series.dropna().to_numpy()
    if values.size:
        return float(np.percentile(values, q, method='linear'))
    return np.nan

In [16]:
# Descriptive statistics for a single column.
def compute_stats(s):
    """Summarise ``s`` as a dict of descriptive statistics.

    Missing values are dropped first. The result always has the same keys in
    the same order: Mean, Std. Error of Mean, Mode, Median, Std. Deviation,
    Range, Minimum, Maximum, Sum, then the percentiles P5 through P100.
    When no non-missing values remain, every entry is NaN.

    The standard deviation is the sample one (ddof=1) and the standard error
    of the mean is that deviation divided by sqrt(number of valid values).
    """
    # (label, percentile level) pairs, in output order.
    percentile_spec = (
        ('P5', 5),
        ('P25', 25),
        ('P50', 50),
        ('P75', 75),
        ('P95', 95),
        ('P100', 100),
    )
    base_labels = ['Mean', 'Std. Error of Mean', 'Mode', 'Median',
                   'Std. Deviation', 'Range', 'Minimum', 'Maximum', 'Sum']

    valid = s.dropna()
    count = len(valid)  # number of usable observations
    if count == 0:
        all_labels = base_labels + [label for label, _ in percentile_spec]
        return dict.fromkeys(all_labels, np.nan)

    spread = valid.std(ddof=1)  # sample standard deviation
    lowest = valid.min()
    highest = valid.max()

    summary = {
        'Mean': valid.mean(),
        'Std. Error of Mean': spread / np.sqrt(count),
        'Mode': safe_mode(valid),
        'Median': valid.median(),
        'Std. Deviation': spread,
        'Range': highest - lowest,
        'Minimum': lowest,
        'Maximum': highest,
        'Sum': valid.sum(),
    }
    # Percentiles are appended last so they keep their place in the key order.
    for label, level in percentile_spec:
        summary[label] = nanpercentile(valid, level)
    return summary

In [17]:
# Compute the statistics for every selected column (column name -> stats dict).
stats_dict = dict(
    (name, compute_stats(df[name])) for name in columns_to_analyze
)

# Turn the mapping into a table: one column per analyzed column, one row per statistic.
stats_table = pd.DataFrame(data=stats_dict)

# Round to three decimal places for display.
stats_table_rounded = stats_table.round(decimals=3)

# Render the rounded table.
from IPython.display import display
display(stats_table_rounded)

Unnamed: 0,Conc(ug/m3),Al(ng/m3),Si(ng/m3),S(ng/m3),K(ng/m3),Ca(ng/m3),Ti(ng/m3),V(ng/m3),Cr(ng/m3),Mn(ng/m3),...,Cd(ng/m3),In(ng/m3),Sn(ng/m3),Sb(ng/m3),Te(ng/m3),Cs(ng/m3),Ba(ng/m3),Ce(ng/m3),Pb(ng/m3),Bi(ng/m3)
Mean,16.865,535.975,1165.064,4073.718,311.863,143.559,0.816,3.027,2.339,5.757,...,215.833,26.379,5.93,1.957,0.974,1.065,2.222,3.992,9.519,2.899
Std. Error of Mean,0.484,9.582,40.235,71.223,6.22,2.899,0.144,0.078,0.093,0.186,...,2.284,0.843,0.601,0.952,0.86,0.241,0.315,0.327,0.467,0.124
Mode,13.72,166.26,903.08,4337.73,206.35,96.0,0.88,3.14,3.29,0.23,...,135.75,-5.22,-4.55,-29.11,-32.66,-2.3,0.0,-1.13,1.39,3.78
Median,13.72,510.645,943.235,3633.105,285.53,132.65,0.46,2.85,2.545,6.57,...,211.845,24.61,5.08,1.41,0.285,-0.6,0.0,1.68,6.29,2.935
Std. Deviation,11.012,215.108,903.284,1598.947,139.649,65.078,3.225,1.756,2.088,4.171,...,51.286,18.916,13.495,21.364,19.305,5.406,7.065,7.342,10.473,2.782
Range,96.19,1756.45,8911.15,9116.3,943.61,637.76,31.48,11.0,10.61,19.02,...,267.44,129.32,78.24,123.47,119.32,36.64,63.89,53.92,59.75,15.76
Minimum,-48.57,166.26,594.85,1094.29,122.71,78.88,-3.52,-0.11,-1.94,-2.32,...,111.09,-20.74,-29.0,-56.85,-55.13,-4.51,0.0,-2.39,-2.86,-3.53
Maximum,47.62,1922.71,9506.0,10210.59,1066.32,716.64,27.96,10.89,8.67,16.7,...,378.53,108.58,49.24,66.62,64.19,32.13,63.89,51.53,56.89,12.23
Sum,8719.46,270131.32,587192.15,2053153.73,157178.79,72353.61,411.47,1525.81,1178.99,2901.29,...,108779.87,13294.87,2988.76,986.31,490.71,537.0,1119.69,2011.93,4797.49,1460.93
P5,4.992,275.793,656.102,2212.358,146.0,94.458,-2.906,0.479,-0.838,-0.887,...,144.451,0.127,-15.216,-32.04,-29.664,-3.177,0.0,-1.58,-0.926,-1.558
