In [5]:
import gc
import numpy as np

"""STVM 계산(3차로 점선) + 지상부 추가"""
import pandas as pd

import os


# Fixed configuration values
###################################################################################################################
# Analysis window on the t(Entry) axis; the driver script cuts it into
# 300-wide bins (presumably seconds, i.e. 5-minute groups -- confirm).
start_interval = 1800
end_interval = 5400

# Weights w1..w6 of the six terms summed in calculate_stvm.
weights = {
    "w1": 1, "w2": 1, "w3": 1,
    "w4": 1, "w5": 1, "w6": 1,
}

vehicle_types = [100, 300, 630, 640, 650]

######## Detectors #########
# Surface entry detector (the result data starts from the 6th surface-entry
# detector, which is why this one is selected).
enter_line = 6
# Mainline detector
main_line = 60
############################


######## Ramps #############
# On-ramp (inflow)
input_ramp = 59

# Off-ramp (outflow)
output_ramp = 61
############################

######## Sections ##########
# Entry section (surface): detectors 6..10
entry_point = list(range(6, 11))

# Mainline section: detectors 41..240
middle_point = list(range(41, 241))

# Exit section (surface): detectors 271..275
exit_point = list(range(271, 276))
############################

###################################################################################################################

# 함수 모음
###################################################################################################################

# Mean speed
def speed_mean(df):
    """Aggregate speed per (TimeGroup, New_Measurement) and its relative change.

    Args:
        df: Frame with "TimeGroup", "New_Measurement" and "v[km/h]" columns.

    Returns:
        One row per (TimeGroup, New_Measurement) with:
        V_mean  - mean of "v[km/h]",
        V_count - number of non-null "v[km/h]" values,
        V_next  - V_mean of the following row within the same TimeGroup,
        delta_V - (V_next - V_mean) / V_mean, with missing values set to 0
                  (e.g. the last row of each TimeGroup, which has no successor).
    """
    grouped = df.groupby(["TimeGroup", "New_Measurement"])
    stats = grouped.agg(
        V_mean=pd.NamedAgg(column="v[km/h]", aggfunc="mean"),
        V_count=pd.NamedAgg(column="v[km/h]", aggfunc="count"),
    ).reset_index()

    # Shift inside each TimeGroup so a row never looks into the next time slice.
    stats["V_next"] = stats.groupby("TimeGroup")["V_mean"].shift(-1)

    relative_change = (stats["V_next"] - stats["V_mean"]) / stats["V_mean"]
    stats["delta_V"] = relative_change.fillna(0)
    return stats

# Density
def density_mean(df):
    """Add density K and its relative change to a copy of ``df``.

    Args:
        df: Frame with "TimeGroup", "V_count" and "V_mean" columns (the driver
            script passes the result of speed_mean).

    Returns:
        A new frame (``df`` is not modified) with the extra columns:
        K       - V_count * 12 / V_mean,
        K_next  - K of the following row within the same TimeGroup,
        delta_K - (K_next - K) / K, with missing values set to 0.
    """
    result = df.copy()

    # NOTE(review): the factor 12 presumably scales a 5-minute count to an
    # hourly flow (the time bins are 300 wide) -- confirm.
    result["K"] = df["V_count"] * 12 / df["V_mean"]
    result["K_next"] = result.groupby("TimeGroup")["K"].shift(-1)

    relative_change = (result["K_next"] - result["K"]) / result["K"]
    result["delta_K"] = relative_change.fillna(0)
    return result

# Heavy-vehicle ratio
def heavy_rate(original_df, measurement=None, heavy_types=(630, 640, 650)):
    """Compute the share of heavy vehicles per TimeGroup at one detector.

    Args:
        original_df: Frame with "TimeGroup", "New_Measurement" and
            "Vehicle type" columns.
        measurement: Detector (New_Measurement value) to evaluate. Defaults to
            the module-level ``enter_line``.
        heavy_types: "Vehicle type" codes counted as heavy vehicles.

    Returns:
        Frame with columns TimeGroup, heavy_count, total_count and
        rate (= heavy_count / total_count), one row per TimeGroup that has
        traffic at the detector.
    """
    if measurement is None:
        # Entry-section detector chosen in the configuration block.
        measurement = enter_line

    at_detector = original_df[original_df["New_Measurement"] == measurement]

    # Total vehicles per TimeGroup
    total_df = (
        at_detector
        .groupby("TimeGroup")
        .size()
        .reset_index(name="total_count")
    )

    # Heavy vehicles per TimeGroup
    heavy_df = (
        at_detector[at_detector["Vehicle type"].isin(list(heavy_types))]
        .groupby("TimeGroup")
        .size()
        .reset_index(name="heavy_count")
    )

    # Start from the totals: a TimeGroup without any heavy vehicle must stay
    # in the result with rate 0. Merging from the heavy-only counts would drop
    # it, and the inner merges in calculate_stvm would then lose that TimeGroup.
    heavy_rate_df = pd.merge(total_df, heavy_df, on="TimeGroup", how="left")
    heavy_rate_df["heavy_count"] = heavy_rate_df["heavy_count"].fillna(0).astype("int64")

    heavy_rate_df["rate"] = heavy_rate_df["heavy_count"] / heavy_rate_df["total_count"]
    return heavy_rate_df[["TimeGroup", "heavy_count", "total_count", "rate"]]


# Entry saturation
def entry_saturation(original_df):
    """Compute the entry saturation per TimeGroup at the ``enter_line`` detector.

    Args:
        original_df: Frame with "TimeGroup" and "New_Measurement" columns.

    Returns:
        Frame with columns TimeGroup, New_Measurement, entry_volume (row count
        at the detector) and "Phi_진입" (= entry_volume * 12 / 2200).
    """
    # Measured capacity C
    capacity = 2200

    # The inflow enters through the entry section, so a single detector of
    # that section is used for the measurement.
    at_entry = original_df.loc[original_df["New_Measurement"] == enter_line]
    volume = at_entry.groupby(["TimeGroup", "New_Measurement"]).size()
    entry_saturation_df = volume.reset_index(name="entry_volume")

    # The capacity is in vehicles/hour, so the 5-minute count is scaled by 12.
    hourly_volume = entry_saturation_df["entry_volume"] * 12
    entry_saturation_df["Phi_진입"] = hourly_volume / capacity

    return entry_saturation_df

# Ramp in/out flow ratio (RFR)
def rfr_rate(original_df):
    """Return the mainline volume per TimeGroup with a fixed RFR of 0.2.

    The intended computation is:
    1. Collect the data of one mainline detector (``main_line``).
    2. Collect the detectors before/after it (``input_ramp`` / ``output_ramp``).
    3. RFR = (on-ramp volume + off-ramp volume) / mainline volume.

    Only step 1 is performed at the moment; the off-ramp is not implemented
    yet, so RFR is fixed to 0.2 and the ramp volumes are not aggregated.

    Args:
        original_df: Frame with "TimeGroup" and "New_Measurement" columns.

    Returns:
        Frame with columns TimeGroup, New_Measurement, main_line (row count at
        the mainline detector) and RFR (constant 0.2).
    """
    main_df = (
        original_df[original_df["New_Measurement"] == main_line]
        .groupby(["TimeGroup", "New_Measurement"])
        .size()
        .reset_index(name="main_line")
    )

    # TODO: once the off-ramp exists, aggregate the input_ramp / output_ramp
    # volumes per TimeGroup and join them to main_df on "TimeGroup" before
    # dividing (a plain column-wise division would pair rows by position).
    main_df["RFR"] = 0.2
    return main_df

# Outflow normality ratio
def output_normality(original_df, entry_measurements=None, exit_measurements=None):
    """Compare exit-section volume with entry-section volume per TimeGroup.

    Args:
        original_df: Frame with "TimeGroup" and "New_Measurement" columns.
        entry_measurements: Detectors that form the entry section. Defaults to
            the module-level ``entry_point``.
        exit_measurements: Detectors that form the exit section. Defaults to
            the module-level ``exit_point``.

    Returns:
        Frame with columns TimeGroup, entry_volume, exit_volume,
        "F(outrate)" (= exit_volume / entry_volume) and
        "1-F(outrate)", one row per TimeGroup that appears on the entry side.
    """
    if entry_measurements is None:
        entry_measurements = entry_point
    if exit_measurements is None:
        exit_measurements = exit_point

    detector = original_df["New_Measurement"]

    # Entry section
    entry_df = (
        original_df[detector.isin(entry_measurements)]
        .groupby("TimeGroup")
        .size()
        .reset_index(name="entry_volume")
    )

    # Exit section
    exit_df = (
        original_df[detector.isin(exit_measurements)]
        .groupby("TimeGroup")
        .size()
        .reset_index(name="exit_volume")
    )

    # Join on the TimeGroup key rather than concatenating side by side:
    # a positional concat pairs row i with row i, which assigns exit volumes
    # to the wrong TimeGroup as soon as the two sides have different groups.
    normality_df = entry_df.merge(exit_df, on="TimeGroup", how="left")
    # A TimeGroup without any exit-section record has an exit volume of 0.
    normality_df["exit_volume"] = normality_df["exit_volume"].fillna(0).astype("int64")

    normality_df["F(outrate)"] = normality_df["exit_volume"] / normality_df["entry_volume"]
    normality_df["1-F(outrate)"] = 1 - normality_df["F(outrate)"]

    return normality_df

def calculate_stvm(speed_df, density_df, heavy_df, entry_saturation_df, rfr_df, normality_df):
    """Combine the six indicator frames into the weighted STVM value.

    delta_V and delta_K are joined per (TimeGroup, New_Measurement); the other
    four indicators exist once per TimeGroup and are joined on TimeGroup only.
    All joins are inner joins, so a TimeGroup missing from any input frame is
    absent from the result.

    Returns:
        Frame with columns TimeGroup, New_Measurement and STVM, where
        STVM = w1*delta_V + w2*delta_K + w3*rate + w4*Phi_진입 + w5*RFR
               + w6*(1-F(outrate)) using the module-level ``weights``.
    """
    detector_keys = ["TimeGroup", "New_Measurement"]

    # Per-detector indicators
    merged = speed_df[detector_keys + ["delta_V"]].merge(
        density_df[detector_keys + ["delta_K"]], on=detector_keys
    )

    # Per-TimeGroup indicators, joined in a fixed order
    per_time_group = (
        (heavy_df, "rate"),
        (entry_saturation_df, "Phi_진입"),
        (rfr_df, "RFR"),
        (normality_df, "1-F(outrate)"),
    )
    for frame, column in per_time_group:
        merged = merged.merge(frame[["TimeGroup", column]], on="TimeGroup")

    # Weighted sum, accumulated left to right in the order w1..w6
    weighted_terms = (
        ("w1", "delta_V"),
        ("w2", "delta_K"),
        ("w3", "rate"),
        ("w4", "Phi_진입"),
        ("w5", "RFR"),
        ("w6", "1-F(outrate)"),
    )
    stvm = None
    for weight_key, column in weighted_terms:
        term = weights[weight_key] * merged[column]
        stvm = term if stvm is None else stvm + term
    merged["STVM"] = stvm

    return merged[["TimeGroup", "New_Measurement", "STVM"]]


def calculate_z_score(stvm_df):
    """Standardise STVM, convert it to a score and pivot to a wide table.

    Args:
        stvm_df: Frame with "TimeGroup", "New_Measurement" and "STVM" columns.
            It is not modified.

    Returns:
        Frame indexed by TimeGroup with one column per New_Measurement holding
        the converted score ("환산점수") produced by z_to_score.
    """
    # Work on a copy: the caller passes a column selection of another frame,
    # and adding columns to it in place both mutates the caller's data and
    # triggers pandas' SettingWithCopyWarning.
    scored = stvm_df.copy()

    # Mean and standard deviation over all rows (NaN ignored)
    mean_stvm = scored["STVM"].mean(skipna=True)
    std_stvm = scored["STVM"].std(skipna=True)

    # Z-score and its observed range, which bounds the two outermost bands
    scored["Z-Score"] = (scored["STVM"] - mean_stvm) / std_stvm
    z_max = scored["Z-Score"].max()
    z_min = scored["Z-Score"].min()

    scored["환산점수"] = scored["Z-Score"].apply(lambda z: z_to_score(z, z_min, z_max))

    return pd.pivot(scored, index="TimeGroup", columns="New_Measurement", values="환산점수")

def modify_frame(z_score_df):
    """Drop the outer detector columns and renumber the remaining ones.

    Columns 1..5 and 276..280 are removed (a KeyError is raised if any of them
    is missing). Of the remaining columns, the first five are relabelled
    -5..-1 and all following ones 1, 2, 3, ... in order.

    Args:
        z_score_df: Wide frame whose column labels include 1..5 and 276..280.
            It is not modified.

    Returns:
        A new frame with the relabelled columns.
    """
    outer_detectors = [*range(1, 6), *range(276, 281)]
    trimmed = z_score_df.drop(columns=outer_detectors)

    remaining = len(trimmed.columns)
    # Five negative labels first, then sequential numbers starting at 1
    # for everything after them.
    trimmed.columns = [*range(-5, 0), *range(1, remaining - 4)]
    return trimmed


# (z, percentile) knots of the piecewise-linear z -> percentile mapping.
# Between two neighbouring knots the percentile is interpolated linearly.
_Z_SCORE_KNOTS = (
    (-1.645, 5), (-1.282, 10), (-1.038, 15), (-0.842, 20),
    (-0.676, 25), (-0.526, 30), (-0.387, 35), (-0.255, 40),
    (0.255, 60), (0.387, 65), (0.526, 70), (0.676, 75),
    (0.842, 80), (1.038, 85), (1.282, 90), (1.645, 95),
)


def z_to_score(z, z_min, z_max):
    """Convert a z-score to a score between 50 and 100.

    The z-score is first mapped to a percentile (0..100) by linear
    interpolation between the knots in ``_Z_SCORE_KNOTS``; below the first
    knot the percentile runs from 0 at ``z_min`` to 5, above the last knot
    from 95 to 100 at ``z_max``. The score is ``50 + percentile * 0.5``.

    Compared with the previous if/elif chain this fixes:
    * the band [-0.676, -0.526) used the width of the neighbouring band
      (0.166 instead of 0.15) and therefore ended at 29.5 instead of 30;
    * the middle band [-0.255, 0.255) only rose from 40 to 45 although the
      next band starts at 60, leaving a 15-point jump at z = 0.255;
    * ``z_max == 1.645`` caused a division by zero in the top band.

    Args:
        z: The z-score to convert.
        z_min: Smallest observed z-score (lower end of the bottom band).
        z_max: Largest observed z-score (upper end of the top band).

    Returns:
        The score, or ``np.nan`` if ``z`` is NaN or outside [z_min, z_max].
    """
    lowest_z, lowest_pct = _Z_SCORE_KNOTS[0]
    highest_z, highest_pct = _Z_SCORE_KNOTS[-1]

    if highest_z <= z <= z_max:
        span = z_max - highest_z
        # span == 0 means z == z_max == highest_z: the maximum gets the top score.
        fraction = (z - highest_z) / span if span > 0 else 1.0
        percentile = highest_pct + (100 - highest_pct) * fraction
    elif z_min <= z < lowest_z:
        # z_min < lowest_z is guaranteed by the condition, so the width is > 0.
        percentile = lowest_pct * ((z - z_min) / (lowest_z - z_min))
    else:
        for (lo_z, lo_pct), (hi_z, hi_pct) in zip(_Z_SCORE_KNOTS, _Z_SCORE_KNOTS[1:]):
            if lo_z <= z < hi_z:
                percentile = lo_pct + (hi_pct - lo_pct) * ((z - lo_z) / (hi_z - lo_z))
                break
        else:
            # NaN, or a value outside [z_min, z_max]
            return np.nan

    return 50 + percentile * 0.5

def merged_varible(speed_df, density_df, heavy_df, entry_saturation_df, rfr_df, normality_df):
    """Show each indicator frame, preceded by its label, via ``display``.

    Relies on ``display`` being available as a global (as in a notebook
    session). Nothing is merged.

    Returns:
        Always ``False``.
    """
    labelled_frames = (
        ("speed_df : ", speed_df),
        ("density_df : ", density_df),
        ("heavy_df : ", heavy_df),
        ("entry_saturation_df : ", entry_saturation_df),
        ("rfr_df : ", rfr_df),
        ("normality_df : ", normality_df),
    )
    for label, frame in labelled_frames:
        display(label, frame)
    return False

###################################################################################################################

folder_path = r"C:\Users\(주)내일이엔시 도로교통안전연구소\Desktop\초장대 K-지하고속도로 인프라 안전 및 효율 향상 기술 개발(2차년도)\자료모음\시뮬레이션 설명 자료\소장님 컴펌 자료\mer파일\600"
# os.listdir() returns entries in arbitrary order; sort them so that the
# STVM_<n> numbering below is reproducible between runs.
mer_list = sorted(file for file in os.listdir(folder_path) if file.endswith(".mer"))

grouped_df = pd.DataFrame()
result_df = pd.DataFrame()

# Time bins of width 300 over [start_interval, end_interval); they are the
# same for every file, so they are built once.
bins = np.arange(start_interval, end_interval+1, 300)
labels = [f"{start}~{start+300}" for start in bins[:-1]]  # bin labels

for i, mer_file in enumerate(mer_list):
    # Read the whole file and close it before the (long) processing starts.
    with open(os.path.join(folder_path, mer_file), "r", encoding="utf-8", errors="ignore") as file:
        lines = file.readlines()

    # Index of the header row, i.e. the first line containing the column names
    data_start_idx = next((j for j, line in enumerate(lines) if "Measurem." in line), None)

    if data_start_idx is None:
        # No header row: nothing can be parsed. Report it instead of skipping silently.
        print(f"{mer_file}: 'Measurem.' 헤더를 찾지 못해 건너뜁니다")
        continue

    # Column names: split the header on ';' and strip whitespace
    columns = [col.strip() for col in lines[data_start_idx].strip().split(";")]

    # Data rows: everything after the header, blank lines skipped
    data_lines = lines[data_start_idx + 1:]
    data = [line.strip().split(";") for line in data_lines if line.strip()]

    df = pd.DataFrame(data, columns=columns)

    # Convert every column to numbers; unparsable values become NaN
    df = df.apply(pd.to_numeric, errors="coerce")

    # Rows with t(Entry) == -1 are excluded
    original_df = df[(df["t(Entry)"] != -1.00)].reset_index(drop=True)

    # Remove unneeded columns
    original_df.drop(columns=["b[m/s2]", "tQueue", "Occ", "Pers"], inplace=True, errors="ignore")

    original_df["New_Measurement"] = original_df["Measurem."] % 1000

    # Assign each record to its time bin (left-closed, right-open)
    original_df["TimeGroup"] = pd.cut(original_df["t(Entry)"], bins=bins, labels=labels, right=False)

    # Mean speed
    speed_df = speed_mean(original_df)

    # Density
    density_df = density_mean(speed_df)

    # Heavy-vehicle ratio
    heavy_df = heavy_rate(original_df)

    # Entry saturation
    entry_saturation_df = entry_saturation(original_df)

    # Ramp in/out flow ratio
    rfr_df = rfr_rate(original_df)

    # Outflow normality
    normality_df = output_normality(original_df)

    # STVM
    stvm_df = calculate_stvm(speed_df, density_df, heavy_df, entry_saturation_df, rfr_df, normality_df)

    # Z-score and converted score
    z_score_df = calculate_z_score(stvm_df)

    # Final layout of the result
    result_df = modify_frame(z_score_df)

    # Show the individual indicator frames (independent of the STVM result)
    varible_df = merged_varible(speed_df, density_df, heavy_df, entry_saturation_df, rfr_df, normality_df)

    excel_folder_path = os.path.join(folder_path, "STVM")
    os.makedirs(excel_folder_path, exist_ok=True)
    excel_file_name = f"STVM_{i+1}.xlsx"
    excel_file_path = os.path.join(excel_folder_path, excel_file_name)
    # NOTE: the export is currently disabled, so the message below is printed
    # although no file is written. Re-enable the next line to actually export.
    #result_df.to_excel(excel_file_path, index=True)
    print(f"{excel_file_name} 생성 완료")

    # Free the per-file frames before the next file is loaded
    del df, original_df, speed_df, density_df, heavy_df, entry_saturation_df, rfr_df, normality_df, stvm_df, z_score_df
    gc.collect()

'speed_df : '

Unnamed: 0,TimeGroup,New_Measurement,V_mean,V_count,V_next,delta_V
0,1800~2100,1,107.321854,151,107.324000,0.000020
1,1800~2100,2,107.324000,150,107.338356,0.000134
2,1800~2100,3,107.338356,146,107.255479,-0.000772
3,1800~2100,4,107.255479,146,107.185517,-0.000652
4,1800~2100,5,107.185517,145,107.106803,-0.000734
...,...,...,...,...,...,...
3355,5100~5400,276,106.664179,134,106.717910,0.000504
3356,5100~5400,277,106.717910,134,106.245522,-0.004427
3357,5100~5400,278,106.245522,134,105.785401,-0.004331
3358,5100~5400,279,105.785401,137,106.466667,0.006440


'density_df : '

Unnamed: 0,TimeGroup,New_Measurement,V_mean,V_count,V_next,delta_V,K,K_next,delta_K
0,1800~2100,1,107.321854,151,107.324000,0.000020,16.883793,16.771645,-0.006642
1,1800~2100,2,107.324000,150,107.338356,0.000134,16.771645,16.322218,-0.026797
2,1800~2100,3,107.338356,146,107.255479,-0.000772,16.322218,16.334830,0.000773
3,1800~2100,4,107.255479,146,107.185517,-0.000652,16.334830,16.233536,-0.006201
4,1800~2100,5,107.185517,145,107.106803,-0.000734,16.233536,16.469542,0.014538
...,...,...,...,...,...,...,...,...,...
3355,5100~5400,276,106.664179,134,106.717910,0.000504,15.075352,15.067761,-0.000503
3356,5100~5400,277,106.717910,134,106.245522,-0.004427,15.067761,15.134755,0.004446
3357,5100~5400,278,106.245522,134,105.785401,-0.004331,15.134755,15.540897,0.026835
3358,5100~5400,279,105.785401,137,106.466667,0.006440,15.540897,15.892298,0.022611


'heavy_df : '

Unnamed: 0,TimeGroup,heavy_count,total_count,rate
0,1800~2100,7,147,0.047619
1,2100~2400,9,135,0.066667
2,2400~2700,5,150,0.033333
3,2700~3000,5,150,0.033333
4,3000~3300,13,178,0.073034
5,3300~3600,7,131,0.053435
6,3600~3900,12,163,0.07362
7,3900~4200,5,157,0.031847
8,4200~4500,6,138,0.043478
9,4500~4800,12,164,0.073171


'entry_saturation_df : '

Unnamed: 0,TimeGroup,New_Measurement,entry_volume,Phi_진입
0,1800~2100,6,147,0.801818
1,2100~2400,6,135,0.736364
2,2400~2700,6,150,0.818182
3,2700~3000,6,150,0.818182
4,3000~3300,6,178,0.970909
5,3300~3600,6,131,0.714545
6,3600~3900,6,163,0.889091
7,3900~4200,6,157,0.856364
8,4200~4500,6,138,0.752727
9,4500~4800,6,164,0.894545


'rfr_df : '

Unnamed: 0,TimeGroup,New_Measurement,main_line,RFR
0,1800~2100,60,147,0.2
1,2100~2400,60,141,0.2
2,2400~2700,60,125,0.2
3,2700~3000,60,158,0.2
4,3000~3300,60,161,0.2
5,3300~3600,60,162,0.2
6,3600~3900,60,144,0.2
7,3900~4200,60,160,0.2
8,4200~4500,60,153,0.2
9,4500~4800,60,142,0.2


'normality_df : '

Unnamed: 0,TimeGroup,entry_volume,exit_volume,F(outrate),1-F(outrate)
0,1800~2100,743,749,1.008075,-0.008075
1,2100~2400,671,786,1.171386,-0.171386
2,2400~2700,759,664,0.874835,0.125165
3,2700~3000,744,758,1.018817,-0.018817
4,3000~3300,878,632,0.719818,0.280182
5,3300~3600,671,715,1.065574,-0.065574
6,3600~3900,807,807,1.0,0.0
7,3900~4200,779,821,1.053915,-0.053915
8,4200~4500,683,764,1.118594,-0.118594
9,4500~4800,831,626,0.753309,0.246691


STVM_1.xlsx 생성 완료


'speed_df : '

Unnamed: 0,TimeGroup,New_Measurement,V_mean,V_count,V_next,delta_V
0,1800~2100,1,104.584247,146,104.656164,0.000688
1,1800~2100,2,104.656164,146,104.863699,0.001983
2,1800~2100,3,104.863699,146,104.777083,-0.000826
3,1800~2100,4,104.777083,144,105.015385,0.002274
4,1800~2100,5,105.015385,143,105.017361,0.000019
...,...,...,...,...,...,...
3355,5100~5400,276,98.652941,170,99.552941,0.009123
3356,5100~5400,277,99.552941,170,100.306509,0.007570
3357,5100~5400,278,100.306509,169,100.459172,0.001522
3358,5100~5400,279,100.459172,169,101.746061,0.012810


'density_df : '

Unnamed: 0,TimeGroup,New_Measurement,V_mean,V_count,V_next,delta_V,K,K_next,delta_K
0,1800~2100,1,104.584247,146,104.656164,0.000688,16.752045,16.740533,-0.000687
1,1800~2100,2,104.656164,146,104.863699,0.001983,16.740533,16.707402,-0.001979
2,1800~2100,3,104.863699,146,104.777083,-0.000826,16.707402,16.492156,-0.012883
3,1800~2100,4,104.777083,144,105.015385,0.002274,16.492156,16.340463,-0.009198
4,1800~2100,5,105.015385,143,105.017361,0.000019,16.340463,16.454422,0.006974
...,...,...,...,...,...,...,...,...,...
3355,5100~5400,276,98.652941,170,99.552941,0.009123,20.678552,20.491610,-0.009040
3356,5100~5400,277,99.552941,170,100.306509,0.007570,20.491610,20.218030,-0.013351
3357,5100~5400,278,100.306509,169,100.459172,0.001522,20.218030,20.187306,-0.001520
3358,5100~5400,279,100.459172,169,101.746061,0.012810,20.187306,19.460213,-0.036017


'heavy_df : '

Unnamed: 0,TimeGroup,heavy_count,total_count,rate
0,1800~2100,40,144,0.277778
1,2100~2400,44,129,0.341085
2,2400~2700,36,137,0.262774
3,2700~3000,44,146,0.30137
4,3000~3300,52,153,0.339869
5,3300~3600,48,151,0.317881
6,3600~3900,46,145,0.317241
7,3900~4200,46,153,0.300654
8,4200~4500,36,145,0.248276
9,4500~4800,45,145,0.310345


'entry_saturation_df : '

Unnamed: 0,TimeGroup,New_Measurement,entry_volume,Phi_진입
0,1800~2100,6,144,0.785455
1,2100~2400,6,129,0.703636
2,2400~2700,6,137,0.747273
3,2700~3000,6,146,0.796364
4,3000~3300,6,153,0.834545
5,3300~3600,6,151,0.823636
6,3600~3900,6,145,0.790909
7,3900~4200,6,153,0.834545
8,4200~4500,6,145,0.790909
9,4500~4800,6,145,0.790909


'rfr_df : '

Unnamed: 0,TimeGroup,New_Measurement,main_line,RFR
0,1800~2100,60,125,0.2
1,2100~2400,60,152,0.2
2,2400~2700,60,121,0.2
3,2700~3000,60,152,0.2
4,3000~3300,60,142,0.2
5,3300~3600,60,166,0.2
6,3600~3900,60,137,0.2
7,3900~4200,60,146,0.2
8,4200~4500,60,154,0.2
9,4500~4800,60,136,0.2


'normality_df : '

Unnamed: 0,TimeGroup,entry_volume,exit_volume,F(outrate),1-F(outrate)
0,1800~2100,713,643,0.901823,0.098177
1,2100~2400,655,745,1.137405,-0.137405
2,2400~2700,679,627,0.923417,0.076583
3,2700~3000,726,732,1.008264,-0.008264
4,3000~3300,780,690,0.884615,0.115385
5,3300~3600,748,703,0.93984,0.06016
6,3600~3900,728,708,0.972527,0.027473
7,3900~4200,767,699,0.911343,0.088657
8,4200~4500,719,853,1.18637,-0.18637
9,4500~4800,713,719,1.008415,-0.008415


STVM_2.xlsx 생성 완료
