In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import random
from scipy.stats import norm
import statsmodels.api as sm
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
import seaborn as sns

pd.options.display.max_columns = None
survey_result_preprocessed = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/survey_result_preprocessed.csv',encoding='utf-8',index_col=0)
df = survey_result_preprocessed.copy()

# 경로 A 선택하면 1, 경로 B 선택하면 0
df['choice']=df['route_choice'].apply(lambda x:1 if x=='경로 A' else 0)

# 경로 A 속성 - 경로 B 속성 차이
df['cost_diff'] = df['A_cost'] - df['B_cost']
df['ivt_diff'] = df['A_ivt'] - df['B_ivt']
df['ovt_fmlm_diff'] = df['A_ovt_fmlm'] - df['B_ovt_fmlm']
df['ovt_transfer_diff'] = df['A_ovt_transfer'] - df['B_ovt_transfer']

# cost 단위가 너무 큼. 100원당 효용으로 scale 변경.
df['cost_diff']=df['cost_diff']/100

# baseline(train), update, reference 분리
seed = 42
np.random.seed(seed)

List = []
for i in range(204):
    List.extend(np.random.permutation(range(1, 9)).tolist())

df['run_random'] = List

# train (run_random = 1)
df_train = df[df.run_random==1].reset_index(drop=True)
# update (run_random = 2~7)
df_update = df[df['run_random'].isin([2,3,4,5,6,7])].reset_index(drop=True)
# test (run_random = 8)
df_test = df[df.run_random==8].reset_index(drop=True)
# reference model (run_random = 1~7)
df_ref = df[df.run_random!=8].reset_index(drop=True)

print(df_train.shape, df_update.shape, df_test.shape, df_ref.shape)

reference_model = smf.logit('choice ~ cost_diff + ivt_diff + ovt_fmlm_diff + ovt_transfer_diff -1', data=df_ref)
reference_result = reference_model.fit()

print(reference_result.summary())

cost_mean_update_result = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/최종결과/parameter_updated_cost.csv',index_col=0)
ivt_mean_update_result = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/최종결과/parameter_updated_ivt.csv',index_col=0)
ovt_fmlm_mean_update_result = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/최종결과/parameter_updated_ovt_fmlm.csv',index_col=0)
ovt_transfer_mean_update_result = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/최종결과/parameter_updated_ovt_transfer.csv',index_col=0)

(204, 21) (1224, 21) (204, 21) (1428, 21)
Optimization terminated successfully.
         Current function value: 0.402596
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                 choice   No. Observations:                 1428
Model:                          Logit   Df Residuals:                     1424
Method:                           MLE   Df Model:                            3
Date:                Mon, 18 Nov 2024   Pseudo R-squ.:                  0.4189
Time:                        07:14:43   Log-Likelihood:                -574.91
converged:                       True   LL-Null:                       -989.36
Covariance Type:            nonrobust   LLR p-value:                2.328e-179
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
cost_diff            -0.3377      0.023    -14.740  

In [3]:
kakaomap_result = pd.read_csv('/content/drive/MyDrive/졸업프로젝트/설문결과/kakaomap_route.csv')

In [4]:
kakaomap_result

Unnamed: 0,route,cost,ivt,ovt_fmlm,ovt_transfer
0,1,1500,41,18,1
1,2,1500,55,13,5
2,3,1600,47,10,9
3,4,1650,43,10,9
4,5,2800,49,13,9
5,6,1700,36,10,14
6,7,1700,37,10,13
7,8,2400,33,14,12
8,9,1600,53,13,6
9,10,2800,55,13,10


In [None]:
def utility_diff(model_result, features):
    '''
    Input
    - model_result : 사용할 모델의 결과
    - features : 주어진 상황의 경로 A - 경로 B [cost_diff, ivt_diff, ovt_fmlm_diff, ovt_transfer_diff]
    Ouput
    - df_ref로 추정한 reference model에 따른 utility diff
    '''
    beta_cost, beta_ivt, beta_ovt_fmlm, beta_ovt_transfer = [model_result.params.values[i] for i in range(4)]
    cost_diff, ivt_diff, ovt_fmlm_diff, ovt_transfer_diff = features
    cost_utility = beta_cost*cost_diff
    ivt_utility = beta_ivt * ivt_diff
    ovt_fmlm_utility = beta_ovt_fmlm*ovt_fmlm_diff
    ovt_transfer_utility = beta_ovt_transfer * ovt_transfer_diff
    utility_diff = cost_utility + ivt_utility + ovt_fmlm_utility + ovt_transfer_utility
    return cost_utility, ivt_utility, ovt_fmlm_utility, ovt_transfer_utility, utility_diff

# 평균적인 경로 선호도

In [None]:
# 기준을 경로 1로 두자.

kakaomap_result['cost_diff'] = (kakaomap_result['cost'] - 1500) / 100
kakaomap_result['ivt_diff'] = kakaomap_result['ivt'] - 41
kakaomap_result['ovt_fmlm_diff'] = kakaomap_result['ovt_fmlm'] - 18
kakaomap_result['ovt_transfer_diff'] = kakaomap_result['ovt_transfer'] - 1

kakaomap_result.head(3)


Unnamed: 0,route,cost,ivt,ovt_fmlm,ovt_transfer,cost_diff,ivt_diff,ovt_fmlm_diff,ovt_transfer_diff
0,1,1500,41,18,1,0.0,0,0,0
1,2,1500,55,13,5,0.0,14,-5,4
2,3,1600,47,10,9,1.0,6,-8,8


In [None]:
df = pd.DataFrame(columns = ['route','cost_diff','ivt_diff','ovt_fmlm_diff','ovt_transfer_diff','cost_utility','ivt_utility','ovt_fmlm_utility','ovt_transfer_utility','total_utility'])
for i in range(13):
  features = kakaomap_result.iloc[i,5:9].values.tolist()
  df.loc[i,'route'] = i+1
  df.iloc[i,1:5] = features
  df.iloc[i,5:] = utility_diff(reference_result, features)


In [None]:
df
# 일단 route1이 다른 경로들과 비교 시 best (비교경로 - 경로1 의 효용이 항상 음수)

Unnamed: 0,route,cost_diff,ivt_diff,ovt_fmlm_diff,ovt_transfer_diff,cost_utility,ivt_utility,ovt_fmlm_utility,ovt_transfer_utility,total_utility
0,1,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1,2,0.0,14.0,-5.0,4.0,-0.0,-2.729662,1.865904,-1.326548,-2.190306
2,3,1.0,6.0,-8.0,8.0,-0.337664,-1.169855,2.985447,-2.653096,-1.175168
3,4,1.5,2.0,-8.0,8.0,-0.506496,-0.389952,2.985447,-2.653096,-0.564097
4,5,13.0,8.0,-5.0,8.0,-4.38963,-1.559807,1.865904,-2.653096,-6.736629
5,6,2.0,-5.0,-8.0,13.0,-0.675328,0.974879,2.985447,-4.311281,-1.026283
6,7,2.0,-4.0,-8.0,12.0,-0.675328,0.779903,2.985447,-3.979644,-0.889622
7,8,9.0,-8.0,-4.0,11.0,-3.038975,1.559807,1.492723,-3.648007,-3.634452
8,9,1.0,12.0,-5.0,5.0,-0.337664,-2.33971,1.865904,-1.658185,-2.469655
9,10,13.0,14.0,-5.0,9.0,-4.38963,-2.729662,1.865904,-2.984733,-8.238121


In [None]:
df.sort_values(by='total_utility',ascending=False)
# refernce model에 의하면 평균적으로 효용이
# 경로 1 > 4 > 7 > 6 > 3 > 2 > 9 > 8 > 11 > 5 > 12 > 10 > 13

Unnamed: 0,route,cost_diff,ivt_diff,ovt_fmlm_diff,ovt_transfer_diff,cost_utility,ivt_utility,ovt_fmlm_utility,ovt_transfer_utility,total_utility
0,1,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
3,4,1.5,2.0,-8.0,8.0,-0.506496,-0.389952,2.985447,-2.653096,-0.564097
6,7,2.0,-4.0,-8.0,12.0,-0.675328,0.779903,2.985447,-3.979644,-0.889622
5,6,2.0,-5.0,-8.0,13.0,-0.675328,0.974879,2.985447,-4.311281,-1.026283
2,3,1.0,6.0,-8.0,8.0,-0.337664,-1.169855,2.985447,-2.653096,-1.175168
1,2,0.0,14.0,-5.0,4.0,-0.0,-2.729662,1.865904,-1.326548,-2.190306
8,9,1.0,12.0,-5.0,5.0,-0.337664,-2.33971,1.865904,-1.658185,-2.469655
7,8,9.0,-8.0,-4.0,11.0,-3.038975,1.559807,1.492723,-3.648007,-3.634452
10,11,7.0,-15.0,4.0,9.0,-2.363647,2.924638,-1.492723,-2.984733,-3.916465
4,5,13.0,8.0,-5.0,8.0,-4.38963,-1.559807,1.865904,-2.653096,-6.736629


# ID 별로 달라진 경로 선호도

In [None]:
import numpy as np

def utility_diff(features):
    beta_cost, beta_ivt, beta_ovt_fmlm, beta_ovt_transfer = [
        cost_mean_update_result.update_6.values,
        ivt_mean_update_result.update_6.values,
        ovt_fmlm_mean_update_result.update_6.values,
        ovt_transfer_mean_update_result.update_6.values,
    ]

    cost_diff, ivt_diff, ovt_fmlm_diff, ovt_transfer_diff = features

    cost_utility = beta_cost * cost_diff
    ivt_utility = beta_ivt * ivt_diff
    ovt_fmlm_utility = beta_ovt_fmlm * ovt_fmlm_diff
    ovt_transfer_utility = beta_ovt_transfer * ovt_transfer_diff
    utility_diff = cost_utility + ivt_utility + ovt_fmlm_utility + ovt_transfer_utility

    result = np.column_stack((cost_utility, ivt_utility, ovt_fmlm_utility, ovt_transfer_utility, utility_diff))

    return result, utility_diff

In [None]:
result_2, utility_diff_2 = utility_diff(kakaomap_result.iloc[1,5:9].values)

In [None]:
df_id = pd.DataFrame(columns = [f'route{x}' for x in range(1,14)])
for i in range(13):
  _, utility_diff_route = utility_diff(kakaomap_result.iloc[i,5:9].values)
  df_id.iloc[:,i] = utility_diff_route


In [None]:
df_id

Unnamed: 0,route1,route2,route3,route4,route5,route6,route7,route8,route9,route10,route11,route12,route13
0,-0.0,-3.067311,-1.102419,-0.136522,-10.405383,-0.571378,-0.416051,-5.190067,-3.475261,-12.745129,-6.015224,-10.938063,-20.779626
1,-0.0,-6.19322,-3.507611,-1.877549,-12.254312,-1.42962,-1.382425,-4.650091,-6.291835,-15.613052,-3.266545,-14.533734,-14.945129
2,-0.0,-6.313845,-2.87726,-1.083548,-11.899402,0.097497,0.013927,-3.337069,-6.249659,-15.42264,-2.000038,-14.976422,-14.866226
3,-0.0,-6.106831,-3.479456,-1.87627,-12.208585,-1.475104,-1.421246,-4.709888,-6.218867,-15.526992,-3.36086,-14.382608,-14.958934
4,-0.0,-4.560238,-1.939371,-0.594885,-10.941314,-0.292461,-0.24542,-4.262038,-4.742296,-13.812344,-4.083608,-12.734051,-17.867452
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,-0.0,-4.111537,-2.627271,-1.618008,-11.823584,-2.14683,-1.969581,-6.255817,-4.551703,-14.282277,-5.732803,-11.174878,-16.012028
200,-0.0,-1.931209,-1.236756,-0.728927,-5.394656,-1.269294,-1.122716,-3.101174,-2.165705,-6.64481,-3.28658,-5.890918,-10.096129
201,-0.0,-6.215269,-2.824299,-1.05642,-11.989802,0.053101,-0.019262,-3.497212,-6.177754,-15.488047,-2.244234,-14.918992,-15.290283
202,-0.0,-2.995831,-2.660765,-2.039237,-9.639808,-3.527821,-3.189925,-6.589835,-3.562876,-11.44991,-6.568561,-8.77277,-15.199599


In [None]:
# ID 별로 1순위부터 13순위까지의 경로를 나열
# 차이가 존재함
sorted_indices = df_id.apply(lambda row: row.sort_values(ascending=False).index.tolist(), axis=1)
sorted_indices_df = pd.DataFrame(sorted_indices.tolist(), index=df_id.index)
sorted_indices_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,route1,route4,route7,route6,route3,route2,route9,route8,route11,route5,route12,route10,route13
1,route1,route7,route6,route4,route11,route3,route8,route2,route9,route5,route12,route13,route10
2,route6,route7,route1,route4,route11,route3,route8,route9,route2,route5,route13,route12,route10
3,route1,route7,route6,route4,route11,route3,route8,route2,route9,route5,route12,route13,route10
4,route1,route7,route6,route4,route3,route11,route8,route2,route9,route5,route12,route10,route13
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,route1,route4,route7,route6,route3,route2,route9,route11,route8,route12,route5,route10,route13
200,route1,route4,route7,route3,route6,route2,route9,route8,route11,route5,route12,route10,route13
201,route6,route1,route7,route4,route11,route3,route8,route9,route2,route5,route12,route13,route10
202,route1,route4,route3,route2,route7,route6,route9,route11,route8,route12,route5,route10,route13


In [None]:
# ID 111
sorted_indices_df.iloc[111,:]
# ID 181
sorted_indices_df.iloc[181,:]
# ID 193
sorted_indices_df.iloc[193,:]

Unnamed: 0,193
0,route1
1,route4
2,route7
3,route3
4,route6
5,route2
6,route9
7,route8
8,route11
9,route5


In [None]:
# 추천 경로 1순위 변화
sorted_indices_df.iloc[:,0].value_counts()

Unnamed: 0_level_0,count
0,Unnamed: 1_level_1
route1,167
route6,17
route11,10
route4,9
route7,1
