In [1]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [2]:
# MySQL 접속 설정
username = "root"      # MySQL 사용자명
password = "1234"      # MySQL 비밀번호
host = "127.0.0.1"     # 로컬호스트
port = 3306            # 포트
database = "ott_db"  # DB 이름

# MySQL 엔진 생성
engine = create_engine(f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}")

In [3]:
# 데이터 파일 경로 설정
folder_path = f"../../방송통신위원회_해외 OTT 이용행태조사 원시데이터_20231231"
file_2022 = os.path.join(folder_path, "2022해외OTT이용행태조사_국가통합(20240403).csv")
file_2023 = os.path.join(folder_path, "2023해외OTT이용행태조사_국가통합(20240403).csv")

# CSV 읽기
df_2022 = pd.read_csv(file_2022, encoding='utf-8-sig', low_memory=False)
df_2023 = pd.read_csv(file_2023, encoding='utf-8-sig', low_memory=False)

print("2022 데이터:", df_2022.shape)
print("2023 데이터:", df_2023.shape)

2022 데이터: (4536, 1903)
2023 데이터: (6326, 929)


### (1) user_id

In [4]:
# 1. 2022 user_id 생성
df_2022['user_id'] = ('2022' + df_2022['RESPID'].astype(str)).astype(int)
# 2. 2023 user_id 생성
df_2023['user_id'] = ('2023' + df_2023['id'].astype(str)).astype(int)

df_user_id_2022 = df_2022[['user_id']].copy()
df_user_id_2023 = df_2023[['user_id']].copy()

# 3. 합치기
df_user_id = pd.concat([df_user_id_2022, df_user_id_2023], ignore_index=True)

# 4. ott 테이블에 user_id 업로드
df_user_id.to_sql(name="ott", con=engine, index=False, if_exists="replace")

10862

In [5]:
def add_column_to_ott(df_ott, df_2022, df_2023, col_2022, col_2023, new_col_name,
                       mapping_2022=None, mapping_2023=None):
    """
    ott 테이블에 새로운 컬럼을 추가하는 함수
    :param df_user: 기존 user 테이블 DataFrame
    :param df_2022: 2022 데이터 DataFrame
    :param df_2023: 2023 데이터 DataFrame
    :param col_2022: 2022 데이터에서 가져올 컬럼명
    :param col_2023: 2023 데이터에서 가져올 컬럼명
    :param new_col_name: ott 테이블에 추가할 새 컬럼명
    :param mapping_2022: 2022 매핑 딕셔너리 (없으면 None)
    :param mapping_2023: 2023 매핑 딕셔너리 (없으면 None)
    :return: 새로운 컬럼이 추가된 df_
    """

    # 2022 데이터 처리
    df_2022_col = df_2022[['user_id', col_2022]].copy()
    df_2022_col = df_2022_col.rename(columns={col_2022: new_col_name})
    if mapping_2022:
        df_2022_col[new_col_name] = df_2022_col[new_col_name].map(mapping_2022)

    # 2023 데이터 처리
    df_2023_col = df_2023[['user_id', col_2023]].copy()
    df_2023_col = df_2023_col.rename(columns={col_2023: new_col_name})
    if mapping_2023:
        df_2023_col[new_col_name] = df_2023_col[new_col_name].map(mapping_2023)

    # 합치기
    df_new_col = pd.concat([df_2022_col, df_2023_col], ignore_index=True)

    # user 테이블 병합
    df_ott = df_ott.merge(df_new_col, on='user_id', how='left')

    return df_ott

In [6]:
# 현재 user 테이블 불러오기
df_ott = pd.read_sql("SELECT * FROM ott", con=engine)

In [None]:
### (2) usage_ott
usage_ott_map = {1: 1, 2: 0}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='SQ5', col_2023='SQ5',
    new_col_name='usage_ott',
    mapping_2022=usage_ott_map,
    mapping_2023=usage_ott_map 
    )

### 복수형

In [None]:
def add_multi_column(df_ott, df_2022, df_2023, new_col_name, cols_2022, cols_2023, target_value_2022, target_value_2023, convert_value=1):
    """
    ott 테이블에 복수 응답 컬럼 추가하는 함수
    """
    # 2022 데이터 처리
    df_2022_part = df_2022[['user_id'] + cols_2022].copy()
    df_2022_part[cols_2022] = df_2022_part[cols_2022].replace(' ', 0, regex=False).fillna(0).astype(int)
    df_2022_part[new_col_name] = df_2022_part[cols_2022].apply(
        lambda row: 1 if row.isin(target_value_2022).any() else 0, axis=1
    )

    df_2022_part = df_2022_part[['user_id', new_col_name]]

    # 2023 데이터 처리
    df_2023_part = df_2023[['user_id'] + cols_2023].copy()
    df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0, regex=False).fillna(0).astype(int)
    df_2023_part[new_col_name] = df_2023_part[cols_2023].apply(
        lambda row: 1 if row.isin(target_value_2023).any() else 0, axis=1
    )

    df_2023_part = df_2023_part[['user_id', new_col_name]]

    # 합치기
    df_merge = pd.concat([df_2022_part, df_2023_part], ignore_index=True)

    # user 테이블 병합
    df_ott = df_ott.merge(df_merge, on='user_id', how='left')
    
    return df_ott

In [None]:
### (3) plan_monthly
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='plan_monthly',
    cols_2022=['SQ6_1'],
    cols_2023=['SQ6', 'SQ6_m2', 'SQ6_m3', 'SQ6_m4', 'SQ6_m5'],
    target_value_2022=[1],
    target_value_2023=[1,2],
    convert_value=1
)

In [None]:
### (4) plan_freewithad
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='plan_freewithad',
    cols_2022=['SQ6_2'],
    cols_2023=['SQ6', 'SQ6_m2', 'SQ6_m3', 'SQ6_m4', 'SQ6_m5'],
    target_value_2022=[2],
    target_value_2023=[3],
    convert_value=1
)

In [None]:
### (5) plan_single
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='plan_single',
    cols_2022=['SQ6_3'],
    cols_2023=['SQ6', 'SQ6_m2', 'SQ6_m3', 'SQ6_m4', 'SQ6_m5'],
    target_value_2022=[3],
    target_value_2023=[4],
    convert_value=1
)

In [None]:
### (6) plan_tvbundle
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='plan_tvbundle',
    cols_2022=['SQ6_4'],
    cols_2023=['SQ6', 'SQ6_m2', 'SQ6_m3', 'SQ6_m4', 'SQ6_m5'],
    target_value_2022=[4],
    target_value_2023=[5],
    convert_value=1
)

In [13]:
### (7) usage_netflix
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='usage_netflix',
    cols_2022=['B1_1'],
    cols_2023=['A1'] + [f'A1_m{i}' for i in range(2, 62)],
    target_value_2022=[1],
    target_value_2023=[1],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [14]:
### (8) usage_amazon
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='usage_amazon',
    cols_2022=['B1_2'],
    cols_2023=['A1'] + [f'A1_m{i}' for i in range(2, 62)],
    target_value_2022=[2],
    target_value_2023=[2],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [15]:
### (9) usage_disney
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='usage_disney',
    cols_2022=['B1_3'],
    cols_2023=['A1'] + [f'A1_m{i}' for i in range(2, 62)],
    target_value_2022=[3],
    target_value_2023=[3],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [16]:
### (10) frequency
frequency_map = {1: '매일', 2: '1주일에 5~6일', 3:'1주일에 3~4일', 4:'1주일에 1~2일', 5:'한 달에 1일', 6:'2~3달에 1일 이하'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B6', col_2023='A4',
    new_col_name='frequency',
    mapping_2022=frequency_map,
    mapping_2023=frequency_map 
    )

In [17]:
### (11) place_weekdays
place_weekdays_map = {1: '집', 2: '이동 중인 교통수단', 3:'기타 실내장소', 4:'길거리 등 실외'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B7_1', col_2023='A5_1',
    new_col_name='place_weekdays',
    mapping_2022=place_weekdays_map,
    mapping_2023=place_weekdays_map 
    )

In [18]:
### (12) place_weekend
place_weekend_map = {1: '집', 2: '이동 중인 교통수단', 3:'기타 실내장소', 4:'길거리 등 실외'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B7_2', col_2023='A5_2',
    new_col_name='place_weekend',
    mapping_2022=place_weekend_map,
    mapping_2023=place_weekend_map 
    )

In [19]:
### (13) hours_weekdays
hours_weekdays_map = {1: '30분 미만', 2: '30분~1시간', 3:'1~2시간', 4:'2~3시간', 5:'3~4시간', 6:'4~5시간', 7:'5~6시간', 8:'6시간 이상'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B8_1', col_2023='A6_1',
    new_col_name='hours_weekdays',
    mapping_2022=hours_weekdays_map,
    mapping_2023=hours_weekdays_map 
    )

In [20]:
### (14) hours_weekend
hours_weekend_map = {1: '30분 미만', 2: '30분~1시간', 3:'1~2시간', 4:'2~3시간', 5:'3~4시간', 6:'4~5시간', 7:'5~6시간', 8:'6시간 이상'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B8_2', col_2023='A6_2',
    new_col_name='hours_weekend',
    mapping_2022=hours_weekend_map,
    mapping_2023=hours_weekend_map 
    )

In [21]:
### (15) viewing_style
viewing_style_map = {1: '이동시간 등 시간날 때 틈틈이 시청', 2: '원하는 콘텐츠를 한 편씩만 시청', 3:'콘텐츠 여러 개를 몰아보기', 4:'기타'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B10', col_2023='A8',
    new_col_name='viewing_style',
    mapping_2022=viewing_style_map,
    mapping_2023=viewing_style_map 
    )

In [22]:
### (16) viewing_option
viewing_option_map = {1: '스트리밍 방식으로 시청', 2: '콘텐츠를 다운로드 받아 시청', 3:'기타'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B11', col_2023='A9',
    new_col_name='viewing_option',
    mapping_2022=viewing_option_map,
    mapping_2023=viewing_option_map 
    )

In [23]:
### (17) viewing_members
viewing_members_map = {1: '혼자서 시청', 2: '가족과 함께 시청', 3:'친구/지인', 4:'기타'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B12', col_2023='A10',
    new_col_name='viewing_members',
    mapping_2022=viewing_members_map,
    mapping_2023=viewing_members_map 
    )

In [24]:
### (18) child_ott
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='child_ott',
    cols_2022=['B13'],
    cols_2023=['A11'],
    target_value_2022=[1],
    target_value_2023=[1],
    convert_value=1
)

In [25]:
### (18) child_netflix
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='child_netflix',
    cols_2022=['B13_1_1'],
    cols_2023=['A11_1'] + [f'A11_1_m{i}' for i in range(2, 61)],
    target_value_2022=[1],
    target_value_2023=[1],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [26]:
### (19) child_amazon
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='child_amazon',
    cols_2022=['B13_1_2'],
    cols_2023=['A11_1'] + [f'A11_1_m{i}' for i in range(2, 61)],
    target_value_2022=[2],
    target_value_2023=[2],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [27]:
### (20) child_disney
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='child_disney',
    cols_2022=['B13_1_3'],
    cols_2023=['A11_1'] + [f'A11_1_m{i}' for i in range(2, 61)],
    target_value_2022=[3],
    target_value_2023=[3],
    convert_value=1
)

  df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', 0).fillna(0).astype(int)


In [28]:
cancel_reason_map = {
    1: '영상 광고 삽입', 
    2: '제공하는 콘텐츠가 다양하지 않아서', 
    3:'이용요금이 적절하지 않아서', 
    4:'나에게 적합한 서비스를 추천해주지 않아서', 
    5:'시청 가능한 기기가 제한되어서', 
    6:'몰아보기로 콘텐츠를 이용하기 불편해서', 
    7:'제공되는 영상의 화질/음질이 좋지 않아서', 
    8:'영상을 안정적으로 시청할 수 없어서',
    9:'영상의 자막/더빙의 질이 좋지 않아서',
    10: '유료 OTT 서비스를 해지한 적 없다'}
### (21) cancel_r_rank1
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B14RANK_1', col_2023='A12',
    new_col_name='cancel_r_rank1',
    mapping_2022=cancel_reason_map,
    mapping_2023=cancel_reason_map 
    )

In [29]:
cancel_reason2_map = {
    '1': '영상 광고 삽입', 
    '2': '제공하는 콘텐츠가 다양하지 않아서', 
    '3':'이용요금이 적절하지 않아서', 
    '4':'나에게 적합한 서비스를 추천해주지 않아서', 
    '5':'시청 가능한 기기가 제한되어서', 
    '6':'몰아보기로 콘텐츠를 이용하기 불편해서', 
    '7':'제공되는 영상의 화질/음질이 좋지 않아서', 
    '8':'영상을 안정적으로 시청할 수 없어서',
    '9':'영상의 자막/더빙의 질이 좋지 않아서',
    '10': '유료 OTT 서비스를 해지한 적 없다'}
### (22) cancel_r_rank2
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B14RANK_2', col_2023='A12_m2',
    new_col_name='cancel_r_rank2',
    mapping_2022=cancel_reason2_map,
    mapping_2023=cancel_reason2_map 
    )

### (22) cancel_r_rank3
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='B14RANK_3', col_2023='A12_m3',
    new_col_name='cancel_r_rank3',
    mapping_2022=cancel_reason2_map,
    mapping_2023=cancel_reason2_map 
    )

In [30]:
### (23) multi_scattered_contents
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_scattered_contents',
    cols_2022=['B15_2_1'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[1],
    target_value_2023=[1],
    convert_value=1
)

In [31]:
### (24) multi_not_burden
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_not_burden',
    cols_2022=['B15_2_2'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[2],
    target_value_2023=[2],
    convert_value=1
)

In [32]:
### (25) multi_share_account
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_share_account',
    cols_2022=['B15_2_3'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[3],
    target_value_2023=[3],
    convert_value=1
)

In [33]:
### (26) multi_for_family
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_for_family',
    cols_2022=['B15_2_4'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[4],
    target_value_2023=[4],
    convert_value=1
)

In [34]:
### (27) multi_before_deciding
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_before_deciding',
    cols_2022=['B15_2_5'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[5],
    target_value_2023=[5],
    convert_value=1
)

In [35]:
### (28) multi_other
df_ott = add_multi_column(
    df_ott,
    df_2022, df_2023,
    new_col_name='multi_other',
    cols_2022=['B15_2_6'],
    cols_2023=['A14'] + [f'A14_m{i}' for i in range(2, 7)],
    target_value_2022=[6],
    target_value_2023=[6],
    convert_value=1
)

## 2022 / 2023 분리 문항의 경우

In [36]:
def add_nan_columns_to_ott(df_ott, df_2022, df_2023, col_2022=None, col_2023=None,
                      new_col_name='',mapping_2022=None, mapping_2023=None, x_value=None):
# 1. 2022 데이터 처리
    if col_2022 is not None:
        df_2022_col = df_2022[['user_id', col_2022]].copy()
        df_2022_col = df_2022_col.rename(columns={col_2022: new_col_name})

        if mapping_2022 is not None:
            df_2022_col[new_col_name] = df_2022_col[new_col_name].map(mapping_2022)
        elif x_value is not None:
            df_2022_col[new_col_name] = df_2022_col[new_col_name].apply(
                lambda x: 1 if x == x_value else 0
            )
    else:
        # 컬럼이 없을 경우: user_id만 있고, 값은 NaN
        df_2022_col = df_2022[['user_id']].copy()
        df_2022_col[new_col_name] = np.nan

    # 2. 2023 데이터 처리
    if col_2023 is not None:
        df_2023_col = df_2023[['user_id', col_2023]].copy()
        df_2023_col = df_2023_col.rename(columns={col_2023: new_col_name})

        if mapping_2023 is not None:
            df_2023_col[new_col_name] = df_2023_col[new_col_name].map(mapping_2023)
        elif x_value is not None:
            df_2023_col[new_col_name] = df_2023_col[new_col_name].apply(
                lambda x: 1 if x == x_value else 0
            )
    else:
        df_2023_col = df_2023[['user_id']].copy()
        df_2023_col[new_col_name] = np.nan

    # 3. 합치기
    df_new_col = pd.concat([df_2022_col, df_2023_col], ignore_index=True)

    # 4. user 테이블 병합
    df_ott = df_ott.merge(df_new_col, on='user_id', how='left')

    return df_ott

### 넷플릭스 이용이유

In [37]:
### (29) netflix_r_ad
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_1', 
    col_2023=None,
    new_col_name='netflix_r_ad',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='1')

### (30) netflix_r_various
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_2', 
    col_2023=None,
    new_col_name='netflix_r_various',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='2')

### (31) netflix_r_new
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_3', 
    col_2023=None,
    new_col_name='netflix_r_new',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='3')

### (32) netflix_r_original
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_4', 
    col_2023=None,
    new_col_name='netflix_r_original',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='4')

### (33) netflix_r_charge
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_5', 
    col_2023=None,
    new_col_name='netflix_r_charge',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='5')

### (34) netflix_r_promotion
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_6', 
    col_2023=None,
    new_col_name='netflix_r_promotion',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='6')

### (35) netflix_r_recommand
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_7', 
    col_2023=None,
    new_col_name='netflix_r_recommand',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='7')

### (36) netflix_r_device
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_8', 
    col_2023=None,
    new_col_name='netflix_r_device',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='8')

### (37) netflix_r_bingewatching
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_9', 
    col_2023=None,
    new_col_name='netflix_r_bingewatching',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='9')

### (38) netflix_r_ux
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_10', 
    col_2023=None,
    new_col_name='netflix_r_ux',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='10')

### (39) netflix_r_videoquality
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_11', 
    col_2023=None,
    new_col_name='netflix_r_videoquality',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='11')

### (40) netflix_r_stability
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_12', 
    col_2023=None,
    new_col_name='netflix_r_stability',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='12')

### (41) netflix_r_subtitle
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_13', 
    col_2023=None,
    new_col_name='netflix_r_subtitle',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='13')

### (42) netflix_r_hollyhood
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_14', 
    col_2023=None,
    new_col_name='netflix_r_hollyhood',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='14')

### (43) netflix_r_nation
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_1_15', 
    col_2023=None,
    new_col_name='netflix_r_nation',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='15')

### 아마존 이용이유

In [38]:
### (44) amazon_r_ad
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_1', 
    col_2023=None,
    new_col_name='amazon_r_ad',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='1')

### (45) amazon_r_various
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_2', 
    col_2023=None,
    new_col_name='amazon_r_various',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='2')

### (46) amazon_r_new
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_3', 
    col_2023=None,
    new_col_name='amazon_r_new',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='3')

### (47) amazon_r_original
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_4', 
    col_2023=None,
    new_col_name='amazon_r_original',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='4')

### (48) amazon_r_charge
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_5', 
    col_2023=None,
    new_col_name='amazon_r_charge',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='5')

### (49) amazon_r_promotion
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_6', 
    col_2023=None,
    new_col_name='amazon_r_promotion',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='6')

### (50) amazon_r_recommand
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_7', 
    col_2023=None,
    new_col_name='amazon_r_recommand',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='7')

### (51) amazon_r_device
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_8', 
    col_2023=None,
    new_col_name='amazon_r_device',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='8')

### (52) amazon_r_bingewatching
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_9', 
    col_2023=None,
    new_col_name='amazon_r_bingewatching',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='9')

### (53) amazon_r_ux
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_10', 
    col_2023=None,
    new_col_name='amazon_r_ux',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='10')

### (54) amazon_r_videoquality
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_11', 
    col_2023=None,
    new_col_name='amazon_r_videoquality',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='11')

### (55) amazon_r_stability
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_12', 
    col_2023=None,
    new_col_name='amazon_r_stability',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='12')

### (56) amazon_r_subtitle
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_13', 
    col_2023=None,
    new_col_name='amazon_r_subtitle',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='13')

### (57) amazon_r_hollyhood
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_14', 
    col_2023=None,
    new_col_name='amazon_r_hollyhood',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='14')

### (58) amazon_r_nation
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_2_15', 
    col_2023=None,
    new_col_name='amazon_r_nation',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='15')


### 디즈니 이용이유

In [39]:
### (59) disney_r_ad
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_1', 
    col_2023=None,
    new_col_name='disney_r_ad',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='1')

### (60) disney_r_various
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_2', 
    col_2023=None,
    new_col_name='disney_r_various',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='2')

### (61) disney_r_new
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_3', 
    col_2023=None,
    new_col_name='disney_r_new',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='3')

### (62) disney_r_original
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_4', 
    col_2023=None,
    new_col_name='disney_r_original',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='4')

### (63) adisney_r_charge
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_5', 
    col_2023=None,
    new_col_name='disney_r_charge',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='5')

### (64) disney_r_promotion
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_6', 
    col_2023=None,
    new_col_name='disney_r_promotion',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='6')

### (65) disney_r_recommand
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_7', 
    col_2023=None,
    new_col_name='disney_r_recommand',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='7')

### (66) disney_r_device
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_8', 
    col_2023=None,
    new_col_name='disney_r_device',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='8')

### (67) disney_r_bingewatching
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_9', 
    col_2023=None,
    new_col_name='disney_r_bingewatching',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='9')

### (68) disney_r_ux
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_10', 
    col_2023=None,
    new_col_name='disney_r_ux',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='10')

### (69) disney_r_videoquality
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_11', 
    col_2023=None,
    new_col_name='disney_r_videoquality',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='11')

### (70) disney_r_stability
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_12', 
    col_2023=None,
    new_col_name='disney_r_stability',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='12')

### (71) disney_r_subtitle
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_13', 
    col_2023=None,
    new_col_name='disney_r_subtitle',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='13')

### (72) disney_r_hollyhood
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_14', 
    col_2023=None,
    new_col_name='disney_r_hollyhood',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='14')

### (74) disney_r_nation
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022='C1_3_15', 
    col_2023=None,
    new_col_name='disney_r_nation',
    mapping_2022=None, 
    mapping_2023=None, 
    x_value='15')

In [40]:
reason_map = {
    '1': '다양한 콘텐츠를 제공한다',
    '2': '다양한 국가의 콘텐츠를 시청할 수 있다',
    '3': '최신 콘텐츠가 많다',
    '4':'오리지널 콘텐츠가 많다',
    '5':'Hollywood 콘텐츠를 시청할 수 있다',
    '6':'자국 콘텐츠를 시청할 수 있다',
    '7':'나에게 적합한 콘텐츠를 추천해준다',
    '8':'몰아보기로 콘텐츠를 이용하기 편리하다',
    '9':'제공되는 영상의 자막/더빙의 질이 우수하다',
    '10':'화면, 메뉴, 검색 등이 편리하다',
    '11':'시청 가능한 기기가 다양하다',
    '12':'영상에 삽입되는 광고가 없거나 적정하다',
    '13':'제공되는 영상의 화질/음질이 우수하다',
    '14':'영상을 끊이지 않고 안정적으로 시청 가능하다',
    '15':'이용 요금이 적절하다',
    '16':'결합요금 혜택(통신사, 커머스 등)을 통해 쉽게 이용할 수 있다'
    }

score_map = {'1': 1, '2': 2, '3': 3, '4': 4, '5': 5}

In [41]:
### (75) netflix_r_rank1
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r1',
    new_col_name='netflix_r_rank1',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [42]:
### (76) netflix_r_rank2
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r1_m2',
    new_col_name='netflix_r_rank2',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [43]:
### (77) nexflix_satisfaction
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C2_1', col_2023='B3_r1',
    new_col_name='nexflix_satisfaction',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [44]:
### (78) nexflix_keep
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C3_1', col_2023='B4_r1',
    new_col_name='nexflix_keep',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [45]:
### (79) amazon_r_rank1
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r2',
    new_col_name='amazon_r_rank1',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [46]:
### (80) amazon_r_rank2
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r2_m2',
    new_col_name='amazon_r_rank2',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [47]:

### (81) amazon_satisfaction
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C2_2', col_2023='B3_r2',
    new_col_name='amazon_satisfaction',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [48]:
### (82) amazon_keep
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C3_2', col_2023='B4_r2',
    new_col_name='amazon_keep',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [49]:
### (83) disney_r_rank1
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r3',
    new_col_name='disney_r_rank1',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [50]:
### (84) disney_r_rank2
df_ott = add_nan_columns_to_ott(
    df_ott, df_2022, df_2023, 
    col_2022=None, 
    col_2023='B2_r3_m2',
    new_col_name='disney_r_rank2',
    mapping_2022=None, 
    mapping_2023=reason_map, 
    x_value=None)

In [51]:
### (85) disney_satisfaction
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C2_3', col_2023='B3_r3',
    new_col_name='disney_satisfaction',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [52]:
### (86) disney_keep
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='C3_3', col_2023='B4_r3',
    new_col_name='disney_keep',
    mapping_2022= score_map,
    mapping_2023= score_map 
    )

In [53]:
### (87) intention_subs_ott
intention_map = {
    1:'추가로 구독',
    2:'지금 서비스 중 하나를 교체',
    3:'지금 이용하고 있는 서비스가 많아 더 이상 구독하지 않는다',
    4:'모르겠다'}
df_ott = add_column_to_ott(
    df_ott, df_2022, df_2023,
    col_2022='G1', col_2023='A15',
    new_col_name='intention_subs_ott',
    mapping_2022= intention_map,
    mapping_2023= intention_map 
    )

In [54]:
# 최종 DB 업로드
df_ott.to_sql(name="ott", con=engine, index=False, if_exists="replace")
print("ott 테이블 최종 업데이트 완료!")

ott 테이블 최종 업데이트 완료!
