In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# MySQL 접속 설정
username = "root"      # MySQL 사용자명
password = "1234"      # MySQL 비밀번호
host = "127.0.0.1"     # 로컬호스트
port = 3306            # 포트
database = "ott_db"  # DB 이름

# MySQL 엔진 생성
engine = create_engine(f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}")

In [3]:
# 데이터 파일 경로 설정
folder_path = r"방송통신위원회_해외 OTT 이용행태조사 원시데이터_20231231"
file_2022 = os.path.join(folder_path, "2022해외OTT이용행태조사_국가통합(20240403).csv")
file_2023 = os.path.join(folder_path, "2023해외OTT이용행태조사_국가통합(20240403).csv")

# CSV 읽기
df_2022 = pd.read_csv(file_2022, encoding='utf-8-sig', low_memory=False)
df_2023 = pd.read_csv(file_2023, encoding='utf-8-sig', low_memory=False)

print("2022 데이터:", df_2022.shape)
print("2023 데이터:", df_2023.shape)

2022 데이터: (4536, 1903)
2023 데이터: (6326, 929)


In [4]:
# 1. 2022 user_id 생성
df_2022['user_id'] = ('2022' + df_2022['RESPID'].astype(str)).astype(int)
# 2. 2023 user_id 생성
df_2023['user_id'] = ('2023' + df_2023['id'].astype(str)).astype(int)

df_kcontents_id_2022 = df_2022[['user_id']].copy()
df_kcontents_id_2023 = df_2023[['user_id']].copy()

# 3. 합치기
df_kcontents_id = pd.concat([df_kcontents_id_2022, df_kcontents_id_2023], ignore_index=True)

# 4. user 테이블에 user_id 업로드
df_kcontents_id.to_sql(name="kcontents", con=engine, index=False, if_exists="replace")

10862

# (1) user

In [5]:
def add_column_to_kcontents(df_kcontents, df_2022, df_2023, col_2022, col_2023, new_col_name,
                       mapping_2022=None, mapping_2023=None):
    """
    user 테이블에 새로운 컬럼을 추가하는 함수
    :param df_kcontents: 기존 user 테이블 DataFrame
    :param df_2022: 2022 데이터 DataFrame
    :param df_2023: 2023 데이터 DataFrame
    :param col_2022: 2022 데이터에서 가져올 컬럼명
    :param col_2023: 2023 데이터에서 가져올 컬럼명
    :param new_col_name: user 테이블에 추가할 새 컬럼명
    :param mapping_2022: 2022 매핑 딕셔너리 (없으면 None)
    :param mapping_2023: 2023 매핑 딕셔너리 (없으면 None)
    :return: 새로운 컬럼이 추가된 df_kcontents
    """

    # 2022 데이터 처리
    df_2022_col = df_2022[['user_id', col_2022]].copy()
    df_2022_col = df_2022_col.rename(columns={col_2022: new_col_name})
    if mapping_2022:
        df_2022_col[new_col_name] = df_2022_col[new_col_name].map(mapping_2022)

    # 2023 데이터 처리
    df_2023_col = df_2023[['user_id', col_2023]].copy()
    df_2023_col = df_2023_col.rename(columns={col_2023: new_col_name})
    if mapping_2023:
        df_2023_col[new_col_name] = df_2023_col[new_col_name].map(mapping_2023)

    # 합치기
    df_new_col = pd.concat([df_2022_col, df_2023_col], ignore_index=True)

    # user 테이블 병합
    df_kcontents = df_kcontents.merge(df_new_col, on='user_id', how='left')

    return df_kcontents

In [6]:
# 현재 user 테이블 불러오기
df_kcontents = pd.read_sql("SELECT * FROM kcontents", con=engine)

# (2) content_type_drama

In [7]:
def add_content_type_column(df_kcontents, df_2022, df_2023,
                            col_2022, cols_2023, new_col_name,
                            target_code, convert_value=1):
    """
    df_kcontents에 콘텐츠 이용 유형 컬럼 추가
    """

    # 2022 처리
    df_2022_part = df_2022[['user_id', col_2022]].copy()
    df_2022_part = df_2022_part.rename(columns={col_2022: new_col_name})

    df_2022_part[new_col_name] = (
        df_2022_part[new_col_name]
        .replace(' ', pd.NA)
        .apply(pd.to_numeric, errors='coerce')
        .apply(lambda x: convert_value if x == target_code else 0 if pd.notna(x) else pd.NA)
    ).astype("Int64")

    # 2023 처리
    df_2023_part = df_2023[['user_id'] + cols_2023].copy()
    df_2023_part[cols_2023] = df_2023_part[cols_2023].replace(' ', pd.NA)
    df_2023_part[cols_2023] = df_2023_part[cols_2023].apply(pd.to_numeric, errors='coerce')

    def contains_target(row):
        values = row.dropna().astype("Int64").values
        return convert_value if target_code in values else 0

    df_2023_part[new_col_name] = df_2023_part[cols_2023].apply(contains_target, axis=1).astype("Int64")
    df_2023_part = df_2023_part[['user_id', new_col_name]]

    # 병합
    df_merge = pd.concat([df_2022_part, df_2023_part], ignore_index=True)
    df_kcontents = df_kcontents.merge(df_merge, on='user_id', how='left')

    return df_kcontents


In [8]:
# # 드라마/시리즈물
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_1',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_drama',
    target_code=1
)

# (3) content_type_movie

In [9]:
# 영화
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_2',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_movie',
    target_code=2
)

# (4) content_type_variety

In [10]:
# 예능/버라이어티
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_3',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_variety',
    target_code=3
)

# (5) content_type_music

In [11]:
# 음악
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_4',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_music',
    target_code=4
)

# (6) content_type_kpop

In [12]:
# 한국 연예인 방송
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_5',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_kpop',
    target_code=5
)

# (7) content_type_animation

In [13]:
# 애니메이션
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_6',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_animation',
    target_code=6
)

# (8) content_type_webtoon

In [14]:
# 웹툰
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_7',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_webtoon',
    target_code=7
)

# (9) content_type_novel

In [15]:
# 웹소설
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_8',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_webnovel',
    target_code=8
)

# (10) content_type_esport

In [16]:

# 게임/e-스포츠
df_kcontents = add_content_type_column(
    df_kcontents, df_2022, df_2023,
    col_2022='F1_9',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4',
        'E1_m5', 'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9'
    ],
    new_col_name='content_type_esport',
    target_code=9
)

In [17]:
content_cols = [col for col in df_kcontents.columns if col.startswith("content_type_")]

summary = []

for col in content_cols:
    value_counts = df_kcontents[col].value_counts(dropna=False).to_dict()
    total = sum(value_counts.values())  # ✅ 여기가 핵심
    row = {
        'column': col,
        '0_count': value_counts.get(0, 0),
        '1_count': value_counts.get(1, 0),
        '0_pct': round(value_counts.get(0, 0) / total * 100, 1) if total > 0 else 0,
        '1_pct': round(value_counts.get(1, 0) / total * 100, 1) if total > 0 else 0,
    }
    summary.append(row)

import pandas as pd
df_summary = pd.DataFrame(summary)
df_summary

Unnamed: 0,column,0_count,1_count,0_pct,1_pct
0,content_type_drama,4522,5125,41.6,47.2
1,content_type_movie,5386,4261,49.6,39.2
2,content_type_variety,7814,1833,71.9,16.9
3,content_type_music,7496,2151,69.0,19.8
4,content_type_kpop,8295,1352,76.4,12.4
5,content_type_animation,8591,1056,79.1,9.7
6,content_type_webtoon,8723,924,80.3,8.5
7,content_type_webnovel,9219,428,84.9,3.9
8,content_type_esport,9297,350,85.6,3.2


# (11) series_usage_method_2022

In [18]:
series_cols_2022 = ['F2_1_1', 'F2_1_2', 'F2_1_3', 'F2_1_4', 'F2_1_5', 'F2_1_7']

df_temp = df_2022[['user_id'] + series_cols_2022].copy()

# 0/1 처리
for col in series_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 하나라도 1이면 1
df_temp['series_usage_method_2022'] = df_temp[series_cols_2022].max(axis=1)

# 필요한 컬럼만
df_temp = df_temp[['user_id', 'series_usage_method_2022']]

# 병합
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')

# (12) movies_usage_method_2022

In [19]:
# 영화 콘텐츠 이용 경로 컬럼들
movies_cols_2022 = ['F2_2_1', 'F2_2_2', 'F2_2_3', 'F2_2_4', 'F2_2_5', 'F2_2_7']

df_temp = df_2022[['user_id'] + movies_cols_2022].copy()

# 0/1 이진화
for col in movies_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 하나라도 1이면 1
df_temp['movies_usage_method_2022'] = df_temp[movies_cols_2022].max(axis=1)

# 필요한 컬럼만 남기기
df_temp = df_temp[['user_id', 'movies_usage_method_2022']]

# df_kcontents에 병합
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (13) variety_usage_method_2022

In [20]:
# 예능/버라이어티 이용 경로 컬럼 리스트
variety_cols_2022 = ['F2_3_1', 'F2_3_2', 'F2_3_3', 'F2_3_4', 'F2_3_5', 'F2_3_7']

# 필요한 컬럼만 복사
df_temp = df_2022[['user_id'] + variety_cols_2022].copy()

# 0/1 이진화
for col in variety_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 한 컬럼으로 통합
df_temp['variety_usage_method_2022'] = df_temp[variety_cols_2022].max(axis=1)

# 필요한 컬럼만 남기고 병합
df_temp = df_temp[['user_id', 'variety_usage_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (14) kpop_usage_method_2022

In [21]:
# 음악 콘텐츠 이용 경로 컬럼 리스트
music_cols_2022 = ['F2_4_1', 'F2_4_2', 'F2_4_3', 'F2_4_4', 'F2_4_5', 'F2_4_6', 'F2_4_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + music_cols_2022].copy()

# 0/1 이진화 처리
for col in music_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['kpop_usage_method_2022'] = df_temp[music_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'kpop_usage_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (15) celebrity_viewing_method_2022

In [22]:
# 연예인 방송 콘텐츠 이용 경로 컬럼 리스트
celebrity_cols_2022 = ['F2_5_1', 'F2_5_2', 'F2_5_3', 'F2_5_4', 'F2_5_5', 'F2_5_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + celebrity_cols_2022].copy()

# 0/1 이진화 처리
for col in celebrity_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['celebrity_viewing_method_2022'] = df_temp[celebrity_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'celebrity_viewing_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (16) animation_usage_method_2022

In [23]:
# 애니메이션 콘텐츠 이용 경로 컬럼 리스트
animation_cols_2022 = ['F2_6_1', 'F2_6_2', 'F2_6_3', 'F2_6_4', 'F2_6_5', 'F2_6_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + animation_cols_2022].copy()

# 0/1 이진화 처리
for col in animation_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['animation_usage_method_2022'] = df_temp[animation_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'animation_usage_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (17) webtoon_viewing_method_2022

In [24]:
# 웹툰 콘텐츠 이용 경로 컬럼 리스트
webtoon_cols_2022 = ['F2_7_1', 'F2_7_2', 'F2_7_3', 'F2_7_4', 'F2_7_5', 'F2_7_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + webtoon_cols_2022].copy()

# 0/1 이진화 처리
for col in webtoon_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['webtoon_viewing_method_2022'] = df_temp[webtoon_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'webtoon_viewing_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (18) webnovel_viewung_method_2022

In [25]:
# 웹소설 콘텐츠 이용 경로 컬럼 리스트
webnovel_cols_2022 = ['F2_8_1', 'F2_8_2', 'F2_8_3', 'F2_8_4', 'F2_8_5', 'F2_8_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + webnovel_cols_2022].copy()

# 0/1 이진화 처리
for col in webnovel_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['webnovel_viewing_method_2022'] = df_temp[webnovel_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'webnovel_viewing_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (19) esport_viewing_method_2022

In [26]:
# e스포츠 콘텐츠 이용 경로 컬럼 리스트
esport_cols_2022 = ['F2_9_1', 'F2_9_2', 'F2_9_3', 'F2_9_4', 'F2_9_5', 'F2_9_7']

# 필요한 컬럼 복사
df_temp = df_2022[['user_id'] + esport_cols_2022].copy()

# 0/1 이진화 처리
for col in esport_cols_2022:
    df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
    df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# 통합 컬럼 생성
df_temp['esport_viewing_method_2022'] = df_temp[esport_cols_2022].max(axis=1)

# 병합
df_temp = df_temp[['user_id', 'esport_viewing_method_2022']]
df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (20) series_usage_method_2023

In [27]:
df_kcontents['series_usage_method_2023'] = df_2023['E3']


# (21) movies_usage_method_2023

In [28]:
df_kcontents['movies_usage_method_2023'] = df_2023['E3_1']

# (22) variety_usage_method_2023

In [29]:
df_kcontents['variety_usage_method_2023'] = df_2023['E3_2']

# (23) kpop_usage_method_2023

In [30]:
df_kcontents['kpop_usage_method_2023'] = df_2023['E3_3']

# (24) celebrity_viewing_method_2023

In [31]:
df_kcontents['celebrity_usage_method_2023'] = df_2023['E3_4']

# (25) animation_usage_method_2023

In [32]:
df_kcontents['animation_usage_method_2023'] = df_2023['E3_5']

# (26) webtoon_viewing_method_2023

In [33]:
df_kcontents['webtoon_usage_method_2023'] = df_2023['E3_6']

# (27) webnovel_viewing_method_2023

In [34]:
df_kcontents['webnovel_usage_method_2023'] = df_2023['E3_7']

# (28) esport_viewing_method_2023

In [35]:
df_kcontents['esport_usage_method_2023'] = df_2023['E3_8']


# (29) series_usage_frequency

In [36]:
frequency_map = {
    1: '매일',
    2: '1주일에 5~6일',
    3: '1주일에 3~4일',
    4: '1주일에 1~2일',
    5: '한 달에 1~3일',
    6: '2~3달에 1~2일 이하'
}

df_kcontents = add_column_to_kcontents(
    df_kcontents, df_2022, df_2023,
    col_2022="F3", col_2023="E4",
    new_col_name="series_usage_frequency",
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (30) movies_usage_frequency

In [37]:
df_kcontents = add_column_to_kcontents(
    df_kcontents, df_2022, df_2023,
    col_2022="F4", col_2023="E5",
    new_col_name="movies_usage_frequency",
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (31) variety_usage_frequency

In [38]:
df_kcontents = add_column_to_kcontents(
    df_kcontents, df_2022, df_2023,
    col_2022="F5", col_2023="E6",
    new_col_name="variety_usage_frequency",
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (32) kpop_usage_frequency

In [39]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F6',
    col_2023='E7',
    new_col_name='kpop_usage_frequency',
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (33) webtoon_usage_frequency

In [40]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F7',
    col_2023='E8',
    new_col_name='webtoon_usage_frequency',
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (34) webnovel_usage_frequency

In [41]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F8',
    col_2023='E9',
    new_col_name='webnovel_usage_frequency',
    mapping_2022=frequency_map,
    mapping_2023=frequency_map
)

# (35) drama_movie_genre_rank1

In [42]:
def add_column_to_kcontents(df_kcontents, df_2022, df_2023, col_2022, col_2023, new_col_name,
                            mapping_2022=None, mapping_2023=None):
    """
    kcontents 테이블에 새로운 컬럼을 추가하는 함수
    """
    # 2022 처리
    df_2022_col = df_2022[['user_id', col_2022]].copy()
    df_2022_col = df_2022_col.rename(columns={col_2022: new_col_name})
    if mapping_2022:
        df_2022_col[new_col_name] = df_2022_col[new_col_name].map(mapping_2022)

    # 2023 처리 (col_2023이 주어진 경우에만)
    if col_2023 is not None:
        df_2023_col = df_2023[['user_id', col_2023]].copy()
        df_2023_col = df_2023_col.rename(columns={col_2023: new_col_name})
        if mapping_2023:
            df_2023_col[new_col_name] = df_2023_col[new_col_name].map(mapping_2023)
        df_new_col = pd.concat([df_2022_col, df_2023_col], ignore_index=True)
    else:
        df_new_col = df_2022_col.copy()

    # 병합
    df_kcontents = df_kcontents.merge(df_new_col, on='user_id', how='left')
    return df_kcontents


In [43]:
genre_map_2022 = {
    1: '로맨스/멜로',
    2: 'SF/판타지',
    3: '액션',
    4: '공포/스릴러/좀비물',
    5: '역사물',
    6: '휴면/가족',
    7: '다큐멘터리',
    8: '뮤지컬',
    9: '기타',
    10: '이용하지 않음'
}

In [44]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F10RANK_1',
    col_2023=None,
    new_col_name='drama_movie_genre_rank1',
    mapping_2022=genre_map_2022,
    mapping_2023=None
)


# (36) drama_movie_genre_rank2

In [45]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F10RANK_2',
    col_2023=None,
    new_col_name='drama_movie_genre_rank2',
    mapping_2022=genre_map_2022,
    mapping_2023=None
)


# (37) drama_movie_romance

In [46]:
def add_multiselect_column_2023(df_kcontents, df_2023, base_col, multiselect_cols, new_col_name, target_code):
    all_cols = [base_col] + multiselect_cols
    df_2023_part = df_2023[['user_id'] + all_cols].copy()

    df_2023_part[new_col_name] = df_2023_part[all_cols].apply(
        lambda row: 1 if target_code in row.values else 0, axis=1
    )

    df_result = df_kcontents.merge(df_2023_part[['user_id', new_col_name]], on='user_id', how='left')
    return df_result

In [47]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=['E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13'],
    new_col_name='drama_movie_romance',
    target_code=1
)


# (38) drama_movie_sf

In [48]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_sf',
    target_code=2
)


# (39) drama_movie_action

In [49]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_action',
    target_code=3
)


# (40) drama_movie_horror

In [50]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_horror',
    target_code=4
)


# (41) drama_movie_history

In [51]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_history',
    target_code=6
)


# (42) drama_movie_family

In [52]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_family',
    target_code=7
)


# (43) drama_movie_documentary

In [53]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_documentary',
    target_code=8
)


# (44) drama_movie_musical

In [54]:
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E10',
    multiselect_cols=[
        'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
        'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
        'E10_m12', 'E10_m13'
    ],
    new_col_name='drama_movie_musical',
    target_code=9
)


# (45) drama_movie_other

In [55]:
# 기타 범주: 코미디, 애니메이션, 키즈, 기타 (코드: 5, 10, 11, 12)
df_kcontents['drama_movie_other'] = df_2023[[
    'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
    'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
    'E10_m12', 'E10_m13'
]].apply(lambda row: int(any(code in [5, 10, 11, 12] for code in row if pd.notna(code))), axis=1)


# (46) drama_movie_no

In [56]:
# 이용하지 않음: 코드 13
df_kcontents['drama_movie_no'] = df_2023[[
    'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6',
    'E10_m7', 'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11',
    'E10_m12', 'E10_m13'
]].apply(lambda row: int(any(code == 13 for code in row if pd.notna(code))), axis=1)


# (47) variety_favorite_rank1

In [57]:
variety_genre_map_2022 = {
    1: '게임 예능',
    2: '여행 예능',
    3: '관찰 예능',
    4: '연애 리얼리티 예능',
    5: '오디션/서바이벌 예능',
    6: '교양 예능',
    7: '기타',
    8: '이용하지 않음'
}


In [58]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F11RANK_1',
    col_2023=None,
    new_col_name='variety_favorite_rank1',
    mapping_2022=variety_genre_map_2022,
    mapping_2023=None
)

# (48) variety_favorite_rank2

In [59]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F11RANK_2',
    col_2023=None,
    new_col_name='variety_favorite_rank2',
    mapping_2022=variety_genre_map_2022,
    mapping_2023=None
)


# (49) variety_favorite_game

In [60]:
e11_multiselect_cols = [
    'E11_m2', 'E11_m3', 'E11_m4', 'E11_m5', 'E11_m6',
    'E11_m7', 'E11_m8', 'E11_m9', 'E11_m10', 'E11_m11',
    'E11_m12', 'E11_m13', 'E11_m14'
]


In [61]:
# (1) 게임 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_game',
    target_code=1
)

# (50) variety_favorite_travel

In [62]:
# (2) 여행 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_travel',
    target_code=2
)

# (51) variety_favorite_observation

In [63]:
# (3) 관찰 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_observation',
    target_code=3
)

# (52) variety_favorite_romance

In [64]:
# (4) 연애 리얼리티 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_romance',
    target_code=4
)

# (53) variety_favorite_audition

In [65]:
# (5) 음악/댄스 오디션 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_audition',
    target_code=5
)


# (54) variety_favorite_sport

In [66]:
# (6) 스포츠/게임 서바이벌 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_sport',
    target_code=6
)

# (55) variety_favorite_culture

In [67]:
# (7) 교양 예능
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E11',
    multiselect_cols=e11_multiselect_cols,
    new_col_name='variety_favorite_culture',
    target_code=7
)

# (56) variety_favorite_other

In [68]:
# 기타 항목: 8~14 → 따로 컬럼 하나로 통합하고 싶으신 경우

for code in [8, 9, 10, 11, 12, 13, 14]:
    df_kcontents = add_multiselect_column_2023(
        df_kcontents, df_2023,
        base_col='E11',
        multiselect_cols=e11_multiselect_cols,
        new_col_name=f'variety_favorite_other_{code}',
        target_code=code
    )

# 최종 통합: variety_favorite_other = OR 연산
df_kcontents['variety_favorite_other'] = (
    df_kcontents[[f'variety_favorite_other_{c}' for c in [8,9,10,11,12,13,14]]].max(axis=1)
)

# 중간 컬럼 삭제 (선택)
df_kcontents.drop(columns=[f'variety_favorite_other_{c}' for c in [8,9,10,11,12,13,14]], inplace=True)

# (57) prefer_immediate_watch

In [69]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F12_1',
    col_2023='E12',
    new_col_name='prefer_immediate_watch'
)

# (58) usage_inconvenience

In [70]:
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F13',
    col_2023='E13',
    new_col_name='usage_inconvenience'
)


# (59) nonusage_reason

In [71]:
nonusage_reason_map = {
    1: '경험할 기회/방법이 없음',
    2: '관심이 없음'
}
df_kcontents = add_column_to_kcontents(
    df_kcontents,
    df_2022, df_2023,
    col_2022='F14',
    col_2023='E14',
    new_col_name='nonusage_reason'
)

# (60) preferred_future_drama

In [72]:
f16_cols = [
    'F16_1', 'F16_2', 'F16_3', 'F16_4',
    'F16_5', 'F16_6', 'F16_7', 'F16_8'
]


In [73]:
# (1) 드라마/시리즈물
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_1',
    multiselect_cols=[],  # F16은 단일 컬럼이므로 멀티 없음
    new_col_name='preferred_future_drama',
    target_code=1
)


# (61) preferred_future_movie

In [74]:
# (2) 영화
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_2',
    multiselect_cols=[],
    new_col_name='preferred_future_movie',
    target_code=2
)


# (62) preferred_future_variety

In [75]:
# (3) 예능/버라이어티
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_3',
    multiselect_cols=[],
    new_col_name='preferred_future_variety',
    target_code=3
)


# (63) preferred_future_music

In [76]:
# (4) 음악
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_4',
    multiselect_cols=[],
    new_col_name='preferred_future_music',
    target_code=4
)


# (64) preferred_future_webtoon

In [77]:
# (5) 웹툰
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_5',
    multiselect_cols=[],
    new_col_name='preferred_future_webtoon',
    target_code=5
)


# (65) preferred_future_webnovel

In [78]:
# (6) 웹소설
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_6',
    multiselect_cols=[],
    new_col_name='preferred_future_webnovel',
    target_code=6
)


# (66) preferred_future_game

In [79]:
# (7) 게임
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_7',
    multiselect_cols=[],
    new_col_name='preferred_future_game',
    target_code=7
)


# (67) preferred_future_other

In [80]:
# (8) 기타
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2022,
    base_col='F16_8',
    multiselect_cols=[],
    new_col_name='preferred_future_other',
    target_code=8
)


# (68) desired_genre_drama

In [81]:
e16_multiselect_cols = [
    'E16_m2', 'E16_m3', 'E16_m4', 'E16_m5',
    'E16_m6', 'E16_m7', 'E16_m8'
]


In [82]:
# 드라마/시리즈물 (코드 1)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_drama',
    target_code=1
)

# (69) desired_genre_movie

In [83]:
# 영화 (코드 2)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_movie',
    target_code=2
)

# (70) desired_genre_variety

In [84]:
# 예능/버라이어티 (코드 3)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_variety',
    target_code=3
)

# (71) desired_genre_music

In [85]:
# 음악 (코드 4)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_music',
    target_code=4
)

# (72) desired_genre_webtoon

In [86]:
# 웹툰 (코드 5)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_webtoon',
    target_code=5
)

# (73) desired_genre_webnovel

In [87]:
# 웹소설 (코드 6)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_webnovel',
    target_code=6
)

# (74) desired_genre_game

In [88]:
# 게임 (코드 7)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_game',
    target_code=7
)

# (75) desired_genre_other

In [89]:
# 기타 (코드 8)
df_kcontents = add_multiselect_column_2023(
    df_kcontents, df_2023,
    base_col='E16',
    multiselect_cols=e16_multiselect_cols,
    new_col_name='desired_genre_other',
    target_code=8
)

In [90]:
new_columns = df_kcontents.columns.difference(['user_id'])
df_kcontents[['user_id'] + list(new_columns)]

Unnamed: 0,user_id,animation_usage_method_2022,animation_usage_method_2023,celebrity_usage_method_2023,celebrity_viewing_method_2022,content_type_animation,content_type_drama,content_type_esport,content_type_kpop,content_type_movie,...,variety_favorite_travel,variety_usage_frequency,variety_usage_method_2022,variety_usage_method_2023,webnovel_usage_frequency,webnovel_usage_method_2023,webnovel_viewing_method_2022,webtoon_usage_frequency,webtoon_usage_method_2023,webtoon_viewing_method_2022
0,20228,0.0,,,0.0,,,,,,...,,,0.0,,,,0.0,,,0.0
1,202211,0.0,,,0.0,,,,,,...,,,0.0,,,,0.0,,,0.0
2,202212,0.0,,,0.0,0,0,0,0,0,...,,,1.0,,,,0.0,,,0.0
3,202213,0.0,,,0.0,,,,,,...,,,0.0,,,2,0.0,,,0.0
4,202215,1.0,,3,0.0,1,0,0,0,0,...,,,1.0,,,,0.0,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10857,202310298,,,,,0,1,0,0,1,...,0.0,,,,,,,,,
10858,202310299,,,,,0,0,0,0,1,...,0.0,,,,,,,,,
10859,202310300,,,,,1,0,0,0,1,...,0.0,,,,,,,,,
10860,202310301,,,,,0,1,0,1,1,...,0.0,,,,,,,,,


In [91]:
# 모든 컬럼명 확인
df_kcontents.columns.tolist()


['user_id',
 'content_type_drama',
 'content_type_movie',
 'content_type_variety',
 'content_type_music',
 'content_type_kpop',
 'content_type_animation',
 'content_type_webtoon',
 'content_type_webnovel',
 'content_type_esport',
 'series_usage_method_2022',
 'movies_usage_method_2022',
 'variety_usage_method_2022',
 'kpop_usage_method_2022',
 'celebrity_viewing_method_2022',
 'animation_usage_method_2022',
 'webtoon_viewing_method_2022',
 'webnovel_viewing_method_2022',
 'esport_viewing_method_2022',
 'series_usage_method_2023',
 'movies_usage_method_2023',
 'variety_usage_method_2023',
 'kpop_usage_method_2023',
 'celebrity_usage_method_2023',
 'animation_usage_method_2023',
 'webtoon_usage_method_2023',
 'webnovel_usage_method_2023',
 'esport_usage_method_2023',
 'series_usage_frequency',
 'movies_usage_frequency',
 'variety_usage_frequency',
 'kpop_usage_frequency',
 'webtoon_usage_frequency',
 'webnovel_usage_frequency',
 'drama_movie_genre_rank1',
 'drama_movie_genre_rank2',
 'dr

In [93]:
# 최종 DB 업로드
df_kcontents.to_sql(name="kcontents", con=engine, index=False, if_exists="replace")
print("kcontents 테이블 최종 업데이트 완료!")

kcontents 테이블 최종 업데이트 완료!
