In [1]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# MySQL connection settings.
# Each value is read from an environment variable; the fallback is the
# previous hard-coded local-development value, so nothing changes when the
# variables are unset, but real credentials no longer have to be stored in
# the notebook itself.
username = os.environ.get("MYSQL_USER", "root")        # MySQL user name
password = os.environ.get("MYSQL_PASSWORD", "1234")    # MySQL password
host = os.environ.get("MYSQL_HOST", "127.0.0.1")       # server address (localhost by default)
port = int(os.environ.get("MYSQL_PORT", "3306"))       # server port
database = os.environ.get("MYSQL_DATABASE", "ott_db")  # database name

# Create the SQLAlchemy engine (pymysql driver).
# User name and password are percent-encoded because characters such as
# '@', ':' or '/' would otherwise be parsed as URL delimiters.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}@{host}:{port}/{database}"
)

In [3]:
# Location of the raw survey exports (relative to the working directory)
folder_path = r"방송통신위원회_해외 OTT 이용행태조사 원시데이터_20231231"
file_2022 = os.path.join(folder_path, "2022해외OTT이용행태조사_국가통합(20240403).csv")
file_2023 = os.path.join(folder_path, "2023해외OTT이용행태조사_국가통합(20240403).csv")

# Load both waves with identical reader options (BOM-aware UTF-8, whole-file type inference)
df_2022, df_2023 = (
    pd.read_csv(csv_path, encoding='utf-8-sig', low_memory=False)
    for csv_path in (file_2022, file_2023)
)

# Report (rows, columns) of each wave
print("2022 데이터:", df_2022.shape)
print("2023 데이터:", df_2023.shape)

2022 데이터: (4536, 1903)
2023 데이터: (6326, 929)


In [4]:
# 1-2. Build the respondent key for each wave: the survey year is prepended to
# the wave's own respondent number (RESPID in 2022, id in 2023).
# The cast uses an explicit 64-bit integer because plain `int` may resolve to a
# 32-bit type on some platforms, which is too narrow for long respondent numbers.
df_2022['user_id'] = ('2022' + df_2022['RESPID'].astype(str)).astype('int64')
df_2023['user_id'] = ('2023' + df_2023['id'].astype(str)).astype('int64')

# Key-only frames, one per wave
df_kcontents_id_2022 = df_2022[['user_id']].copy()
df_kcontents_id_2023 = df_2023[['user_id']].copy()

# 3. Stack both waves into a single key list
df_kcontents_id = pd.concat([df_kcontents_id_2022, df_kcontents_id_2023], ignore_index=True)

# 4. Write the keys to the `kcontents` table, replacing any existing table.
#    The call is left as the cell's last expression so its return value is displayed.
df_kcontents_id.to_sql(name="kcontents", con=engine, index=False, if_exists="replace")

10862

# (1) user

In [5]:
def add_column_to_kcontents(df_kcontents, df_2022, df_2023, col_2022, col_2023, new_col_name,
                       mapping_2022=None, mapping_2023=None):
    """
    Attach one harmonised column, sourced from both survey waves, to df_kcontents.

    The source column of each wave is renamed to ``new_col_name``, optionally
    recoded through a mapping, the two waves are stacked, and the result is
    left-joined onto ``df_kcontents`` by ``user_id``.

    If ``df_kcontents`` already contains ``new_col_name`` (for example because
    the calling cell is executed a second time) the old column is replaced
    instead of being duplicated with ``_x`` / ``_y`` suffixes.

    :param df_kcontents: DataFrame to extend; must contain ``user_id``
    :param df_2022: 2022 wave; must contain ``user_id`` and ``col_2022``
    :param df_2023: 2023 wave; must contain ``user_id`` and ``col_2023``
    :param col_2022: source column name in the 2022 wave
    :param col_2023: source column name in the 2023 wave
    :param new_col_name: name of the column added to ``df_kcontents``
    :param mapping_2022: optional recoding passed to ``Series.map`` for 2022
                         (skipped when None or empty)
    :param mapping_2023: optional recoding passed to ``Series.map`` for 2023
                         (skipped when None or empty)
    :return: a new DataFrame; the inputs are not modified
    """

    def _wave_part(df_wave, source_col, mapping):
        # Key + renamed source column for one wave, recoded if a mapping is given.
        part = df_wave[['user_id', source_col]].copy()
        part = part.rename(columns={source_col: new_col_name})
        if mapping:
            part[new_col_name] = part[new_col_name].map(mapping)
        return part

    # Stack the two waves into one lookup table
    df_new_col = pd.concat(
        [
            _wave_part(df_2022, col_2022, mapping_2022),
            _wave_part(df_2023, col_2023, mapping_2023),
        ],
        ignore_index=True,
    )

    # Drop a stale copy of the column first so the merge stays idempotent
    base = df_kcontents.drop(columns=[new_col_name], errors='ignore')
    return base.merge(df_new_col, on='user_id', how='left')

In [6]:
# Load the current contents of the `kcontents` table into a DataFrame
df_kcontents = pd.read_sql(sql="SELECT * FROM kcontents", con=engine)

# (2) content_type_drama

In [7]:
def add_content_type_column(df_kcontents, df_2022, df_2023,
                            col_2022, cols_2023, new_col_name,
                            target_code, convert_value=1):
    """
    Add a content-type indicator column to df_kcontents.

    2022 wave: the single column ``col_2022`` is converted to numbers; a row
    gets ``convert_value`` when it equals ``target_code``, 0 for any other
    numeric answer, and <NA> when the answer is blank / non-numeric.

    2023 wave: all columns in ``cols_2023`` are converted to numbers; a row
    gets ``convert_value`` when any of them equals ``target_code``, else 0
    (rows with no numeric answer at all therefore get 0, not <NA>).

    The two waves are stacked and left-joined onto ``df_kcontents`` by
    ``user_id``. An existing ``new_col_name`` column is replaced, so calling
    the function again does not create ``_x`` / ``_y`` duplicates.

    :param df_kcontents: DataFrame to extend; must contain ``user_id``
    :param df_2022: 2022 wave; must contain ``user_id`` and ``col_2022``
    :param df_2023: 2023 wave; must contain ``user_id`` and ``cols_2023``
    :param col_2022: name of the 2022 source column
    :param cols_2023: list of 2023 source column names
    :param new_col_name: name of the indicator column to add
    :param target_code: answer code that marks the content type
    :param convert_value: value stored when the code is present (default 1)
    :return: a new DataFrame; the inputs are not modified
    """
    cols_2023 = list(cols_2023)

    # 2022: one column. Blanks and other non-numeric text become NaN via coerce.
    codes_2022 = pd.to_numeric(df_2022[col_2022], errors='coerce')
    flags_2022 = pd.Series(pd.NA, index=codes_2022.index, dtype="Int64")
    flags_2022[codes_2022.notna()] = 0
    flags_2022[codes_2022 == target_code] = convert_value
    df_2022_part = df_2022[['user_id']].assign(**{new_col_name: flags_2022})

    # 2023: several columns. A single vectorised comparison replaces the
    # per-row Python callback (NaN never equals the target code).
    codes_2023 = df_2023[cols_2023].apply(pd.to_numeric, errors='coerce')
    selected = codes_2023.eq(target_code).any(axis=1)
    flags_2023 = pd.Series(0, index=df_2023.index, dtype="Int64")
    flags_2023[selected] = convert_value
    df_2023_part = df_2023[['user_id']].assign(**{new_col_name: flags_2023})

    # Stack both waves and attach; drop a stale column so the merge is idempotent
    df_merge = pd.concat([df_2022_part, df_2023_part], ignore_index=True)
    base = df_kcontents.drop(columns=[new_col_name], errors='ignore')
    return base.merge(df_merge, on='user_id', how='left')


In [None]:
# Drama / series: code 1 (2022 column F1_1; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_1',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_drama',
    target_code=1,
    convert_value=1,
)

# (3) content_type_movie

In [None]:
# Movies: code 2 (2022 column F1_2; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_2',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_movie',
    target_code=2,
    convert_value=1,
)

# (4) content_type_variety

In [None]:
# Entertainment / variety shows: code 3 (2022 column F1_3; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_3',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_variety',
    target_code=3,
    convert_value=1,
)

# (5) content_type_music

In [None]:
# Music: code 4 (2022 column F1_4; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_4',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_music',
    target_code=4,
    convert_value=1,
)

# (6) content_type_kpop

In [None]:
# Korean celebrity broadcasts: code 5 (2022 column F1_5; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_5',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_kpop',
    target_code=5,
    convert_value=1,
)

# (7) content_type_animation

In [None]:
# Animation: code 6 (2022 column F1_6; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_6',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_animation',
    target_code=6,
    convert_value=1,
)

# (8) content_type_webtoon

In [None]:
# Webtoons: code 7 (2022 column F1_7; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_7',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_webtoon',
    target_code=7,
    convert_value=1,
)

# (9) content_type_webnovel

In [None]:
# Web novels: code 8 (2022 column F1_8; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_8',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_webnovel',
    target_code=8,
    convert_value=1,
)

# (10) content_type_esport

In [None]:

# Games / e-sports: code 9 (2022 column F1_9; 2023 columns E1 ... E1_m9)
df_kcontents = add_content_type_column(
    df_kcontents=df_kcontents,
    df_2022=df_2022,
    df_2023=df_2023,
    col_2022='F1_9',
    cols_2023=[
        'E1', 'E1_m2', 'E1_m3', 'E1_m4', 'E1_m5',
        'E1_m6', 'E1_m7', 'E1_m8', 'E1_m9',
    ],
    new_col_name='content_type_esport',
    target_code=9,
    convert_value=1,
)

In [None]:
# Count and share of 0 / 1 values for every content_type_* indicator column
content_cols = [col for col in df_kcontents.columns if col.startswith("content_type_")]

summary = []

for col in content_cols:
    # dropna=False keeps missing answers in the tally ...
    value_counts = df_kcontents[col].value_counts(dropna=False).to_dict()
    # ... so the percentage base below is all rows, not only answered ones
    total = sum(value_counts.values())
    zero_count = value_counts.get(0, 0)
    one_count = value_counts.get(1, 0)
    row = {
        'column': col,
        '0_count': zero_count,
        '1_count': one_count,
        '0_pct': round(zero_count / total * 100, 1) if total > 0 else 0,
        '1_pct': round(one_count / total * 100, 1) if total > 0 else 0,
    }
    summary.append(row)

# pandas is already imported as `pd` in the notebook's import cell,
# so the duplicate mid-notebook import was removed.
df_summary = pd.DataFrame(summary)
df_summary

# (11) series_usage_method_2022

In [None]:
# series_cols_2022 = ['F2_1_1', 'F2_1_2', 'F2_1_3', 'F2_1_4', 'F2_1_5', 'F2_1_7']

# df_temp = df_2022[['user_id'] + series_cols_2022].copy()

# # 0/1 처리
# for col in series_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 하나라도 1이면 1
# df_temp['series_usage_method_2022'] = df_temp[series_cols_2022].max(axis=1)

# # 필요한 컬럼만
# df_temp = df_temp[['user_id', 'series_usage_method_2022']]
# # 병합

# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')

# (12) movies_usage_method_2022

In [None]:
# # 영화 콘텐츠 이용 경로 컬럼들
# movies_cols_2022 = ['F2_2_1', 'F2_2_2', 'F2_2_3', 'F2_2_4', 'F2_2_5', 'F2_2_7']

# df_temp = df_2022[['user_id'] + movies_cols_2022].copy()

# # 0/1 이진화
# for col in movies_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 하나라도 1이면 1
# df_temp['movies_usage_method_2022'] = df_temp[movies_cols_2022].max(axis=1)

# # 필요한 컬럼만 남기기
# df_temp = df_temp[['user_id', 'movies_usage_method_2022']]

# # df_kcontents에 병합
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (13) variety_usage_method_2022

In [None]:
# # 예능/버라이어티 이용 경로 컬럼 리스트
# variety_cols_2022 = ['F2_3_1', 'F2_3_2', 'F2_3_3', 'F2_3_4', 'F2_3_5', 'F2_3_7']

# # 필요한 컬럼만 복사
# df_temp = df_2022[['user_id'] + variety_cols_2022].copy()

# # 0/1 이진화
# for col in variety_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 한 컬럼으로 통합
# df_temp['variety_usage_method_2022'] = df_temp[variety_cols_2022].max(axis=1)

# # 필요한 컬럼만 남기고 병합
# df_temp = df_temp[['user_id', 'variety_usage_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (14) kpop_usage_method_2022

In [None]:
# # 음악 콘텐츠 이용 경로 컬럼 리스트
# music_cols_2022 = ['F2_4_1', 'F2_4_2', 'F2_4_3', 'F2_4_4', 'F2_4_5', 'F2_4_6', 'F2_4_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + music_cols_2022].copy()

# # 0/1 이진화 처리
# for col in music_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['kpop_usage_method_2022'] = df_temp[music_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'kpop_usage_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (15) celebrity_viewing_method_2022

In [None]:
# # 연예인 방송 콘텐츠 이용 경로 컬럼 리스트
# celebrity_cols_2022 = ['F2_5_1', 'F2_5_2', 'F2_5_3', 'F2_5_4', 'F2_5_5', 'F2_5_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + celebrity_cols_2022].copy()

# # 0/1 이진화 처리
# for col in celebrity_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['celebrity_viewing_method_2022'] = df_temp[celebrity_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'celebrity_viewing_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (16) animation_usage_method_2022

In [None]:
# # 애니메이션 콘텐츠 이용 경로 컬럼 리스트
# animation_cols_2022 = ['F2_6_1', 'F2_6_2', 'F2_6_3', 'F2_6_4', 'F2_6_5', 'F2_6_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + animation_cols_2022].copy()

# # 0/1 이진화 처리
# for col in animation_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['animation_usage_method_2022'] = df_temp[animation_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'animation_usage_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (17) webtoon_viewing_method_2022

In [None]:
# # 웹툰 콘텐츠 이용 경로 컬럼 리스트
# webtoon_cols_2022 = ['F2_7_1', 'F2_7_2', 'F2_7_3', 'F2_7_4', 'F2_7_5', 'F2_7_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + webtoon_cols_2022].copy()

# # 0/1 이진화 처리
# for col in webtoon_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['webtoon_viewing_method_2022'] = df_temp[webtoon_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'webtoon_viewing_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (18) webnovel_viewing_method_2022

In [None]:
# # 웹소설 콘텐츠 이용 경로 컬럼 리스트
# webnovel_cols_2022 = ['F2_8_1', 'F2_8_2', 'F2_8_3', 'F2_8_4', 'F2_8_5', 'F2_8_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + webnovel_cols_2022].copy()

# # 0/1 이진화 처리
# for col in webnovel_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['webnovel_viewing_method_2022'] = df_temp[webnovel_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'webnovel_viewing_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (19) esport_viewing_method_2022

In [None]:
# # e스포츠 콘텐츠 이용 경로 컬럼 리스트
# esport_cols_2022 = ['F2_9_1', 'F2_9_2', 'F2_9_3', 'F2_9_4', 'F2_9_5', 'F2_9_7']

# # 필요한 컬럼 복사
# df_temp = df_2022[['user_id'] + esport_cols_2022].copy()

# # 0/1 이진화 처리
# for col in esport_cols_2022:
#     df_temp[col] = pd.to_numeric(df_temp[col], errors='coerce').fillna(0)
#     df_temp[col] = df_temp[col].apply(lambda x: 1 if x != 0 else 0)

# # 통합 컬럼 생성
# df_temp['esport_viewing_method_2022'] = df_temp[esport_cols_2022].max(axis=1)

# # 병합
# df_temp = df_temp[['user_id', 'esport_viewing_method_2022']]
# df_kcontents = df_kcontents.merge(df_temp, on='user_id', how='left')


# (20) series_usage_method_2023

In [None]:
# # 매핑 정의
# usage_method_map = {
#     1: "TV(지상파, 유료방송, FAST) 프로그램",
#     2: "넷플릭스/디즈니+ 등의 유료 OTT 플랫폼",
#     3: "유튜브/틱톡 등의 오픈 비디오 플랫폼",
#     4: "K-콘텐츠 전문 플랫폼 (Viki/KOCOWA 등)",
#     5: "K-콘텐츠 커뮤니티",
#     6: "SNS 서비스",
#     7: "인터넷 사이트 (기타)",
#     8: "P2P 사이트",
#     9: "기타"
# }

# # E3 → series_usage_method_2023으로 직접 매핑
# df_2023['series_usage_method_2023'] = (
#     df_2023['E3']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_series_method = df_2023[['user_id', 'series_usage_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_series_method, on='user_id', how='left')


# (21) movies_usage_method_2023

In [None]:
# # E3_1 → movies_usage_method_2023 매핑
# df_2023['movies_usage_method_2023'] = (
#     df_2023['E3_1']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 병합 준비
# df_movies_method = df_2023[['user_id', 'movies_usage_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_movies_method, on='user_id', how='left')

# (22) variety_usage_method_2023

In [None]:
# # E3_2 → variety_usage_method_2023으로 매핑
# df_2023['variety_usage_method_2023'] = (
#     df_2023['E3_2']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_variety = df_2023[['user_id', 'variety_usage_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_variety, on='user_id', how='left')


# (23) kpop_usage_method_2023

In [None]:
# # E3_3 → kpop_usage_method_2023으로 매핑
# df_2023['kpop_usage_method_2023'] = (
#     df_2023['E3_3']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_kpop = df_2023[['user_id', 'kpop_usage_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_kpop, on='user_id', how='left')


# (24) celebrity_viewing_method_2023

In [None]:
# # E3_4 → celebrity_viewing_method_2023으로 매핑
# df_2023['celebrity_viewing_method_2023'] = (
#     df_2023['E3_4']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_celebrity = df_2023[['user_id', 'celebrity_viewing_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_celebrity, on='user_id', how='left')


# (25) animation_usage_method_2023

In [None]:
# # E3_5 → animation_usage_method_2023으로 매핑
# df_2023['animation_usage_method_2023'] = (
#     df_2023['E3_5']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_animation = df_2023[['user_id', 'animation_usage_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_animation, on='user_id', how='left')


# (26) webtoon_viewing_method_2023

In [None]:
# # E3_6 → webtoon_viewing_method_2023으로 매핑
# df_2023['webtoon_viewing_method_2023'] = (
#     df_2023['E3_6']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_webtoon = df_2023[['user_id', 'webtoon_viewing_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_webtoon, on='user_id', how='left')


# (27) webnovel_viewing_method_2023

In [None]:
# # E3_7 → webnovel_viewing_method_2023으로 매핑
# df_2023['webnovel_viewing_method_2023'] = (
#     df_2023['E3_7']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_webnovel = df_2023[['user_id', 'webnovel_viewing_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_webnovel, on='user_id', how='left')

# (28) esport_viewing_method_2023

In [None]:
# # E3_8 → esport_viewing_method_2023으로 매핑
# df_2023['esport_viewing_method_2023'] = (
#     df_2023['E3_8']
#     .astype(str).str.strip()           # 공백 제거
#     .replace('', np.nan)               # 빈 문자열을 NaN으로
#     .astype(float)                     # 숫자로 변환
#     .map(usage_method_map)             # 매핑
# )

# # 필요한 컬럼만 추출
# df_esport = df_2023[['user_id', 'esport_viewing_method_2023']].copy()


# df_kcontents = df_kcontents.merge(df_esport, on='user_id', how='left')


# (29) series_usage_frequency

In [None]:
# Answer code -> frequency label (codes 1..6, in this order)
frequency_map = dict(enumerate(
    [
        '매일',
        '1주일에 5~6일',
        '1주일에 3~4일',
        '1주일에 1~2일',
        '한 달에 1~3일',
        '2~3달에 1~2일 이하',
    ],
    start=1,
))

# 1. Normalise the raw answers in place: strip padding, non-numeric text -> NaN
df_2022['F3'] = pd.to_numeric(
    df_2022['F3'].astype(str).str.strip(),
    errors='coerce',
)
df_2023['E4'] = pd.to_numeric(
    df_2023['E4'].astype(str).str.strip(),
    errors='coerce',
)

# 2-3. Per-wave frames holding the key and the labelled answer;
#      codes missing from the map (including NaN) are labelled '이용안함'
df_2022_freq = df_2022[['user_id']].assign(
    series_usage_frequency=df_2022['F3'].map(frequency_map).fillna('이용안함')
)
df_2023_freq = df_2023[['user_id']].assign(
    series_usage_frequency=df_2023['E4'].map(frequency_map).fillna('이용안함')
)

# 4. Stack both waves
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)

# 5. Replace any existing copy of the column, then attach by user_id
df_kcontents = (
    df_kcontents
    .drop(columns=['series_usage_frequency'], errors='ignore')
    .merge(df_freq_combined, on='user_id', how='left')
)


# (30) movies_usage_frequency

In [None]:
# Build movies_usage_frequency from 2022 column F4 and 2023 column E5.

# 1. Strip padding and coerce to numbers in place; non-numeric answers become NaN
df_2022['F4'] = pd.to_numeric(df_2022['F4'].astype(str).str.strip(), errors='coerce')
df_2023['E5'] = pd.to_numeric(df_2023['E5'].astype(str).str.strip(), errors='coerce')

# 2. Keep the key and the answer under a shared column name
df_2022_freq = df_2022[['user_id', 'F4']].copy().rename(columns={'F4': 'movies_usage_frequency'})
df_2023_freq = df_2023[['user_id', 'E5']].copy().rename(columns={'E5': 'movies_usage_frequency'})

# 3. Translate codes via frequency_map; unmapped codes (including NaN) become '이용안함'
df_2022_freq['movies_usage_frequency'] = df_2022_freq['movies_usage_frequency'].map(frequency_map).fillna('이용안함')
df_2023_freq['movies_usage_frequency'] = df_2023_freq['movies_usage_frequency'].map(frequency_map).fillna('이용안함')

# 4. Stack both waves and attach by user_id. An existing copy of the column is
#    dropped first so re-running the cell does not produce _x/_y duplicates.
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)
df_kcontents = df_kcontents.drop(columns=['movies_usage_frequency'], errors='ignore')
df_kcontents = df_kcontents.merge(df_freq_combined, on='user_id', how='left')


# (31) variety_usage_frequency

In [None]:
# Build variety_usage_frequency from 2022 column F5 and 2023 column E6.

# 1. Strip padding and coerce to numbers in place; non-numeric answers become NaN
df_2022['F5'] = pd.to_numeric(df_2022['F5'].astype(str).str.strip(), errors='coerce')
df_2023['E6'] = pd.to_numeric(df_2023['E6'].astype(str).str.strip(), errors='coerce')

# 2. Keep the key and the answer under a shared column name
df_2022_freq = df_2022[['user_id', 'F5']].copy().rename(columns={'F5': 'variety_usage_frequency'})
df_2023_freq = df_2023[['user_id', 'E6']].copy().rename(columns={'E6': 'variety_usage_frequency'})

# 3. Translate codes via frequency_map; unmapped codes (including NaN) become '이용안함'
df_2022_freq['variety_usage_frequency'] = df_2022_freq['variety_usage_frequency'].map(frequency_map).fillna('이용안함')
df_2023_freq['variety_usage_frequency'] = df_2023_freq['variety_usage_frequency'].map(frequency_map).fillna('이용안함')

# 4. Stack both waves and attach by user_id. An existing copy of the column is
#    dropped first so re-running the cell does not produce _x/_y duplicates.
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)
df_kcontents = df_kcontents.drop(columns=['variety_usage_frequency'], errors='ignore')
df_kcontents = df_kcontents.merge(df_freq_combined, on='user_id', how='left')


# (32) kpop_usage_frequency

In [None]:
# Build kpop_usage_frequency from 2022 column F6 and 2023 column E7.

# 1. Strip padding and coerce to numbers in place; non-numeric answers become NaN
df_2022['F6'] = pd.to_numeric(df_2022['F6'].astype(str).str.strip(), errors='coerce')
df_2023['E7'] = pd.to_numeric(df_2023['E7'].astype(str).str.strip(), errors='coerce')

# 2. Keep the key and the answer under a shared column name
df_2022_freq = df_2022[['user_id', 'F6']].copy().rename(columns={'F6': 'kpop_usage_frequency'})
df_2023_freq = df_2023[['user_id', 'E7']].copy().rename(columns={'E7': 'kpop_usage_frequency'})

# 3. Translate codes via frequency_map; unmapped codes (including NaN) become '이용안함'
df_2022_freq['kpop_usage_frequency'] = df_2022_freq['kpop_usage_frequency'].map(frequency_map).fillna('이용안함')
df_2023_freq['kpop_usage_frequency'] = df_2023_freq['kpop_usage_frequency'].map(frequency_map).fillna('이용안함')

# 4. Stack both waves and attach by user_id. An existing copy of the column is
#    dropped first so re-running the cell does not produce _x/_y duplicates.
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)
df_kcontents = df_kcontents.drop(columns=['kpop_usage_frequency'], errors='ignore')
df_kcontents = df_kcontents.merge(df_freq_combined, on='user_id', how='left')


# (33) webtoon_usage_frequency

In [None]:
# Build webtoon_usage_frequency from 2022 column F7 and 2023 column E8.

# 1. Strip padding and coerce to numbers in place; non-numeric answers become NaN
df_2022['F7'] = pd.to_numeric(df_2022['F7'].astype(str).str.strip(), errors='coerce')
df_2023['E8'] = pd.to_numeric(df_2023['E8'].astype(str).str.strip(), errors='coerce')

# 2. Keep the key and the answer under a shared column name
df_2022_freq = df_2022[['user_id', 'F7']].copy().rename(columns={'F7': 'webtoon_usage_frequency'})
df_2023_freq = df_2023[['user_id', 'E8']].copy().rename(columns={'E8': 'webtoon_usage_frequency'})

# 3. Translate codes via frequency_map; unmapped codes (including NaN) become '이용안함'
df_2022_freq['webtoon_usage_frequency'] = df_2022_freq['webtoon_usage_frequency'].map(frequency_map).fillna('이용안함')
df_2023_freq['webtoon_usage_frequency'] = df_2023_freq['webtoon_usage_frequency'].map(frequency_map).fillna('이용안함')

# 4. Stack both waves and attach by user_id. An existing copy of the column is
#    dropped first so re-running the cell does not produce _x/_y duplicates.
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)
df_kcontents = df_kcontents.drop(columns=['webtoon_usage_frequency'], errors='ignore')
df_kcontents = df_kcontents.merge(df_freq_combined, on='user_id', how='left')


# (34) webnovel_usage_frequency

In [None]:
# Build webnovel_usage_frequency from 2022 column F8 and 2023 column E9.

# 1. Strip padding and coerce to numbers in place; non-numeric answers become NaN
df_2022['F8'] = pd.to_numeric(df_2022['F8'].astype(str).str.strip(), errors='coerce')
df_2023['E9'] = pd.to_numeric(df_2023['E9'].astype(str).str.strip(), errors='coerce')

# 2. Keep the key and the answer under a shared column name
df_2022_freq = df_2022[['user_id', 'F8']].copy().rename(columns={'F8': 'webnovel_usage_frequency'})
df_2023_freq = df_2023[['user_id', 'E9']].copy().rename(columns={'E9': 'webnovel_usage_frequency'})

# 3. Translate codes via frequency_map; unmapped codes (including NaN) become '이용안함'
df_2022_freq['webnovel_usage_frequency'] = df_2022_freq['webnovel_usage_frequency'].map(frequency_map).fillna('이용안함')
df_2023_freq['webnovel_usage_frequency'] = df_2023_freq['webnovel_usage_frequency'].map(frequency_map).fillna('이용안함')

# 4. Stack both waves and attach by user_id. An existing copy of the column is
#    dropped first so re-running the cell does not produce _x/_y duplicates.
df_freq_combined = pd.concat([df_2022_freq, df_2023_freq], ignore_index=True)
df_kcontents = df_kcontents.drop(columns=['webnovel_usage_frequency'], errors='ignore')
df_kcontents = df_kcontents.merge(df_freq_combined, on='user_id', how='left')


# (35) drama_movie_genre_rank1

In [None]:
# 1. Answer code -> genre label for the 2022 drama/movie genre ranking questions.
#    Code 6 previously read "휴면/가족"; it is corrected to "휴먼/가족", the
#    spelling this notebook uses for the same genre in the 2023 section.
genre_map = {
    1: "로맨스/멜로",
    2: "SF/판타지",
    3: "액션",
    4: "공포/스릴러/좀비물",
    5: "역사물",
    6: "휴먼/가족",
    7: "다큐멘터리",
    8: "뮤지컬",
    9: "기타",
    10: "이용하지 않음"
}

# 2. Coerce the raw answer to a number in place, then label it;
#    codes missing from the map (including NaN) become "이용하지 않음"
df_2022['F10RANK_1'] = pd.to_numeric(df_2022['F10RANK_1'].astype(str).str.strip(), errors='coerce')
df_2022['drama_movie_genre_rank1'] = df_2022['F10RANK_1'].map(genre_map).fillna("이용하지 않음")

# 3. Keep only the key and the labelled column
df_genre = df_2022[['user_id', 'drama_movie_genre_rank1']].copy()

# 4. Replace any existing copy of the column, then attach by user_id.
#    Only 2022 rows are supplied, so other users receive NaN from the left join.
df_kcontents = df_kcontents.drop(columns=['drama_movie_genre_rank1'], errors='ignore')
df_kcontents = df_kcontents.merge(df_genre, on='user_id', how='left')


# (36) drama_movie_genre_rank2

In [None]:
# Coerce the raw second-rank answer to a number in place
df_2022['F10RANK_2'] = pd.to_numeric(
    df_2022['F10RANK_2'].astype(str).str.strip(),
    errors='coerce',
)

# Label it with the existing genre_map; unmapped codes (including NaN) become "이용하지 않음"
df_2022['drama_movie_genre_rank2'] = (
    df_2022['F10RANK_2']
    .map(genre_map)
    .fillna("이용하지 않음")
)

# Key + labelled column only
df_genre2 = df_2022.loc[:, ['user_id', 'drama_movie_genre_rank2']].copy()

# Replace any existing copy of the column, then attach by user_id
df_kcontents = (
    df_kcontents
    .drop(columns=['drama_movie_genre_rank2'], errors='ignore')
    .merge(df_genre2, on='user_id', how='left')
)


# (37) drama_movie_romance

In [None]:
def add_genre_column_2023(df_kcontents, df_2023, cols_2023, new_col_name, target_code, convert_value=1):
    """
    Add an indicator column derived from several 2023 answer columns.

    All columns in ``cols_2023`` are converted to numbers (blank or other
    non-numeric text becomes NaN). A row gets ``convert_value`` when any of
    those columns equals ``target_code`` and 0 otherwise. The result is
    left-joined onto ``df_kcontents`` by ``user_id``, so users absent from
    ``df_2023`` receive <NA>.

    An existing ``new_col_name`` column in ``df_kcontents`` is replaced, so
    calling the function again does not create ``_x`` / ``_y`` duplicates.

    :param df_kcontents: DataFrame to extend; must contain ``user_id``
    :param df_2023: 2023 wave; must contain ``user_id`` and ``cols_2023``
    :param cols_2023: list of answer column names to search
    :param new_col_name: name of the indicator column to add
    :param target_code: answer code to look for
    :param convert_value: value stored when the code is present (default 1)
    :return: a new DataFrame; the inputs are not modified
    """
    cols_2023 = list(cols_2023)

    # Numeric view of the answer columns; errors='coerce' turns blanks into NaN
    codes = df_2023[cols_2023].apply(pd.to_numeric, errors='coerce')

    # Vectorised "is the code present in any column" test (NaN never matches),
    # replacing the per-row Python callback
    selected = codes.eq(target_code).any(axis=1)
    flags = pd.Series(0, index=df_2023.index, dtype="Int64")
    flags[selected] = convert_value

    df_2023_part = df_2023[['user_id']].assign(**{new_col_name: flags})

    # Drop a stale copy of the column first so the merge stays idempotent
    base = df_kcontents.drop(columns=[new_col_name], errors='ignore')
    return base.merge(df_2023_part, on='user_id', how='left')


In [None]:
# drama_movie_romance: code 1 in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_romance',
    target_code=1,
    convert_value=1,
)

# (38) drama_movie_sf

In [None]:
# drama_movie_sf: code 2 (SF / fantasy) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_sf',
    target_code=2,
    convert_value=1,
)


# (39) drama_movie_action

In [None]:
# drama_movie_action: code 3 (action) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_action',
    target_code=3,
    convert_value=1,
)

# (40) drama_movie_horror

In [None]:
# drama_movie_horror: code 4 (horror / thriller / zombie) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_horror',
    target_code=4,
    convert_value=1,
)


# (41) drama_movie_comedy

In [None]:
# drama_movie_comedy: code 5 (comedy) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_comedy',
    target_code=5,
    convert_value=1,
)


# (42) drama_movie_history

In [None]:
# drama_movie_history: code 6 (historical) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_history',
    target_code=6,
    convert_value=1,
)


# (43) drama_movie_family

In [None]:
# drama_movie_family: code 7 (human / family) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_family',
    target_code=7,
    convert_value=1,
)


# (44) drama_movie_documentary

In [None]:
# drama_movie_documentary: code 8 (documentary) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_documentary',
    target_code=8,
    convert_value=1,
)


# (45) drama_movie_musical

In [None]:
# drama_movie_musical: code 9 (musical / music) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_musical',
    target_code=9,
    convert_value=1,
)


# (46) drama_movie_animation

In [None]:
# drama_movie_animation: code 10 (animation) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_animation',
    target_code=10,
    convert_value=1,
)


# (47) drama_movie_kids

In [None]:
# drama_movie_kids: code 11 (kids) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_kids',
    target_code=11,
    convert_value=1,
)


# (48) drama_movie_other

In [None]:
# drama_movie_other: code 12 (other) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_other',
    target_code=12,
    convert_value=1,
)


# (49) drama_movie_no

In [None]:
# drama_movie_no: code 13 (does not use) in the E10 answer columns
df_kcontents = add_genre_column_2023(
    df_kcontents=df_kcontents,
    df_2023=df_2023,
    cols_2023=[
        'E10', 'E10_m2', 'E10_m3', 'E10_m4', 'E10_m5', 'E10_m6', 'E10_m7',
        'E10_m8', 'E10_m9', 'E10_m10', 'E10_m11', 'E10_m12', 'E10_m13',
    ],
    new_col_name='drama_movie_no',
    target_code=13,
    convert_value=1,
)


# (50) variety_favorite_rank1

In [None]:
# 1. Answer code -> variety-show genre label (codes 1..8, in this order)
variety_genre_map = dict(enumerate(
    [
        '게임 예능',
        '여행 예능',
        '관찰 예능',
        '연애 리얼리티 예능',
        '오디션/서바이벌 예능',
        '교양 예능',
        '기타',
        '이용하지 않음',
    ],
    start=1,
))

# 2. Coerce the raw first-rank answer to a number in place, then label it;
#    codes missing from the map (including NaN) become "이용하지 않음"
df_2022['F11RANK_1'] = pd.to_numeric(
    df_2022['F11RANK_1'].astype(str).str.strip(),
    errors='coerce',
)
df_2022['variety_favorite_rank1'] = (
    df_2022['F11RANK_1']
    .map(variety_genre_map)
    .fillna("이용하지 않음")
)

# 3. Key + labelled column only
df_variety = df_2022.loc[:, ['user_id', 'variety_favorite_rank1']].copy()

# 4. Replace any existing copy of the column, then attach by user_id
df_kcontents = (
    df_kcontents
    .drop(columns=['variety_favorite_rank1'], errors='ignore')
    .merge(df_variety, on='user_id', how='left')
)



# (51) variety_favorite_rank2

In [None]:
# 1. Coerce the raw second-rank answer to a number in place, then label it with
#    variety_genre_map; unmapped codes (including NaN) become "이용하지 않음"
df_2022['F11RANK_2'] = pd.to_numeric(
    df_2022['F11RANK_2'].astype(str).str.strip(),
    errors='coerce',
)
df_2022['variety_favorite_rank2'] = (
    df_2022['F11RANK_2']
    .map(variety_genre_map)
    .fillna("이용하지 않음")
)

# 2. Key + labelled column only
df_variety2 = df_2022.loc[:, ['user_id', 'variety_favorite_rank2']].copy()

# 3. Replace any existing copy of the column, then attach by user_id
df_kcontents = (
    df_kcontents
    .drop(columns=['variety_favorite_rank2'], errors='ignore')
    .merge(df_variety2, on='user_id', how='left')
)


# (52) variety_favorite_game

In [None]:
# variety_favorite_game: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 1 (game variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_game',
    target_code=1,
)


# (53) variety_favorite_travel

In [None]:
# variety_favorite_travel: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 2 (travel variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_travel',
    target_code=2,
)


# (54) variety_favorite_observation

In [None]:
# variety_favorite_observation: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 3 (observational variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_observation',
    target_code=3,
)


# (55) variety_favorite_romance

In [None]:
# variety_favorite_romance: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 4 (dating reality shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_romance',
    target_code=4,
)


# (56) variety_favorite_audition

In [None]:
# variety_favorite_audition: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 5 (music/dance audition shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_audition',
    target_code=5,
)


# (57) variety_favorite_sport

In [None]:
# variety_favorite_sport: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 6 (sports/game survival shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_sport',
    target_code=6,
)


# (58) variety_favorite_culture

In [None]:
# variety_favorite_culture: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 7 (educational/cultural variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_culture',
    target_code=7,
)


# (59) variety_favorite_mentality

In [None]:
# variety_favorite_mentality: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 8 (psychology/counseling shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_mentality',
    target_code=8,
)


# (60) variety_favorite_edu

In [None]:
# variety_favorite_edu: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 9 (parenting/education shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_edu',
    target_code=9,
)


# (61) variety_favorite_talk

In [None]:
# variety_favorite_talk: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 10 (talk shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_talk',
    target_code=10,
)


# (62) variety_favorite_quiz

In [None]:
# variety_favorite_quiz: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 11 (quiz shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_quiz',
    target_code=11,
)


# (63) variety_favorite_show

In [None]:
# variety_favorite_show: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 12 (variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_show',
    target_code=12,
)


# (64) variety_favorite_couple

In [None]:
# variety_favorite_couple: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 13 (married-couple shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_couple',
    target_code=13,
)


# (65) variety_favorite_other

In [None]:
# variety_favorite_other: built from the 14 E11 answer columns of the 2023 survey,
# targeting answer code 14 (other variety shows).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E11, E11_m2, ..., E11_m14.
    cols_2023=['E11'] + [f'E11_m{i}' for i in range(2, 15)],
    new_col_name='variety_favorite_other',
    target_code=14,
)


# (66) prefer_immediate_watch

In [None]:
# prefer_immediate_watch: one numeric answer per respondent, read from F12_1 (2022) and E12 (2023).

# Strip surrounding whitespace and coerce to numbers; unparseable answers become NaN.
df_2022['F12_1'] = pd.to_numeric(df_2022['F12_1'].astype(str).str.strip(), errors='coerce')
df_2023['E12'] = pd.to_numeric(df_2023['E12'].astype(str).str.strip(), errors='coerce')

# Give both survey years the same column name.
df_2022_watch = df_2022[['user_id', 'F12_1']].rename(columns={'F12_1': 'prefer_immediate_watch'})
df_2023_watch = df_2023[['user_id', 'E12']].rename(columns={'E12': 'prefer_immediate_watch'})

# Stack the two years into one user_id -> answer table.
df_watch_combined = pd.concat([df_2022_watch, df_2023_watch], ignore_index=True)

# Drop any copy left by an earlier run of this cell first; otherwise re-running it
# makes merge() emit prefer_immediate_watch_x / prefer_immediate_watch_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['prefer_immediate_watch'], errors='ignore')
    .merge(df_watch_combined, on='user_id', how='left')
)


# (67) usage_inconvenience

In [None]:
# usage_inconvenience: labelled answer per respondent, read from F13 (2022) and E13 (2023).

# Strip surrounding whitespace and coerce to numbers; unparseable answers become NaN.
df_2022['F13'] = pd.to_numeric(df_2022['F13'].astype(str).str.strip(), errors='coerce')
df_2023['E13'] = pd.to_numeric(df_2023['E13'].astype(str).str.strip(), errors='coerce')

# Give both survey years the same column name.
df_2022_incon = df_2022[['user_id', 'F13']].rename(columns={'F13': 'usage_inconvenience'})
df_2023_incon = df_2023[['user_id', 'E13']].rename(columns={'E13': 'usage_inconvenience'})

# Stack the two years into one user_id -> answer table.
df_incon_combined = pd.concat([df_2022_incon, df_2023_incon], ignore_index=True)

# Replace answer codes with their Korean labels; codes missing from the map become NaN.
# NOTE(review): the same code table is applied to both years — confirm against both codebooks.
inconvenience_map = {
    1: '언어문제 (더빙, 자막 등)',
    2: '이용 요금이 비쌈',
    3: '한국 콘텐츠에 접근하기 어려움',
    4: '시청할 수 있는 콘텐츠가 적다',
    5: '내용을 이해하기 힘들다',
    6: '기타',
    7: '불편한 점이 없다'
}
df_incon_combined['usage_inconvenience'] = df_incon_combined['usage_inconvenience'].map(inconvenience_map)

# Drop any copy left by an earlier run of this cell first; otherwise re-running it
# makes merge() emit usage_inconvenience_x / usage_inconvenience_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['usage_inconvenience'], errors='ignore')
    .merge(df_incon_combined, on='user_id', how='left')
)


# (68) nonusage_reason

In [None]:
# nonusage_reason: labelled answer per respondent, read from F14 (2022) and E14 (2023).

# Strip surrounding whitespace and coerce to numbers; unparseable answers become NaN.
df_2022['F14'] = pd.to_numeric(df_2022['F14'].astype(str).str.strip(), errors='coerce')
df_2023['E14'] = pd.to_numeric(df_2023['E14'].astype(str).str.strip(), errors='coerce')

# Give both survey years the same column name.
df_2022_nonuse = df_2022[['user_id', 'F14']].rename(columns={'F14': 'nonusage_reason'})
df_2023_nonuse = df_2023[['user_id', 'E14']].rename(columns={'E14': 'nonusage_reason'})

# Stack the two years into one user_id -> answer table.
df_nonuse_combined = pd.concat([df_2022_nonuse, df_2023_nonuse], ignore_index=True)

# Replace answer codes with their Korean labels; codes missing from the map become NaN.
# NOTE(review): the same code table is applied to both years — confirm against both codebooks.
nonusage_reason_map = {
    1: '한국 콘텐츠를 경험할 기회/방법이 없음',
    2: '한국 콘텐츠에 관심이 없음'
}
df_nonuse_combined['nonusage_reason'] = df_nonuse_combined['nonusage_reason'].map(nonusage_reason_map)

# Drop any copy left by an earlier run of this cell first; otherwise re-running it
# makes merge() emit nonusage_reason_x / nonusage_reason_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['nonusage_reason'], errors='ignore')
    .merge(df_nonuse_combined, on='user_id', how='left')
)


# (69) preferred_future_drama

In [None]:
# preferred_future_drama (2022 rows only): 1 if the respondent selected F16 option 1, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_1 is the drama option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_1']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_drama'] = (
    pd.to_numeric(df_temp['F16_1'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_drama']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_drama'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (70) preferred_future_movie

In [None]:
# preferred_future_movie (2022 rows only): 1 if the respondent selected F16 option 2, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_2 is the movie option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_2']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_movie'] = (
    pd.to_numeric(df_temp['F16_2'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_movie']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_movie'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (71) preferred_future_variety

In [None]:
# preferred_future_variety (2022 rows only): 1 if the respondent selected F16 option 3, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_3 is the variety option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_3']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_variety'] = (
    pd.to_numeric(df_temp['F16_3'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_variety']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_variety'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (72) preferred_future_music

In [None]:
# preferred_future_music (2022 rows only): 1 if the respondent selected F16 option 4, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_4 is the music option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_4']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_music'] = (
    pd.to_numeric(df_temp['F16_4'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_music']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_music'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (73) preferred_future_webtoon

In [None]:
# preferred_future_webtoon (2022 rows only): 1 if the respondent selected F16 option 5, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_5 is the webtoon option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_5']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_webtoon'] = (
    pd.to_numeric(df_temp['F16_5'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_webtoon']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_webtoon'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (74) preferred_future_webnovel

In [None]:
# preferred_future_webnovel (2022 rows only): 1 if the respondent selected F16 option 6, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_6 is the web-novel option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_6']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_webnovel'] = (
    pd.to_numeric(df_temp['F16_6'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_webnovel']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_webnovel'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (75) preferred_future_game

In [None]:
# preferred_future_game (2022 rows only): 1 if the respondent selected F16 option 7, else 0.
#
# Fix: the flag used to be the row-wise max over all eight F16_* columns, so every
# preferred_future_* column held the same "selected anything" value. It now reads its own
# column, the same way preferred_future_other reads F16_8.
# NOTE(review): assumes F16_7 is the game option (same order as the 1-8 genre codes) —
# confirm against the 2022 codebook.
df_temp = df_2022[['user_id', 'F16_7']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_game'] = (
    pd.to_numeric(df_temp['F16_7'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_game']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_game'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (76) preferred_future_other

In [None]:
# preferred_future_other (2022 rows only): 1 if the respondent selected F16 option 8, else 0.
#
# Only F16_8 feeds this flag, so the other seven F16_* columns are no longer converted here.
df_temp = df_2022[['user_id', 'F16_8']].copy()

# Missing or non-numeric answers count as "not selected"; any non-zero value counts as selected.
df_temp['preferred_future_other'] = (
    pd.to_numeric(df_temp['F16_8'], errors='coerce').fillna(0).ne(0).astype('int64')
)

# Keep only the join key and the new flag.
df_temp = df_temp[['user_id', 'preferred_future_other']]

# Drop any copy left by an earlier run so re-executing the cell cannot create _x/_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['preferred_future_other'], errors='ignore')
    .merge(df_temp, on='user_id', how='left')
)


# (77) desired_genre_drama

In [None]:
# desired_genre_drama: built from the 8 E16 answer columns of the 2023 survey (target code 1).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_drama',
    target_code=1,
)


# (78) desired_genre_movie

In [None]:
# desired_genre_movie: built from the 8 E16 answer columns of the 2023 survey (target code 2).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_movie',
    target_code=2,
)


# (79) desired_genre_variety

In [None]:
# desired_genre_variety: built from the 8 E16 answer columns of the 2023 survey (target code 3).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_variety',
    target_code=3,
)


# (80) desired_genre_music

In [None]:
# desired_genre_music: built from the 8 E16 answer columns of the 2023 survey (target code 4).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_music',
    target_code=4,
)


# (81) desired_genre_webtoon

In [None]:
# desired_genre_webtoon: built from the 8 E16 answer columns of the 2023 survey (target code 5).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_webtoon',
    target_code=5,
)


# (82) desired_genre_webnovel

In [None]:
# desired_genre_webnovel: built from the 8 E16 answer columns of the 2023 survey (target code 6).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_webnovel',
    target_code=6,
)


# (83) desired_genre_game

In [None]:
# desired_genre_game: built from the 8 E16 answer columns of the 2023 survey (target code 7).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_game',
    target_code=7,
)


# (84) desired_genre_other

In [None]:
# desired_genre_other: built from the 8 E16 answer columns of the 2023 survey (target code 8).
df_kcontents = add_genre_column_2023(
    df_kcontents,
    df_2023,
    # Expands to E16, E16_m2, ..., E16_m8.
    cols_2023=['E16'] + [f'E16_m{i}' for i in range(2, 9)],
    new_col_name='desired_genre_other',
    target_code=8,
)


In [None]:
# Key: the merged column name (the 2022 name is kept).
# Value: [2022 column, 2023 column] to be combined into that key.
merge_column_pairs = {
    f'preferred_future_{genre}': [f'preferred_future_{genre}', f'desired_genre_{genre}']
    for genre in (
        'drama', 'movie', 'variety', 'music',
        'webtoon', 'webnovel', 'game', 'other',
    )
}


In [None]:
# Combine each 2022/2023 column pair into the 2022-named column, then drop the 2023 columns.
#
# max(axis=1) skips NaN, so each row takes the value from whichever survey year has one.
for new_col, (col_2022, col_2023) in merge_column_pairs.items():
    # On a re-run the 2023 column is already gone and the merged column is final; without
    # this guard the second execution of the cell raises KeyError.
    if col_2023 not in df_kcontents.columns:
        continue
    df_kcontents[new_col] = df_kcontents[[col_2022, col_2023]].max(axis=1)

# Remove the 2023 source columns; errors='ignore' keeps the cell re-runnable.
drop_cols = [col_2023 for (_, col_2023) in merge_column_pairs.values()]
df_kcontents = df_kcontents.drop(columns=drop_cols, errors='ignore')

# (85) desired_genre

In [None]:
# desired_genre: labelled answer per respondent, read from F15 (2022) and E15 (2023).

# Strip surrounding whitespace and coerce to numbers; unparseable answers become NaN.
df_2022['F15'] = pd.to_numeric(df_2022['F15'].astype(str).str.strip(), errors='coerce')
df_2023['E15'] = pd.to_numeric(df_2023['E15'].astype(str).str.strip(), errors='coerce')

# Give both survey years the same column name.
df_2022_desired = df_2022[['user_id', 'F15']].rename(columns={'F15': 'desired_genre'})
df_2023_desired = df_2023[['user_id', 'E15']].rename(columns={'E15': 'desired_genre'})

# Stack the two years into one user_id -> answer table.
df_desired_combined = pd.concat([df_2022_desired, df_2023_desired], ignore_index=True)

# Replace answer codes with their Korean labels; codes missing from the map become NaN.
# NOTE(review): the same code table is applied to both years — confirm against both codebooks.
desired_genre_map = {
    1: '드라마/시리즈물',
    2: '영화',
    3: '예능/버라이어티',
    4: '음악',
    5: '웹툰',
    6: '웹소설',
    7: '게임',
    8: '기타'
}
df_desired_combined['desired_genre'] = df_desired_combined['desired_genre'].map(desired_genre_map)

# Drop any copy left by an earlier run of this cell first; otherwise re-running it
# makes merge() emit desired_genre_x / desired_genre_y duplicates.
df_kcontents = (
    df_kcontents
    .drop(columns=['desired_genre'], errors='ignore')
    .merge(df_desired_combined, on='user_id', how='left')
)


In [None]:
# Show every column name of the assembled table as a plain list.
list(df_kcontents.columns)


In [None]:
# Final upload: overwrite the "kcontents" table with the assembled frame (row index not written).
df_kcontents.to_sql(
    "kcontents",
    engine,
    if_exists="replace",
    index=False,
)
print("kcontents 테이블 최종 업데이트 완료!")

In [None]:
# Display the final table as the cell output.
df_kcontents