In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
import pandas as pd

# 1) 데이터 폴더 경로
base_path = '/content/drive/MyDrive/데이터분석'

# 2) 폴더 안 파일들 한 번 확인해보기
os.listdir(base_path)


['trip.csv',
 'cars.csv',
 'brand.csv',
 '레퍼런스_중고자동차_데이터분석_01.ipynb',
 '레퍼런스-중고자동차 데이터분석_02.ipynb',
 'fraud.csv',
 '2019.csv',
 '2021.csv',
 '2017.csv',
 '2020.csv',
 '2018.csv',
 'IT_멘탈_통합_2017_2021_카테고리컬럼명.csv',
 'IT_멘탈_통합_2017_2021_최종정리.csv',
 'IT_멘탈_통합_2017_2021_최종정리_with_year.csv',
 '칼럼 매핑.xlsx']

In [3]:
file_2017 = os.path.join(base_path, '2017.csv')
df2017 = pd.read_csv(file_2017, encoding='ISO-8859-1')

df2017.head()


Unnamed: 0,#,<strong>Are you self-employed?</strong>,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefitsÂ as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,...,What is your gender?,What country do you <strong>live</strong> in?,What US state or territory do you <strong>live</strong> in?,What is your race?,Other.3,What country do you <strong>work</strong> in?,What US state or territory do you <strong>work</strong> in?,Start Date (UTC),Submit Date (UTC),Network ID
0,e49fe87572831232dcfa51b376b22039,0,100-500,1.0,1.0,No,Yes,No,I don't know,I don't know,...,Female,United Kingdom,,,,United Kingdom,,2018-05-16 12:32:04,2018-05-16 12:42:40,464b7a12f1
1,a1eede444ac024928d247a8372d54931,0,100-500,1.0,1.0,Yes,Yes,No,No,I don't know,...,male,United Kingdom,,,,United Kingdom,,2018-05-16 12:31:13,2018-05-16 12:40:40,464b7a12f1
2,37d3fd67f62bd1e0a2dea4f9cd440d98,0,6-25,1.0,1.0,I don't know,No,I don't know,No,Yes,...,male,United States of America,Missouri,White,,United States of America,Missouri,2018-05-09 05:34:05,2018-05-09 05:46:04,1eb7e0cb94
3,519b759442c1cab0e9b5a8a1acb1b216,0,More than 1000,1.0,1.0,Yes,Yes,I don't know,I don't know,Yes,...,Male,United States of America,Washington,White,,United States of America,Washington,2018-05-04 23:19:14,2018-05-04 23:23:23,63852edbc4
4,ef0af4927b575b1a3e607c11ca37870e,1,,,,,,,,,...,female,United States of America,Illinois,More than one of the above,,United States of America,Illinois,2018-05-03 00:40:24,2018-05-03 00:53:20,43237889f1


In [4]:
rename_2017 = {
    "What is your age?": "Age",
    "What is your gender?": "Gender",
    "What country do you live in?": "Country",

    # Personal background
    "Do you have a family history of mental illness?": "family_history",
    "Are you self-employed?": "self_employed",

    # Organizational characteristics
    "How many employees does your company or organization have?": "no_employees",
    "Is your employer primarily a tech company/organization?": "tech_company",

    # Organizational Care
    "Does your employer provide mental health benefitsÂ\xa0as part of healthcare coverage?": "Organizational_Care_1",
    "Does your employer offer resources to learn more about mental health disorders and options for seeking help?": "Organizational_Care_2",

    # Organizational Access
    "Do you know the options for mental health care available under your employer-provided health coverage?": "Organizational_Access",

    # Organizational Support
    "Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?": "Organizational_Support",

    # Organizational Protection
    "Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?": "Organizational_Protection",

    # Organizational Leave
    "If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?": "Organizational_Leave",

    # Mental health consequence
    "Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?": "mentalhealthconsequence",

    # Coworkers / Supervisors
    "Would you feel comfortable discussing a mental health issue with your coworkers?": "coworkers",
    "Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?": "supervisors",

    # Organizational Mental Health Importance
    "Overall, how much importance does your employer place on physical health?": "mentalimportance",

    # Treatment
    "Do you currently have a mental health disorder?": "mh_current",
    "Have you ever been diagnosed with a mental health disorder?": "mh_diagnosed",
    "Have you had a mental health disorder in the past?": "mh_past",
    "Have you ever sought treatment for a mental health disorder from a mental health professional?": "mh_treated",

    # Work interfere
    "Do you believe your productivity is ever affected by a mental health issue?": "work_interfere_impact",
    "If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?": "work_interfere_percentage",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?": "work_interfere_treated",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?": "work_interfere_untreated",
}



In [5]:
import re

def remove_html_tags(text):
    return re.sub(r'<.*?>', '', str(text))

def clean_and_rename_2017(df, rename_dict):
    df = df.copy()

    # 1) 컬럼명에서 HTML 태그 제거
    df.columns = [remove_html_tags(col) for col in df.columns]

    # 2) 매핑 기준으로 실제 존재하는 컬럼만 사용
    existing_cols = [col for col in rename_dict.keys() if col in df.columns]
    missing_cols = [col for col in rename_dict.keys() if col not in df.columns]

    print("=== 2017 처리 결과 ===")
    print("사용된 컬럼 수 :", len(existing_cols))
    print("누락된 컬럼 수 :", len(missing_cols))
    if missing_cols:
        print("누락된 컬럼 목록:")
        for c in missing_cols:
            print(" -", c)

    # 3) 필요한 컬럼만 선택 + 리네임
    df_clean = df[existing_cols].rename(columns=rename_dict)
    return df_clean


In [6]:
# 1) 정리 실행
df2017_clean = clean_and_rename_2017(df2017, rename_2017)

# 2) 결과 한 번 확인
df2017_clean.head()


=== 2017 처리 결과 ===
사용된 컬럼 수 : 25
누락된 컬럼 수 : 0


Unnamed: 0,Age,Gender,Country,family_history,self_employed,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,Organizational_Access,...,supervisors,mentalimportance,mh_current,mh_diagnosed,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage,work_interfere_treated,work_interfere_untreated
0,27.0,Female,United Kingdom,No,0,100-500,1.0,No,I don't know,Yes,...,Yes,6.0,Possibly,,Possibly,1,,,Sometimes,Sometimes
1,31.0,male,United Kingdom,No,0,100-500,1.0,Yes,No,Yes,...,Maybe,7.0,Possibly,,Possibly,0,,,Not applicable to me,Sometimes
2,36.0,male,United States of America,Yes,0,6-25,1.0,I don't know,No,No,...,Yes,0.0,Yes,Yes,Yes,1,,,Sometimes,Sometimes
3,22.0,Male,United States of America,I don't know,0,More than 1000,1.0,Yes,I don't know,Yes,...,Yes,7.0,Yes,Yes,No,1,,,Sometimes,Often
4,52.0,female,United States of America,Yes,1,,,,,,...,,,No,,Yes,1,Yes,1-25%,Often,Sometimes


In [7]:
# 2017이라는 Year 컬럼 추가
df2017_clean["Year"] = 2017

# Year 컬럼을 가장 왼쪽(첫 번째 열)으로 이동
cols = ["Year"] + [col for col in df2017_clean.columns if col != "Year"]
df2017_clean = df2017_clean[cols]

df2017_clean.head()


Unnamed: 0,Year,Age,Gender,Country,family_history,self_employed,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,...,supervisors,mentalimportance,mh_current,mh_diagnosed,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage,work_interfere_treated,work_interfere_untreated
0,2017,27.0,Female,United Kingdom,No,0,100-500,1.0,No,I don't know,...,Yes,6.0,Possibly,,Possibly,1,,,Sometimes,Sometimes
1,2017,31.0,male,United Kingdom,No,0,100-500,1.0,Yes,No,...,Maybe,7.0,Possibly,,Possibly,0,,,Not applicable to me,Sometimes
2,2017,36.0,male,United States of America,Yes,0,6-25,1.0,I don't know,No,...,Yes,0.0,Yes,Yes,Yes,1,,,Sometimes,Sometimes
3,2017,22.0,Male,United States of America,I don't know,0,More than 1000,1.0,Yes,I don't know,...,Yes,7.0,Yes,Yes,No,1,,,Sometimes,Often
4,2017,52.0,female,United States of America,Yes,1,,,,,...,,,No,,Yes,1,Yes,1-25%,Often,Sometimes


In [8]:
output_2017 = os.path.join(base_path, '2017_clean.csv')
df2017_clean.to_csv(output_2017, index=False)
print("저장 완료:", output_2017)

저장 완료: /content/drive/MyDrive/데이터분석/2017_clean.csv


In [9]:
base_path = '/content/drive/MyDrive/데이터분석'


In [10]:
import os
import pandas as pd

file_2018 = os.path.join(base_path, '2018.csv')  # → /content/drive/MyDrive/데이터분석/2018.csv
df2019 = pd.read_csv(file_2019, encoding='ISO-8859-1')

# 데이터 형태 확인
df2018.head()


NameError: name 'file_2019' is not defined

In [11]:
import os
import pandas as pd

base_path = '/content/drive/MyDrive/데이터분석'

# 2018 파일 경로 지정
file_2018 = os.path.join(base_path, '2018.csv')

# 파일 읽기
df2018 = pd.read_csv(file_2018, encoding='ISO-8859-1')

# 데이터 확인
df2018.head()


Unnamed: 0,#,<strong>Are you self-employed?</strong>,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,...,What is your gender?,What country do you <strong>live</strong> in?,What US state or territory do you <strong>live</strong> in?,What is your race?,Other.3,What country do you <strong>work</strong> in?,What US state or territory do you <strong>work</strong> in?,Start Date (UTC),Submit Date (UTC),Network ID
0,e44a0a34f2465940beda2a1537e9b99e,0,More than 1000,1.0,0.0,Yes,Yes,Yes,Yes,Yes,...,Female,Canada,,,,Canada,,2018-12-29 23:46:38,2018-12-30 00:00:03,4bbb884ccc
1,0d698e3beca20fb75f19b9d528e36d73,0,More than 1000,1.0,1.0,Yes,Yes,No,I don't know,I don't know,...,male,United States of America,Massachusetts,White,,United States of America,Massachusetts,2018-12-27 21:40:40,2018-12-27 21:45:45,275e7543bd
2,61a40c9071eb36fa9caa254d31500c41,0,6-25,0.0,1.0,Yes,Yes,No,No,I don't know,...,Male,United States of America,Florida,White,,United States of America,Florida,2018-12-21 17:37:43,2018-12-21 18:08:01,43994c3dba
3,f8624340bead7deb08abb766704ddf6b,0,6-25,1.0,1.0,No,No,No,No,I don't know,...,male,Norway,,,,Norway,,2018-12-21 16:37:56,2018-12-21 16:44:44,907b3a3faa
4,31d3ae93b68d79e504a0a643601b6b1e,0,26-100,1.0,1.0,Yes,Yes,Yes,Yes,Yes,...,Ostensibly Male,United States of America,Tennessee,White,,United States of America,Tennessee,2018-12-20 19:39:02,2018-12-20 20:58:34,26df20fea3


In [12]:
import os
os.listdir(base_path)


['trip.csv',
 'cars.csv',
 'brand.csv',
 '레퍼런스_중고자동차_데이터분석_01.ipynb',
 '레퍼런스-중고자동차 데이터분석_02.ipynb',
 'fraud.csv',
 '2019.csv',
 '2021.csv',
 '2017.csv',
 '2020.csv',
 '2018.csv',
 'IT_멘탈_통합_2017_2021_카테고리컬럼명.csv',
 'IT_멘탈_통합_2017_2021_최종정리.csv',
 'IT_멘탈_통합_2017_2021_최종정리_with_year.csv',
 '칼럼 매핑.xlsx',
 '2017_clean.csv']

In [13]:
# 2017 기준 컬럼명 매핑 딕셔너리
rename_2017 = {
    "What is your age?": "Age",
    "What is your gender?": "Gender",
    "What country do you live in?": "Country",

    # Personal background
    "Do you have a family history of mental illness?": "family_history",
    "Are you self-employed?": "self_employed",

    # Organizational characteristics
    "How many employees does your company or organization have?": "no_employees",
    "Is your employer primarily a tech company/organization?": "tech_company",

    # Organizational Care
    "Does your employer provide mental health benefits as part of healthcare coverage?": "Organizational_Care_1",
    "Does your employer offer resources to learn more about mental health disorders and options for seeking help?": "Organizational_Care_2",

    # Organizational Access
    "Do you know the options for mental health care available under your employer-provided health coverage?": "Organizational_Access",

    # Organizational Support
    "Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?": "Organizational_Support",

    # Organizational Protection
    "Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?": "Organizational_Protection",

    # Organizational Leave
    "If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?": "Organizational_Leave",

    # Mental health consequence
    "Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?": "mentalhealthconsequence",

    # Coworkers
    "Would you feel comfortable discussing a mental health issue with your coworkers?": "coworkers",
    "Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?": "supervisors",

    # Organizational Mental Health Importance
    "Overall, how much importance does your employer place on physical health?": "mentalimportance",

    # Treatment
    "Do you currently have a mental health disorder?": "mh_current",
    "Have you ever been diagnosed with a mental health disorder?": "mh_diagnosed",
    "Have you had a mental health disorder in the past?": "mh_past",
    "Have you ever sought treatment for a mental health disorder from a mental health professional?": "mh_treated",

    # Work interfere
    "Do you believe your productivity is ever affected by a mental health issue?": "work_interfere_impact",
    "If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?": "work_interfere_percentage",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?": "work_interfere_treated",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?": "work_interfere_untreated",
}


In [14]:
import re

def clean_and_rename(df, rename_dict):
    # HTML 태그 제거
    df.columns = [re.sub(r'<.*?>', '', col) for col in df.columns]

    # rename_dict 기준으로 실제 df에 존재하는 컬럼만 필터링
    existing_cols = [col for col in rename_dict.keys() if col in df.columns]

    # 필터링
    df_filtered = df[existing_cols].copy()

    # rename 적용
    df_filtered = df_filtered.rename(columns=rename_dict)

    return df_filtered


In [16]:
df2018_clean = clean_and_rename(df2018, rename_2017)
df2018_clean.head()


Unnamed: 0,Age,Gender,Country,family_history,self_employed,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,Organizational_Access,...,supervisors,mentalimportance,mh_current,mh_diagnosed,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage,work_interfere_treated,work_interfere_untreated
0,57,Female,Canada,Yes,0,More than 1000,1.0,Yes,Yes,Yes,...,No,8.0,Possibly,,Yes,1,,,Rarely,Sometimes
1,29,male,United States of America,Yes,0,More than 1000,1.0,Yes,I don't know,Yes,...,No,8.0,Yes,Yes,Yes,1,,,Sometimes,Often
2,46,Male,United States of America,Yes,0,6-25,0.0,Yes,No,Yes,...,No,3.0,No,,No,0,,,Not applicable to me,Not applicable to me
3,34,male,Norway,No,0,6-25,1.0,No,No,No,...,No,8.0,No,,No,0,,,Not applicable to me,Not applicable to me
4,29,Ostensibly Male,United States of America,Yes,0,26-100,1.0,Yes,Yes,Yes,...,Yes,7.0,Yes,Yes,Yes,1,,,Rarely,Sometimes


In [17]:
df2018_clean["Year"] = 2018
cols = ["Year"] + [col for col in df2018_clean.columns if col != "Year"]
df2018_clean = df2018_clean[cols]

df2018_clean.head()


Unnamed: 0,Year,Age,Gender,Country,family_history,self_employed,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,...,supervisors,mentalimportance,mh_current,mh_diagnosed,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage,work_interfere_treated,work_interfere_untreated
0,2018,57,Female,Canada,Yes,0,More than 1000,1.0,Yes,Yes,...,No,8.0,Possibly,,Yes,1,,,Rarely,Sometimes
1,2018,29,male,United States of America,Yes,0,More than 1000,1.0,Yes,I don't know,...,No,8.0,Yes,Yes,Yes,1,,,Sometimes,Often
2,2018,46,Male,United States of America,Yes,0,6-25,0.0,Yes,No,...,No,3.0,No,,No,0,,,Not applicable to me,Not applicable to me
3,2018,34,male,Norway,No,0,6-25,1.0,No,No,...,No,8.0,No,,No,0,,,Not applicable to me,Not applicable to me
4,2018,29,Ostensibly Male,United States of America,Yes,0,26-100,1.0,Yes,Yes,...,Yes,7.0,Yes,Yes,Yes,1,,,Rarely,Sometimes


In [18]:
output_2018 = os.path.join(base_path, '2018_clean.csv')
df2018_clean.to_csv(output_2018, index=False)
print("저장 완료:", output_2018)


저장 완료: /content/drive/MyDrive/데이터분석/2018_clean.csv


In [19]:
import os
import glob
import pandas as pd

# 데이터 폴더 경로
base_path = '/content/drive/MyDrive/데이터분석'

# 폴더 안에 뭐가 있는지 먼저 확인 (한 번만 봐도 좋음)
print("데이터분석 폴더 내 파일 목록:")
print(os.listdir(base_path))

# '2019'가 이름에 들어가고, 확장자가 .csv인 파일 찾기
pattern_2019 = os.path.join(base_path, '*2019*.csv')
files_2019 = glob.glob(pattern_2019)

if not files_2019:
    raise FileNotFoundError("⚠ '2019'가 들어간 .csv 파일을 찾지 못했어요. os.listdir(base_path) 출력 확인해줘!")

print("\n사용할 2019 파일:", files_2019[0])

# 2019 데이터 읽기
df2019 = pd.read_csv(files_2019[0], encoding='ISO-8859-1')

# 컬럼 대략 확인
df2019.head()


데이터분석 폴더 내 파일 목록:
['trip.csv', 'cars.csv', 'brand.csv', '레퍼런스_중고자동차_데이터분석_01.ipynb', '레퍼런스-중고자동차 데이터분석_02.ipynb', 'fraud.csv', '2019.csv', '2021.csv', '2017.csv', '2020.csv', '2018.csv', 'IT_멘탈_통합_2017_2021_카테고리컬럼명.csv', 'IT_멘탈_통합_2017_2021_최종정리.csv', 'IT_멘탈_통합_2017_2021_최종정리_with_year.csv', '칼럼 매핑.xlsx', '2017_clean.csv', '2018_clean.csv']

사용할 2019 파일: /content/drive/MyDrive/데이터분석/2019.csv


Unnamed: 0,*Are you self-employed?*,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,"If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?",...,Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.,"If there is anything else you would like to tell us that has not been covered by the survey questions, please use this space to do so.",Would you be willing to talk to one of us more extensively about your experiences with mental health issues in the tech industry? (Note that all interview responses would be used _anonymously_ and only with your permission.),What is your age?,What is your gender?,What country do you *live* in?,What US state or territory do you *live* in?,What is your race?,What country do you *work* in?,What US state or territory do you *work* in?
0,False,26-100,True,True,I don't know,No,Yes,Yes,I don't know,Very easy,...,,,False,25,Male,United States of America,Nebraska,White,United States of America,Nebraska
1,False,26-100,True,True,Yes,No,No,Yes,Yes,I don't know,...,,,False,51,male,United States of America,Nebraska,White,United States of America,Nebraska
2,False,26-100,True,True,I don't know,No,No,I don't know,I don't know,Somewhat difficult,...,I think opening up more conversation around th...,Thank you,True,27,Male,United States of America,Illinois,White,United States of America,Illinois
3,False,100-500,True,True,I don't know,No,Yes,Yes,Yes,Very easy,...,,,False,37,male,United States of America,Nebraska,White,United States of America,Nebraska
4,False,26-100,True,True,I don't know,No,I don't know,I don't know,I don't know,I don't know,...,,,False,46,m,United States of America,Nebraska,White,United States of America,Nebraska


In [20]:
rename_2019 = {
    "What is your age?": "Age",
    "What is your gender?": "Gender",
    "What country do you live in?": "Country",

    # Personal background
    "Do you have a family history of mental illness?": "family_history",
    "Are you self-employed?": "self_employed",

    # Organizational characteristics
    "How many employees does your company or organization have?": "no_employees",
    "Is your employer primarily a tech company/organization?": "tech_company",

    # Organizational Care
    "Does your employer provide mental health benefits as part of healthcare coverage?": "Organizational_Care_1",
    "Does your employer offer resources to learn more about mental health disorders and options for seeking help?": "Organizational_Care_2",

    # Organizational Access
    "Do you know the options for mental health care available under your employer-provided health coverage?": "Organizational_Access",

    # Organizational Support
    "Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?": "Organizational_Support",

    # Organizational Protection
    "Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?": "Organizational_Protection",

    # Organizational Leave
    "If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?": "Organizational_Leave",

    # Mental health consequence
    "Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?": "mentalhealthconsequence",

    # Coworkers / Supervisors
    "Would you feel comfortable discussing a mental health issue with your coworkers?": "coworkers",
    "Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?": "supervisors",

    # Organizational Mental Health Importance
    "Overall, how much importance does your employer place on physical health?": "mentalimportance",

    # Treatment
    "Do you currently have a mental health disorder?": "mh_current",
    "Have you ever been diagnosed with a mental health disorder?": "mh_diagnosed",
    "Have you had a mental health disorder in the past?": "mh_past",
    "Have you ever sought treatment for a mental health disorder from a mental health professional?": "mh_treated",

    # Work interfere
    "Do you believe your productivity is ever affected by a mental health issue?": "work_interfere_impact",
    "If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?": "work_interfere_percentage",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?": "work_interfere_treated",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?": "work_interfere_untreated",
}


In [21]:
import re

def clean_and_rename_2019(df, rename_dict):
    df = df.copy()

    # 1) 컬럼명에서 HTML 태그 제거
    df.columns = [re.sub(r'<.*?>', '', str(col)) for col in df.columns]

    # 2) 매핑 기준으로 실제 존재하는 컬럼 / 누락된 컬럼 나누기
    existing_cols = [col for col in rename_dict.keys() if col in df.columns]
    missing_cols = [col for col in rename_dict.keys() if col not in df.columns]

    print("=== 2019 처리 결과 ===")
    print("사용된 컬럼 수 :", len(existing_cols))
    print("누락된 컬럼 수 :", len(missing_cols))
    if missing_cols:
        print("누락된 컬럼 목록:")
        for c in missing_cols:
            print(" -", c)

    # 3) 필요한 컬럼만 선택 후 리네임
    df_clean = df[existing_cols].rename(columns=rename_dict)
    return df_clean


In [22]:
# 1) 2019 데이터 정리
df2019_clean = clean_and_rename_2019(df2019, rename_2019)

# 2) Year 컬럼 추가
df2019_clean["Year"] = 2019

# 3) Year를 첫 번째 컬럼으로 이동
cols = ["Year"] + [c for c in df2019_clean.columns if c != "Year"]
df2019_clean = df2019_clean[cols]

df2019_clean.head()


=== 2019 처리 결과 ===
사용된 컬럼 수 : 18
누락된 컬럼 수 : 7
누락된 컬럼 목록:
 - What country do you live in?
 - Are you self-employed?
 - Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?
 - Do you currently have a mental health disorder?
 - Have you ever been diagnosed with a mental health disorder?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?


Unnamed: 0,Year,Age,Gender,family_history,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,Organizational_Access,Organizational_Support,Organizational_Protection,Organizational_Leave,coworkers,supervisors,mentalimportance,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage
0,2019,25,Male,No,26-100,True,I don't know,Yes,No,Yes,I don't know,Very easy,Yes,Yes,5.0,No,False,,
1,2019,51,male,Yes,26-100,True,Yes,Yes,No,No,Yes,I don't know,Maybe,Maybe,5.0,Possibly,False,,
2,2019,27,Male,I don't know,26-100,True,I don't know,I don't know,No,No,I don't know,Somewhat difficult,Maybe,No,5.0,No,False,,
3,2019,37,male,Yes,100-500,True,I don't know,Yes,No,Yes,Yes,Very easy,Maybe,Yes,1.0,No,False,,
4,2019,46,m,No,26-100,True,I don't know,I don't know,No,I don't know,I don't know,I don't know,No,No,4.0,No,False,,


In [23]:
output_2019 = os.path.join(base_path, '2019_clean.csv')
df2019_clean.to_csv(output_2019, index=False)

print("✅ 2019 정제본 저장 완료 →", output_2019)


✅ 2019 정제본 저장 완료 → /content/drive/MyDrive/데이터분석/2019_clean.csv


In [24]:
df2019_clean.columns.tolist()

['Year',
 'Age',
 'Gender',
 'family_history',
 'no_employees',
 'tech_company',
 'Organizational_Care_1',
 'Organizational_Care_2',
 'Organizational_Access',
 'Organizational_Support',
 'Organizational_Protection',
 'Organizational_Leave',
 'coworkers',
 'supervisors',
 'mentalimportance',
 'mh_past',
 'mh_treated',
 'work_interfere_impact',
 'work_interfere_percentage']

In [25]:
import os
import glob
import pandas as pd

base_path = '/content/drive/MyDrive/데이터분석'

# 2020이 이름에 들어가는 csv 자동 탐색
pattern_2020 = os.path.join(base_path, '*2020*.csv')
files_2020 = glob.glob(pattern_2020)

if not files_2020:
    raise FileNotFoundError("⚠ '2020'가 들어간 .csv 파일을 찾지 못했어요. os.listdir(base_path)로 파일명을 한 번 확인해줘!")

print("사용할 2020 파일:", files_2020[0])

df2020 = pd.read_csv(files_2020[0], encoding='ISO-8859-1')

# 원본 데이터 & 컬럼 확인
display(df2020.head())
df2020.columns.tolist()


사용할 2020 파일: /content/drive/MyDrive/데이터분석/2020.csv


Unnamed: 0,#,*Are you self-employed?*,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,...,"If there is anything else you would like to tell us that has not been covered by the survey questions, please use this space to do so.",Would you be willing to talk to one of us more extensively about your experiences with mental health issues in the tech industry? (Note that all interview responses would be used _anonymously_ and only with your permission.),What is your age?,What is your gender?,What country do you *live* in?,What US state or territory do you *live* in?,What is your race?,Other.3,What country do you *work* in?,What US state or territory do you *work* in?
0,zwrffw6ykfo82ft1twvzwrffw6c6wsfv,1,,,,,,,,,...,,0,45,Male,United States of America,Connecticut,White,,United States of America,Connecticut
1,zhdmhaa8r0125c4zmoi7qzhdmtjrakhm,1,,,,,,,,,...,,1,24,female,Russia,,,,Russia,
2,x4itwa9hnlw7qke4y5xibx4itwa9yzl5,1,,,,,,,,,...,mental health should be a law by government.,1,46,Male,India,,,,India,
3,x3v3oimu5pn0043n8x3v3oizaybhwwto,1,,,,,,,,,...,,1,25,Female,Canada,,,,Canada,
4,uyp6re7bhnyx6gez09uyp6re72z0e4e4,1,,,,,,,,,...,no,1,25,F,Canada,,,,Canada,


['#',
 '*Are you self-employed?*',
 'How many employees does your company or organization have?',
 'Is your employer primarily a tech company/organization?',
 'Is your primary role within your company related to tech/IT?',
 'Does your employer provide mental health benefits as part of healthcare coverage?',
 'Do you know the options for mental health care available under your employer-provided health coverage?',
 'Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?',
 'Does your employer offer resources to learn more about mental health disorders and options for seeking help?',
 'Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?',
 'If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?',
 'Would you feel more comfortable talking to you

In [26]:
rename_2020 = {
    "What is your age?": "Age",
    "What is your gender?": "Gender",
    "What country do you live in?": "Country",

    # Personal background
    "Do you have a family history of mental illness?": "family_history",
    "Are you self-employed?": "self_employed",

    # Organizational characteristics
    "How many employees does your company or organization have?": "no_employees",
    "Is your employer primarily a tech company/organization?": "tech_company",

    # Organizational Care
    "Does your employer provide mental health benefits as part of healthcare coverage?": "Organizational_Care_1",
    "Does your employer offer resources to learn more about mental health disorders and options for seeking help?": "Organizational_Care_2",

    # Organizational Access
    "Do you know the options for mental health care available under your employer-provided health coverage?": "Organizational_Access",

    # Organizational Support
    "Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?": "Organizational_Support",

    # Organizational Protection
    "Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?": "Organizational_Protection",

    # Organizational Leave
    "If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?": "Organizational_Leave",

    # Mental health consequence
    "Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?": "mentalhealthconsequence",

    # Coworkers / Supervisors
    "Would you feel comfortable discussing a mental health issue with your coworkers?": "coworkers",
    "Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?": "supervisors",

    # Organizational Mental Health Importance
    "Overall, how much importance does your employer place on physical health?": "mentalimportance",

    # Treatment
    "Do you currently have a mental health disorder?": "mh_current",
    "Have you ever been diagnosed with a mental health disorder?": "mh_diagnosed",
    "Have you had a mental health disorder in the past?": "mh_past",
    "Have you ever sought treatment for a mental health disorder from a mental health professional?": "mh_treated",

    # Work interfere
    "Do you believe your productivity is ever affected by a mental health issue?": "work_interfere_impact",
    "If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?": "work_interfere_percentage",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?": "work_interfere_treated",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?": "work_interfere_untreated",
}


In [27]:
import re

def clean_and_rename(df, rename_dict):
    df = df.copy()

    # 1) 컬럼명에서 HTML 태그 제거
    df.columns = [re.sub(r'<.*?>', '', str(col)) for col in df.columns]

    # 2) 매핑 기준으로 실제 존재하는 컬럼 / 누락된 컬럼 나누기
    existing_cols = [col for col in rename_dict.keys() if col in df.columns]
    missing_cols = [col for col in rename_dict.keys() if col not in df.columns]

    print("=== 처리 결과 ===")
    print("사용된 컬럼 수 :", len(existing_cols))
    print("누락된 컬럼 수 :", len(missing_cols))
    if missing_cols:
        print("누락된 컬럼 목록:")
        for c in missing_cols:
            print(" -", c)

    # 3) 필요한 컬럼만 선택 후 리네임
    df_clean = df[existing_cols].rename(columns=rename_dict)
    return df_clean


In [28]:
# 1) 2020 데이터 정리
df2020_clean = clean_and_rename(df2020, rename_2020)

# 2) Year 컬럼 추가
df2020_clean["Year"] = 2020

# 3) Year를 첫 번째 컬럼으로 이동
cols_2020 = ["Year"] + [c for c in df2020_clean.columns if c != "Year"]
df2020_clean = df2020_clean[cols_2020]

display(df2020_clean.head())
df2020_clean.columns.tolist()


=== 처리 결과 ===
사용된 컬럼 수 : 18
누락된 컬럼 수 : 7
누락된 컬럼 목록:
 - What country do you live in?
 - Are you self-employed?
 - Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?
 - Do you currently have a mental health disorder?
 - Have you ever been diagnosed with a mental health disorder?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?


Unnamed: 0,Year,Age,Gender,family_history,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,Organizational_Access,Organizational_Support,Organizational_Protection,Organizational_Leave,coworkers,supervisors,mentalimportance,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage
0,2020,45,Male,Yes,,,,,,,,,,,,Don't Know,0,Yes,1-25%
1,2020,24,female,Yes,,,,,,,,,,,,Yes,1,Yes,1-25%
2,2020,46,Male,Yes,,,,,,,,,,,,,0,Yes,76-100%
3,2020,25,Female,Yes,,,,,,,,,,,,,0,Yes,1-25%
4,2020,25,F,I don't know,,,,,,,,,,,,No,0,Yes,26-50%


['Year',
 'Age',
 'Gender',
 'family_history',
 'no_employees',
 'tech_company',
 'Organizational_Care_1',
 'Organizational_Care_2',
 'Organizational_Access',
 'Organizational_Support',
 'Organizational_Protection',
 'Organizational_Leave',
 'coworkers',
 'supervisors',
 'mentalimportance',
 'mh_past',
 'mh_treated',
 'work_interfere_impact',
 'work_interfere_percentage']

In [29]:
output_2020 = os.path.join(base_path, '2020_clean.csv')
df2020_clean.to_csv(output_2020, index=False)

print("✅ 2020 정제본 저장 완료 →", output_2020)


✅ 2020 정제본 저장 완료 → /content/drive/MyDrive/데이터분석/2020_clean.csv


In [30]:
import os
import glob
import pandas as pd

base_path = '/content/drive/MyDrive/데이터분석'

# 2021 이라는 글자가 들어간 csv 파일 찾기
pattern_2021 = os.path.join(base_path, '*2021*.csv')
files_2021 = glob.glob(pattern_2021)

if not files_2021:
    raise FileNotFoundError("⚠ '2021'가 들어간 .csv 파일을 찾지 못했어요. os.listdir(base_path)로 파일명을 확인해줘!")

print("사용할 2021 파일:", files_2021[0])

# 2021 데이터 읽기
df2021 = pd.read_csv(files_2021[0], encoding='ISO-8859-1')

# 원본 데이터 확인
display(df2021.head())
df2021.columns.tolist()


사용할 2021 파일: /content/drive/MyDrive/데이터분석/2021.csv


Unnamed: 0,#,*Are you self-employed?*,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,...,What country do you *live* in?,What US state or territory do you *live* in?,What is your race?,Other.3,What country do you *work* in?,What US state or territory do you *work* in?,Have you been diagnosed with COVID-19?,Start Date (UTC),Submit Date (UTC),Network ID
0,eu6sxt9r6pyxxykpxcqs6r2qeu6sxt9r,0,26-100,1.0,1.0,I don't know,No,No,I don't know,I don't know,...,United States of America,Alaska,White,,United States of America,Alaska,,2021-12-16 20:28:29,2021-12-16 20:36:02,f3adc1b78a
1,mqgsjpmc31k0fuymqgv96vapalozx3q0,0,500-1000,1.0,1.0,Yes,No,Yes,Yes,Yes,...,Brazil,,,,Brazil,,,2021-12-16 13:42:51,2021-12-16 18:44:40,49a51b7ac1
2,fkuijv5hpifz4fusbn1fkuib4ipqt19s,0,100-500,1.0,1.0,Yes,Yes,No,I don't know,I don't know,...,Brazil,,,,Brazil,,,2021-12-16 13:01:19,2021-12-16 13:14:49,f9e26fd31a
3,eb7i5ye61yizgs5veb7i5yyttqp5zwp2,1,,,,,,,,,...,Italy,,,,Italy,,,2021-12-16 08:57:11,2021-12-16 08:59:00,73a0e1b4c5
4,0qoaw3bb3kzbb2oet0qoa3xc3yzdrvva,0,More than 1000,0.0,1.0,Yes,No,Yes,Yes,I don't know,...,Canada,,,,Canada,,,2021-12-08 14:35:27,2021-12-08 14:44:10,bade27e84d


['#',
 '*Are you self-employed?*',
 'How many employees does your company or organization have?',
 'Is your employer primarily a tech company/organization?',
 'Is your primary role within your company related to tech/IT?',
 'Does your employer provide mental health benefits as part of healthcare coverage?',
 'Do you know the options for mental health care available under your employer-provided health coverage?',
 'Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?',
 'Does your employer offer resources to learn more about mental health disorders and options for seeking help?',
 'Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?',
 'If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?',
 'Would you feel more comfortable talking to you

In [31]:
rename_2021 = {
    "What is your age?": "Age",
    "What is your gender?": "Gender",
    "What country do you live in?": "Country",

    # Personal background
    "Do you have a family history of mental illness?": "family_history",
    "Are you self-employed?": "self_employed",

    # Organizational characteristics
    "How many employees does your company or organization have?": "no_employees",
    "Is your employer primarily a tech company/organization?": "tech_company",

    # Organizational Care
    "Does your employer provide mental health benefits as part of healthcare coverage?": "Organizational_Care_1",
    "Does your employer offer resources to learn more about mental health disorders and options for seeking help?": "Organizational_Care_2",

    # Organizational Access
    "Do you know the options for mental health care available under your employer-provided health coverage?": "Organizational_Access",

    # Organizational Support
    "Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?": "Organizational_Support",

    # Organizational Protection
    "Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?": "Organizational_Protection",

    # Organizational Leave
    "If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?": "Organizational_Leave",

    # Mental health consequence
    "Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?": "mentalhealthconsequence",

    # Coworkers / Supervisors
    "Would you feel comfortable discussing a mental health issue with your coworkers?": "coworkers",
    "Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?": "supervisors",

    # Organizational Mental Health Importance
    "Overall, how much importance does your employer place on physical health?": "mentalimportance",

    # Treatment
    "Do you currently have a mental health disorder?": "mh_current",
    "Have you ever been diagnosed with a mental health disorder?": "mh_diagnosed",
    "Have you had a mental health disorder in the past?": "mh_past",
    "Have you ever sought treatment for a mental health disorder from a mental health professional?": "mh_treated",

    # Work interfere
    "Do you believe your productivity is ever affected by a mental health issue?": "work_interfere_impact",
    "If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?": "work_interfere_percentage",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?": "work_interfere_treated",
    "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?": "work_interfere_untreated",
}


In [32]:
import re

def clean_and_rename(df, rename_dict):
    df = df.copy()

    # 1) 컬럼명에서 HTML 태그 제거
    df.columns = [re.sub(r'<.*?>', '', str(col)) for col in df.columns]

    # 2) 매핑 기준으로 실제 존재하는 컬럼 / 누락된 컬럼 나누기
    existing_cols = [col for col in rename_dict.keys() if col in df.columns]
    missing_cols = [col for col in rename_dict.keys() if col not in df.columns]

    print("=== 처리 결과 ===")
    print("사용된 컬럼 수 :", len(existing_cols))
    print("누락된 컬럼 수 :", len(missing_cols))
    if missing_cols:
        print("누락된 컬럼 목록:")
        for c in missing_cols:
            print(" -", c)

    # 3) 필요한 컬럼만 선택 후 리네임
    df_clean = df[existing_cols].rename(columns=rename_dict)
    return df_clean


In [33]:
# 1) 2021 데이터 정리
df2021_clean = clean_and_rename(df2021, rename_2021)

# 2) Year 컬럼 추가
df2021_clean["Year"] = 2021

# 3) Year를 첫 번째 컬럼으로 이동
cols_2021 = ["Year"] + [c for c in df2021_clean.columns if c != "Year"]
df2021_clean = df2021_clean[cols_2021]

display(df2021_clean.head())
df2021_clean.columns.tolist()


=== 처리 결과 ===
사용된 컬럼 수 : 18
누락된 컬럼 수 : 7
누락된 컬럼 목록:
 - What country do you live in?
 - Are you self-employed?
 - Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?
 - Do you currently have a mental health disorder?
 - Have you ever been diagnosed with a mental health disorder?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?
 - If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?


Unnamed: 0,Year,Age,Gender,family_history,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,Organizational_Access,Organizational_Support,Organizational_Protection,Organizational_Leave,coworkers,supervisors,mentalimportance,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage
0,2021,28,Female,I don't know,26-100,1.0,I don't know,I don't know,No,No,I don't know,Very easy,No,Maybe,1.0,Possibly,0,,
1,2021,41,male,No,500-1000,1.0,Yes,Yes,No,Yes,Yes,I don't know,Maybe,No,9.0,No,0,,
2,2021,35,Male,No,100-500,1.0,Yes,I don't know,Yes,No,I don't know,Somewhat easy,Yes,Maybe,5.0,No,0,,
3,2021,20,male,Yes,,,,,,,,,,,,Don't Know,0,No,
4,2021,35,female,No,More than 1000,0.0,Yes,Yes,No,Yes,I don't know,Difficult,Maybe,No,10.0,Yes,1,,


['Year',
 'Age',
 'Gender',
 'family_history',
 'no_employees',
 'tech_company',
 'Organizational_Care_1',
 'Organizational_Care_2',
 'Organizational_Access',
 'Organizational_Support',
 'Organizational_Protection',
 'Organizational_Leave',
 'coworkers',
 'supervisors',
 'mentalimportance',
 'mh_past',
 'mh_treated',
 'work_interfere_impact',
 'work_interfere_percentage']

In [34]:
output_2021 = os.path.join(base_path, '2021_clean.csv')
df2021_clean.to_csv(output_2021, index=False)

print("✅ 2021 정제본 저장 완료 →", output_2021)


✅ 2021 정제본 저장 완료 → /content/drive/MyDrive/데이터분석/2021_clean.csv


In [35]:
import os
import pandas as pd

base_path = '/content/drive/MyDrive/데이터분석'

# clean 파일 목록
clean_files = [
    '2017_clean.csv',
    '2018_clean.csv',
    '2019_clean.csv',
    '2020_clean.csv',
    '2021_clean.csv'
]

# 읽어서 리스트에 넣기
df_list = []
for file in clean_files:
    path = os.path.join(base_path, file)
    df = pd.read_csv(path, encoding='utf-8')   # 저장 방식 따라 encoding 바꿀 수도 있음
    df_list.append(df)

# 통합
all_years = pd.concat(df_list, ignore_index=True)

# 확인
all_years.head()
all_years.shape


(1836, 26)

In [37]:
all_years.head(40)

Unnamed: 0,Year,Age,Gender,Country,family_history,self_employed,no_employees,tech_company,Organizational_Care_1,Organizational_Care_2,...,supervisors,mentalimportance,mh_current,mh_diagnosed,mh_past,mh_treated,work_interfere_impact,work_interfere_percentage,work_interfere_treated,work_interfere_untreated
0,2017,27.0,Female,United Kingdom,No,0.0,100-500,1.0,No,I don't know,...,Yes,6.0,Possibly,,Possibly,1,,,Sometimes,Sometimes
1,2017,31.0,male,United Kingdom,No,0.0,100-500,1.0,Yes,No,...,Maybe,7.0,Possibly,,Possibly,0,,,Not applicable to me,Sometimes
2,2017,36.0,male,United States of America,Yes,0.0,6-25,1.0,I don't know,No,...,Yes,0.0,Yes,Yes,Yes,1,,,Sometimes,Sometimes
3,2017,22.0,Male,United States of America,I don't know,0.0,More than 1000,1.0,Yes,I don't know,...,Yes,7.0,Yes,Yes,No,1,,,Sometimes,Often
4,2017,52.0,female,United States of America,Yes,1.0,,,,,...,,,No,,Yes,1,Yes,1-25%,Often,Sometimes
5,2017,30.0,male,United States of America,Yes,0.0,100-500,1.0,Yes,I don't know,...,Maybe,9.0,No,,No,0,,,Rarely,Not applicable to me
6,2017,36.0,F,United States of America,Yes,0.0,6-25,1.0,Yes,No,...,Yes,10.0,Yes,Yes,No,1,,,Rarely,Often
7,2017,38.0,Female,United States of America,Yes,0.0,26-100,1.0,Yes,No,...,Yes,10.0,No,,No,1,,,Not applicable to me,Not applicable to me
8,2017,35.0,Male,Switzerland,I don't know,0.0,100-500,0.0,I don't know,No,...,Maybe,9.0,Don't Know,,No,0,,,Not applicable to me,Not applicable to me
9,2017,36.0,male,India,No,1.0,,,,,...,,,Possibly,,No,1,Unsure,,Often,Sometimes


In [38]:
output_all = os.path.join(base_path, 'all_years_2017_2021_clean.csv')
all_years.to_csv(output_all, index=False)

print("✅ 모든 연도 통합 파일 저장 완료 →", output_all)


✅ 모든 연도 통합 파일 저장 완료 → /content/drive/MyDrive/데이터분석/all_years_2017_2021_clean.csv
