# Process 2019 Data
#### 10/16/2020
---

## Load and Process Dataset
### Import Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Importing the Data Set
df_2019 = pd.read_csv("2019_new-var_mapped.csv")
df_2019.head()

Unnamed: 0,DIVISION,PUMA,REGION,ST,RT,PWGTP,AGEP,CIT,COW,SCHL,...,RELSHIPP,RAC1P,SEX,INTP,WKHP,FOD1P,FOD2P,POBP,PINCP,SCIENGP
0,6,1100,3,1,P,20,19,1,2.0,19.0,...,38,2,1,0.0,32.0,,,1,1400.0,
1,6,2100,3,1,P,61,39,1,4.0,21.0,...,37,1,2,0.0,40.0,3301.0,,1,60000.0,2.0
2,6,301,3,1,P,89,19,1,1.0,19.0,...,38,2,1,0.0,31.0,,,1,390.0,
3,6,2100,3,1,P,61,39,1,4.0,21.0,...,37,1,2,0.0,40.0,3301.0,,1,60000.0,2.0
4,6,2000,3,1,P,49,21,1,1.0,19.0,...,38,1,1,0.0,12.0,,,36,4440.0,


### Encoding the data to make it easier to read

The original dataset replaced the numerical categorical values with actual word categories. 

In order to be as similar to the original as possible, we will do the same.

First, we replace column names with words instead of codes.

In [3]:
# replace column names
df_2019 = df_2019.rename(columns = 
                         {'DIVISION': 'Geographic-division',
                          'PUMA': 'area-code',
                          'REGION': 'region',
                          'ST': 'state',
                          'PWGTP': 'person-weight',
                          'AGEP': 'age', 
                          'CIT': 'citizenship-status',
                          'COW': 'class-worker',
                          'SCHL': 'education',
                          'MAR': 'marital-status',
                          'OCCP': 'occupation',
                          'RELSHIPP': 'relationship',
                          'RAC1P': 'race',
                          'SEX': 'sex',
                          'INTP': 'extra-income',
                          'WKHP': 'hours-per-week',
                          'FOD1P': 'field-of-degree',
                          'POBP': 'place-of-birth',
                          'PINCP': 'income',
                          'SCIENGP': 'stem-degree'}, inplace = False)

# drop field of degree and record type 
df_2019 = df_2019.drop(columns=['FOD2P'])

df_2019.head()

Unnamed: 0,Geographic-division,area-code,region,state,RT,person-weight,age,citizenship-status,class-worker,education,...,occupation,relationship,race,sex,extra-income,hours-per-week,field-of-degree,place-of-birth,income,stem-degree
0,6,1100,3,1,P,20,19,1,2.0,19.0,...,5240.0,38,2,1,0.0,32.0,,1,1400.0,
1,6,2100,3,1,P,61,39,1,4.0,21.0,...,3602.0,37,1,2,0.0,40.0,3301.0,1,60000.0,2.0
2,6,301,3,1,P,89,19,1,1.0,19.0,...,4720.0,38,2,1,0.0,31.0,,1,390.0,
3,6,2100,3,1,P,61,39,1,4.0,21.0,...,3602.0,37,1,2,0.0,40.0,3301.0,1,60000.0,2.0
4,6,2000,3,1,P,49,21,1,1.0,19.0,...,4150.0,38,1,1,0.0,12.0,,36,4440.0,


## Changing values from number-category to string-category 

For now, this is the same as the original dataset, but the values have changed between 1994 and 2019. The class of worker categories are slightly different - including nonprofit employment, for example. 

Education categories include all grades separated out, along with grade12-nograd, and some-college split out between less than 1 year and more than 1 year. Associates degrees are not split by academic or vocational anymore. It's unknown if these differences were present in the data or changed by the creators of the dataset.


#### Occupation and place of birth are both huge lists and will be tackled later on.

In [4]:
# replace numerical categories with string categories

# geographic-division
df_2019["Geographic-division"].replace({0: "Puerto Rico", 1: "New England",
                                        2: "Middle Atlantic", 3: "East North Central",
                                        4: "West North Central", 5: "South Atlantic",
                                        6: "East South Central", 7: "West South Central",
                                        8: "Mountain", 9: "Pacific"}, inplace=True)

# region
df_2019["region"].replace({1: "Northeast", 2: "Midwest",
                            3: "South", 4: "West",
                            9: "Puerto Rico"}, inplace=True)

# state
df_2019["state"].replace({1: "AL",
                         2: "AK",
                         4: "AZ",
                         5: "AR", 
                         6: "CA",
                         8: "CO", 
                         9: "CT", 
                        10: "DE",
                        11: "DC",
                        12: "FL", 
                        13: "GA",
                        15: "HI",
                        16: "ID",
                        17: "IL",
                        18: "IN",
                        19: "IA",
                        20: "KS",
                        21: "KY",
                        22: "LA",
                        23: "ME",
                        24: "MD",
                        25: "MA",
                        26: "MI", 
                        27: "MN",
                        28: "MS", 
                        29: "MO",
                        30: "MT", 
                        31: "NE", 
                        32: "NV", 
                        33: "NH",     
                        34: "NJ",
                        35: "NM", 
                        36: "NY",
                        37: "NC",
                        38: "ND", 
                        39: "OH",
                        40: "OK",
                        41: "OR",
                        42: "PA",
                        44: "RI",
                        45: "SC",
                        46: "SD",
                        47: "TN",
                        48: "TX",
                        49: "UT",
                        50: "VT",
                        51: "VA",
                        53: "WA",
                        54: "WV",
                        55: "WI",
                        56: "WY",
                        72: "PR"}, inplace=True)

# citizenship-status
df_2019["citizenship-status"].replace({1: "Born-US", 2: "Born-PR-Guam-USvirginislands-northernmarianas",
                                    3: "Born-abroad-US-parents", 4: "naturalized-citizen",
                                    5: "not-US-citizen"}, inplace=True)

# class-worker
df_2019["class-worker"].replace({1.0: "Private-prof", 2.0: "Private-nonprof",
                                    3.0: "Local-gov", 4.0: "State-gov",
                                    5.0: "Fed-gov", 6.0: "Self-emp-not-inc",
                                    7.0: "Self-emp-inc", 8.0: "Without-pay",
                                    9.0: "Never-worked"}, inplace=True)

# education
df_2019["education"].replace({1.0: "no-school", 2.0: "preschool",
                                    3.0: "kindergarten", 4.0: "grade1",
                                    5.0: "grade2", 6.0: "grade3",
                                    7.0: "grade4", 8.0: "grade5",
                                    9.0: "grade6", 10.0: "grade7",
                                    11.0: "grade8", 12.0: "grade9",
                                    13.0: "grade10", 14.0: "grade11",
                                    15.0: "grade12-nograd", 16.0: "HSgrad",
                                    17.0: "GED", 18.0: "some-college-less1",
                                    19.0: "some-college-great1", 20.0: "associate",
                                    21.0: "bachelor", 22.0: "master",
                                    23.0: "prof-school", 24.0: "doctorate"}, inplace=True)

# marital status
df_2019["marital-status"].replace({1.0: "Married", 2.0: "Widowed",
                                    3.0: "Divorced", 4.0: "Separated",
                                    5.0: "Never-married"}, inplace=True)

# occupation
df_2019["occupation"].replace({10: "MGR-Chief Executives And Legislators",
                                    20: "MGR-General And Operations Managers",
                                    40: "MGR-Advertising And Promotions Managers",
                                    51: "MGR-Marketing Managers",
                                    52: "MGR-Sales Managers",
                                    60: "MGR-Public Relations And Fundraising Managers",
                                    101: "MGR-Administrative Services Managers",
                                    102: "MGR-Facilities Managers",
                                    110: "MGR-Computer And Information Systems Managers",
                                    120: "MGR-Financial Managers",
                                    135: "MGR-Compensation And Benefits Managers",
                                    136: "MGR-Human Resources Managers",
                                    137: "MGR-Training And Development Managers",
                                    140: "MGR-Industrial Production Managers",
                                    150: "MGR-Purchasing Managers",
                                    160: "MGR-Transportation, Storage, And Distribution Managers",
                                    205: "MGR-Farmers, Ranchers, And Other Agricultural Managers",
                                    220: "MGR-Construction Managers",
                                    230: "MGR-Education And Childcare Administrators",
                                    300: "MGR-Architectural And Engineering Managers",
                                    310: "MGR-Food Service Managers",
                                    335: "MGR-Entertainment and Recreation Managers",
                                    340: "MGR-Lodging Managers",
                                    350: "MGR-Medical And Health Services Managers",
                                    360: "MGR-Natural Sciences Managers",
                                    410: "MGR-Property, Real Estate, And Community Association Managers",
                                    420: "MGR-Social And Community Service Managers",
                                    425: "MGR-Emergency Management Directors",
                                    440: "MGR-Other Managers",
                                    500: "BUS-Agents And Business Managers Of Artists, Performers, And Athletes",
                                    510: "BUS-Buyers And Purchasing Agents, Farm Products",
                                    520: "BUS-Wholesale And Retail Buyers, Except Farm Products",
                                    530: "BUS-Purchasing Agents, Except Wholesale, Retail, And Farm Products",
                                    540: "BUS-Claims Adjusters, Appraisers, Examiners, And Investigators",
                                    565: "BUS-Compliance Officers",
                                    600: "BUS-Cost Estimators",
                                    630: "BUS-Human Resources Workers",
                                    640: "BUS-Compensation, Benefits, And Job Analysis Specialists",
                                    650: "BUS-Training And Development Specialists",
                                    700: "BUS-Logisticians",
                                    705: "BUS-Project Management Specialists",
                                    710: "BUS-Management Analysts",
                                    725: "BUS-Meeting, Convention, And Event Planners",
                                    726: "BUS-Fundraisers",
                                    735: "BUS-Market Research Analysts And Marketing Specialists",
                                    750: "BUS-Business Operations Specialists, All Other",
                                    800: "FIN-Accountants And Auditors",
                                    810: "FIN-Property Appraisers and Assessors",
                                    820: "FIN-Budget Analysts",
                                    830: "FIN-Credit Analysts",
                                    845: "FIN-Financial And Investment Analysts",
                                    850: "FIN-Personal Financial Advisors",
                                    860: "FIN-Insurance Underwriters",
                                    900: "FIN-Financial Examiners",
                                    910: "FIN-Credit Counselors And Loan Officers",
                                    930: "FIN-Tax Examiners And Collectors, And Revenue Agents",
                                    940: "FIN-Tax Preparers",
                                    960: "FIN-Other Financial Specialists",
                                    1005: "CMM-Computer And Information Research Scientists",
                                    1006: "CMM-Computer Systems Analysts",
                                    1007: "CMM-Information Security Analysts",
                                    1010: "CMM-Computer Programmers",
                                    1021: "CMM-Software Developers",
                                    1022: "CMM-Software Quality Assurance Analysts and Testers",
                                    1031: "CMM-Web Developers",
                                    1032: "CMM-Web And Digital Interface Designers",
                                    1050: "CMM-Computer Support Specialists",
                                    1065: "CMM-Database Administrators and Architects",
                                    1105: "CMM-Network And Computer Systems Administrators",
                                    1106: "CMM-Computer Network Architects",
                                    1108: "CMM-Computer Occupations, All Other",
                                    1200: "CMM-Actuaries",
                                    1220: "CMM-Operations Research Analysts",
                                    1240: "CMM-Other Mathematical Science Occupations",
                                    1305: "ENG-Architects, Except Landscape And Naval",
                                    1306: "ENG-Landscape Architects",
                                    1310: "ENG-Surveyors, Cartographers, And Photogrammetrists",
                                    1320: "ENG-Aerospace Engineers",
                                    1340: "ENG-Biomedical And Agricultural Engineers",
                                    1350: "ENG-Chemical Engineers",
                                    1360: "ENG-Civil Engineers",
                                    1400: "ENG-Computer Hardware Engineers",
                                    1410: "ENG-Electrical And Electronics Engineers",
                                    1420: "ENG-Environmental Engineers",
                                    1430: "ENG-Industrial Engineers, Including Health And Safety",
                                    1440: "ENG-Marine Engineers And Naval Architects",
                                    1450: "ENG-Materials Engineers",
                                    1460: "ENG-Mechanical Engineers",
                                    1520: "ENG-Petroleum, Mining And Geological Engineers, Including Mining Safety Engineers",
                                    1530: "ENG-Other Engineers",
                                    1541: "ENG-Architectural And Civil Drafters",
                                    1545: "ENG-Other Drafters",
                                    1551: "ENG-Electrical And Electronic Engineering Technologists and Technicians",
                                    1555: "Other Engineering Technologists And Technicians, Except Drafters",
                                    1560: "ENG-Surveying And Mapping Technicians",
                                    1600: "SCI-Agricultural And Food Scientists",
                                    1610: "SCI-Biological Scientists",
                                    1640: "SCI-Conservation Scientists And Foresters",
                                    1650: "SCI-Other Life Scientists",
                                    1700: "SCI-Astronomers And Physicists",
                                    1710: "SCI-Atmospheric And Space Scientists",
                                    1720: "SCI-Chemists And Materials Scientists",
                                    1745: "SCI-Environmental Scientists And Specialists, Including Health",
                                    1750: "SCI-Geoscientists And Hydrologists, Except Geographers",
                                    1760: "SCI-Physical Scientists, All Other",
                                    1800: "SCI-Economists",
                                    1821: "SCI-Clinical And Counseling Psychologists",
                                    1822: "SCI-School Psychologists",
                                    1825: "SCI-Other Psychologists",
                                    1840: "SCI-Urban And Regional Planners",
                                    1860: "SCI-Other Social Scientists",
                                    1900: "SCI-Agricultural And Food Science Technicians",
                                    1910: "SCI-Biological Technicians",
                                    1920: "SCI-Chemical Technicians",
                                    1935: "SCI-Environmental Science and Geoscience Technicians, And Nuclear Technicians",
                                    1970: "SCI-Other Life, Physical, And Social Science Technicians",
                                    1980: "SCI-Occupational Health And Safety Specialists and Technicians",
                                    2001: "CMS-Substance Abuse And Behavioral Disorder Counselors",
                                    2002: "CMS-Educational, Guidance, And Career Counselors And Advisors",
                                    2003: "CMS-Marriage And Family Therapists",
                                    2004: "CMS-Mental Health Counselors",
                                    2005: "CMS-Rehabilitation Counselors",
                                    2006: "CMS-Counselors, All Other",
                                    2011: "CMS-Child, Family, And School Social Workers",
                                    2012: "CMS-Healthcare Social Workers",
                                    2013: "CMS-Mental Health And Substance Abuse Social Workers",
                                    2014: "CMS-Social Workers, All Other",
                                    2015: "CMS-Probation Officers And Correctional Treatment Specialists",
                                    2016: "CMS-Social And Human Service Assistants",
                                    2025: "CMS-Other Community and Social Service Specialists",
                                    2040: "CMS-Clergy",
                                    2050: "CMS-Directors, Religious Activities And Education",
                                    2060: "CMS-Religious Workers, All Other",
                                    2100: "LGL-Lawyers, And Judges, Magistrates, And Other Judicial Workers",
                                    2105: "LGL-Judicial Law Clerks",
                                    2145: "LGL-Paralegals And Legal Assistants",
                                    2170: "LGL-Title Examiners, Abstractors, and Searchers",
                                    2180: "LGL-Legal Support Workers, All Other",
                                    2205: "EDU-Postsecondary Teachers",
                                    2300: "EDU-Preschool And Kindergarten Teachers",
                                    2310: "EDU-Elementary And Middle School Teachers",
                                    2320: "EDU-Secondary School Teachers",
                                    2330: "EDU-Special Education Teachers",
                                    2350: "EDU-Tutors",
                                    2360: "EDU-Other Teachers and Instructors",
                                    2400: "EDU-Archivists, Curators, And Museum Technicians",
                                    2435: "EDU-Librarians And Media Collections Specialists",
                                    2440: "EDU-Library Technicians",
                                    2545: "EDU-Teaching Assistants",
                                    2555: "EDU-Other Educational Instruction and Library Workers",
                                    2600: "ENT-Artists And Related Workers",
                                    2631: "ENT-Commercial And Industrial Designers",
                                    2632: "ENT-Fashion Designers",
                                    2633: "ENT-Floral Designers",
                                    2634: "ENT-Graphic Designers",
                                    2635: "ENT-Interior Designers",
                                    2636: "ENT-Merchandise Displayers And Windows Trimmers",
                                    2640: "ENT-Other Designers",
                                    2700: "ENT-Actors",
                                    2710: "ENT-Producers And Directors",
                                    2721: "ENT-Athletes and Sports Competitors",
                                    2722: "ENT-Coaches and Scouts",
                                    2723: "ENT-Umpires, Referees, And Other Sports Officials",
                                    2740: "ENT-Dancers And Choreographers",
                                    2751: "ENT-Music Directors and Composers",
                                    2752: "ENT-Musicians and Singers",
                                    2755: "ENT-Disc Jockeys, Except Radio",
                                    2770: "ENT-Entertainers And Performers, Sports and Related Workers, All Other",
                                    2805: "ENT-Broadcast Announcers And Radio Disc Jockeys",
                                    2810: "ENT-News Analysts, Reporters And Correspondents",
                                    2825: "ENT-Public Relations Specialists",
                                    2830: "ENT-Editors",
                                    2840: "ENT-Technical Writers",
                                    2850: "ENT-Writers And Authors",
                                    2861: "ENT-Interpreters and Translators",
                                    2862: "ENT-Court Reporters and Simultaneous Captioners",
                                    2865: "ENT-Media And Communication Workers, All Other",
                                    2905: "ENT-Other Media And Communication Equipment Workers",
                                    2910: "ENT-Photographers",
                                    2920: "ENT-Television, Video, And Motion Picture Camera Operators And Editors",
                                    3000: "MED-Chiropractors",
                                    3010: "MED-Dentists",
                                    3030: "MED-Dietitians And Nutritionists",
                                    3040: "MED-Optometrists",
                                    3050: "MED-Pharmacists",
                                    3090: "MED-Physicians",
                                    3100: "MED-Surgeons",
                                    3110: "MED-Physician Assistants",
                                    3120: "MED-Podiatrists",
                                    3140: "MED-Audiologists",
                                    3150: "MED-Occupational Therapists",
                                    3160: "MED-Physical Therapists",
                                    3200: "MED-Radiation Therapists",
                                    3210: "MED-Recreational Therapists",
                                    3220: "MED-Respiratory Therapists",
                                    3230: "MED-Speech-Language Pathologists",
                                    3245: "MED-Other Therapists",
                                    3250: "MED-Veterinarians",
                                    3255: "MED-Registered Nurses",
                                    3256: "MED-Nurse Anesthetists",
                                    3258: "MED-Nurse Practitioners, And Nurse Midwives",
                                    3261: "MED-Acupuncturists",
                                    3270: "MED-Healthcare Diagnosing Or Treating Practitioners, All Other",
                                    3300: "MED-Clinical Laboratory Technologists And Technicians",
                                    3310: "MED-Dental Hygienists",
                                    3321: "MED-Cardiovascular Technologists and Technicians",
                                    3322: "MED-Diagnostic Medical Sonographers",
                                    3323: "MED-Radiologic Technologists And Technicians",
                                    3324: "MED-Magnetic Resonance Imaging Technologists",
                                    3330: "MED-Nuclear Medicine Technologists and Medical Dosimetrists",
                                    3401: "MED-Emergency Medical Technicians",
                                    3402: "MED-Paramedics",
                                    3421: "MED-Pharmacy Technicians",
                                    3422: "MED-Psychiatric Technicians",
                                    3423: "MED-Surgical Technologists",
                                    3424: "MED-Veterinary Technologists and Technicians",
                                    3430: "MED-Dietetic Technicians And Ophthalmic Medical Technicians",
                                    3500: "MED-Licensed Practical And Licensed Vocational Nurses",
                                    3515: "MED-Medical Records Specialists",
                                    3520: "MED-Opticians, Dispensing",
                                    3545: "MED-Miscellaneous Health Technologists and Technicians",
                                    3550: "MED-Other Healthcare Practitioners and Technical Occupations",
                                    3601: "HLS-Home Health Aides",
                                    3602: "HLS-Personal Care Aides",
                                    3603: "HLS-Nursing Assistants",
                                    3605: "HLS-Orderlies and Psychiatric Aides",
                                    3610: "HLS-Occupational Therapy Assistants And Aides",
                                    3620: "HLS-Physical Therapist Assistants And Aides",
                                    3630: "HLS-Massage Therapists",
                                    3640: "HLS-Dental Assistants",
                                    3645: "HLS-Medical Assistants",
                                    3646: "HLS-Medical Transcriptionists",
                                    3647: "HLS-Pharmacy Aides",
                                    3648: "HLS-Veterinary Assistants And Laboratory Animal Caretakers",
                                    3649: "HLS-Phlebotomists",
                                    3655: "HLS-Other Healthcare Support Workers",
                                    3700: "PRT-First-Line Supervisors Of Correctional Officers",
                                    3710: "PRT-First-Line Supervisors Of Police And Detectives",
                                    3720: "PRT-First-Line Supervisors Of Fire Fighting And Prevention Workers",
                                    3725: "PRT-First-Line Supervisors of Security And Protective Service Workers, All Other",
                                    3740: "PRT-Firefighters",
                                    3750: "PRT-Fire Inspectors",
                                    3801: "PRT-Bailiffs",
                                    3802: "PRT-Correctional Officers and Jailers",
                                    3820: "PRT-Detectives And Criminal Investigators",
                                    3840: "PRT-Fish And Game Wardens And Parking Enforcement Officers",
                                    3870: "PRT-Police Officers",
                                    3900: "PRT-Animal Control Workers",
                                    3910: "PRT-Private Detectives And Investigators",
                                    3930: "PRT-Security Guards And Gaming Surveillance Officers",
                                    3940: "PRT-Crossing Guards And Flaggers",
                                    3945: "PRT-Transportation Security Screeners",
                                    3946: "PRT-School Bus Monitors",
                                    3960: "PRT-Other Protective Service Workers",
                                    4000: "EAT-Chefs And Head Cooks",
                                    4010: "EAT-First-Line Supervisors Of Food Preparation And Serving Workers",
                                    4020: "EAT-Cooks",
                                    4030: "EAT-Food Preparation Workers",
                                    4040: "EAT-Bartenders",
                                    4055: "EAT-Fast Food And Counter Workers",
                                    4110: "EAT-Waiters And Waitresses",
                                    4120: "EAT-Food Servers, Nonrestaurant",
                                    4130: "EAT-Dining Room And Cafeteria Attendants And Bartender Helpers",
                                    4140: "EAT-Dishwashers",
                                    4150: "EAT-Hosts And Hostesses, Restaurant, Lounge, And Coffee Shop",
                                    4160: "EAT-Food Preparation and Serving Related Workers, All Other",
                                    4200: "CLN-First-Line Supervisors Of Housekeeping And Janitorial Workers",
                                    4210: "CLN-First-Line Supervisors Of Landscaping, Lawn Service, And Groundskeeping Workers",
                                    4220: "CLN-Janitors And Building Cleaners",
                                    4230: "CLN-Maids And Housekeeping Cleaners",
                                    4240: "CLN-Pest Control Workers",
                                    4251: "CLN-Landscaping And Groundskeeping Workers",
                                    4252: "CLN-Tree Trimmers and Pruners",
                                    4255: "CLN-Other Grounds Maintenance Workers",
                                    4330: "PRS-Supervisors Of Personal Care And Service Workers",
                                    4340: "PRS-Animal Trainers",
                                    4350: "PRS-Animal Caretakers",
                                    4400: "PRS-Gambling Services Workers",
                                    4420: "PRS-Ushers, Lobby Attendants, And Ticket Takers",
                                    4435: "PRS-Other Entertainment Attendants And Related Workers",
                                    4461: "PRS-Embalmers, Crematory Operators, And Funeral Attendants",
                                    4465: "PRS-Morticians, Undertakers, And Funeral Arrangers",
                                    4500: "PRS-Barbers",
                                    4510: "PRS-Hairdressers, Hairstylists, And Cosmetologists",
                                    4521: "PRS-Manicurists And Pedicurists",
                                    4522: "PRS-Skincare Specialists",
                                    4525: "PRS-Other Personal Appearance Workers",
                                    4530: "PRS-Baggage Porters, Bellhops, And Concierges",
                                    4540: "PRS-Tour And Travel Guides",
                                    4600: "PRS-Childcare Workers",
                                    4621: "PRS-Exercise Trainers And Group Fitness Instructors",
                                    4622: "PRS-Recreation Workers",
                                    4640: "PRS-Residential Advisors",
                                    4655: "PRS-Personal Care and Service Workers, All Other",
                                    4700: "SAL-First-Line Supervisors Of Retail Sales Workers",
                                    4710: "SAL-First-Line Supervisors Of Non-Retail Sales Workers",
                                    4720: "SAL-Cashiers",
                                    4740: "SAL-Counter And Rental Clerks",
                                    4750: "SAL-Parts Salespersons",
                                    4760: "SAL-Retail Salespersons",
                                    4800: "SAL-Advertising Sales Agents",
                                    4810: "SAL-Insurance Sales Agents",
                                    4820: "SAL-Securities, Commodities, And Financial Services Sales Agents",
                                    4830: "SAL-Travel Agents",
                                    4840: "SAL-Sales Representatives Of Services, Except Advertising, Insurance, Financial Services, And Travel",
                                    4850: "SAL-Sales Representatives, Wholesale And Manufacturing",
                                    4900: "SAL-Models, Demonstrators, And Product Promoters",
                                    4920: "SAL-Real Estate Brokers And Sales Agents",
                                    4930: "SAL-Sales Engineers",
                                    4940: "SAL-Telemarketers",
                                    4950: "SAL-Door-To-Door Sales Workers, News And Street Vendors, And Related Workers",
                                    4965: "SAL-Sales And Related Workers, All Other",
                                    5000: "OFF-First-Line Supervisors Of Office And Administrative Support Workers",
                                    5010: "OFF-Switchboard Operators, Including Answering Service",
                                    5020: "OFF-Telephone Operators",
                                    5040: "OFF-Communications Equipment Operators, All Other",
                                    5100: "OFF-Bill And Account Collectors",
                                    5110: "OFF-Billing And Posting Clerks",
                                    5120: "OFF-Bookkeeping, Accounting, And Auditing Clerks",
                                    5140: "OFF-Payroll And Timekeeping Clerks",
                                    5150: "OFF-Procurement Clerks",
                                    5160: "OFF-Tellers",
                                    5165: "OFF-Other Financial Clerks",
                                    5220: "OFF-Court, Municipal, And License Clerks",
                                    5230: "OFF-Credit Authorizers, Checkers, And Clerks",
                                    5240: "OFF-Customer Service Representatives",
                                    5250: "OFF-Eligibility Interviewers, Government Programs",
                                    5260: "OFF-File Clerks",
                                    5300: "OFF-Hotel, Motel, And Resort Desk Clerks",
                                    5310: "OFF-Interviewers, Except Eligibility And Loan",
                                    5320: "OFF-Library Assistants, Clerical",
                                    5330: "OFF-Loan Interviewers And Clerks",
                                    5340: "OFF-New Accounts Clerks",
                                    5350: "OFF-Correspondence Clerks And Order Clerks",
                                    5360: "OFF-Human Resources Assistants, Except Payroll And Timekeeping",
                                    5400: "OFF-Receptionists And Information Clerks",
                                    5410: "OFF-Reservation And Transportation Ticket Agents And Travel Clerks",
                                    5420: "OFF-Other Information And Records Clerks",
                                    5500: "OFF-Cargo And Freight Agents",
                                    5510: "OFF-Couriers And Messengers",
                                    5521: "OFF-Public Safety Telecommunicators",
                                    5522: "OFF-Dispatchers, Except Police, Fire, And Ambulance",
                                    5530: "OFF-Meter Readers, Utilities",
                                    5540: "OFF-Postal Service Clerks",
                                    5550: "OFF-Postal Service Mail Carriers",
                                    5560: "OFF-Postal Service Mail Sorters, Processors, And Processing Machine Operators",
                                    5600: "OFF-Production, Planning, And Expediting Clerks",
                                    5610: "OFF-Shipping, Receiving, And Inventory Clerks",
                                    5630: "OFF-Weighers, Measurers, Checkers, And Samplers, Recordkeeping",
                                    5710: "OFF-Executive Secretaries And Executive Administrative Assistants",
                                    5720: "OFF-Legal Secretaries and Administrative Assistants",
                                    5730: "OFF-Medical Secretaries and Administrative Assistants",
                                    5740: "OFF-Secretaries And Administrative Assistants, Except Legal, Medial, And Executive",
                                    5810: "OFF-Data Entry Keyers",
                                    5820: "OFF-Word Processors And Typists",
                                    5840: "OFF-Insurance Claims And Policy Processing Clerks",
                                    5850: "OFF-Mail Clerks And Mail Machine Operators, Except Postal Service",
                                    5860: "OFF-Office Clerks, General",
                                    5900: "OFF-Office Machine Operators, Except Computer",
                                    5910: "OFF-Proofreaders And Copy Markers",
                                    5920: "OFF-Statistical Assistants",
                                    5940: "OFF-Other Office And Administrative Support Workers",
                                    6005: "FFF-First-Line Supervisors Of Farming, Fishing, And Forestry Workers",
                                    6010: "FFF-Agricultural Inspectors",
                                    6040: "FFF-Graders And Sorters, Agricultural Products",
                                    6050: "FFF-Other Agricultural Workers",
                                    6115: "FFF-Fishing And Hunting Workers",
                                    6120: "FFF-Forest And Conservation Workers",
                                    6130: "FFF-Logging Workers",
                                    6200: "CON-First-Line Supervisors Of Construction Trades And Extraction Workers",
                                    6210: "CON-Boilermakers",
                                    6220: "CON-Brickmasons, Blockmasons, Stonemasons, And Reinforcing Iron And Rebar Workers",
                                    6230: "CON-Carpenters",
                                    6240: "CON-Carpet, Floor, And Tile Installers And Finishers",
                                    6250: "CON-Cement Masons, Concrete Finishers, And Terrazzo Workers",
                                    6260: "CON-Construction Laborers",
                                    6305: "CON-Construction Equipment Operators",
                                    6330: "CON-Drywall Installers, Ceiling Tile Installers, And Tapers",
                                    6355: "CON-Electricians",
                                    6360: "CON-Glaziers",
                                    6400: "CON-Insulation Workers",
                                    6410: "CON-Painters and Paperhangers",
                                    6441: "CON-Pipelayers",
                                    6442: "CON-Plumbers, Pipefitters, And Steamfitters",
                                    6460: "CON-Plasterers And Stucco Masons",
                                    6515: "CON-Roofers",
                                    6520: "CON-Sheet Metal Workers",
                                    6530: "CON-Structural Iron And Steel Workers",
                                    6540: "CON-Solar Photovoltaic Installers",
                                    6600: "CON-Helpers, Construction Trades",
                                    6660: "CON-Construction And Building Inspectors",
                                    6700: "CON-Elevator Installers And Repairers",
                                    6710: "CON-Fence Erectors",
                                    6720: "CON-Hazardous Materials Removal Workers",
                                    6730: "CON-Highway Maintenance Workers",
                                    6740: "CON-Rail-Track Laying And Maintenance Equipment Operators",
                                    6765: "CON-Other Construction And Related Workers",
                                    6800: "EXT-Derrick, Rotary Drill, And Service Unit Operators, And Roustabouts, Oil, Gas, And Mining",
                                    6825: "EXT-Surface Mining Machine Operators And Earth Drillers",
                                    6835: "EXT-Explosives Workers, Ordnance Handling Experts, and Blasters",
                                    6850: "EXT-Underground Mining Machine Operators",
                                    6950: "EXT-Other Extraction Workers",
                                    7000: "RPR-First-Line Supervisors Of Mechanics, Installers, And Repairers",
                                    7010: "RPR-Computer, Automated Teller, And Office Machine Repairers",
                                    7020: "RPR-Radio And Telecommunications Equipment Installers And Repairers",
                                    7030: "RPR-Avionics Technicians",
                                    7040: "RPR-Electric Motor, Power Tool, And Related Repairers",
                                    7100: "RPR-Other Electrical And Electronic Equipment Mechanics, Installers, And Repairers.",
                                    7120: "RPR-Electronic Home Entertainment Equipment Installers And Repairers",
                                    7130: "RPR-Security And Fire Alarm Systems Installers",
                                    7140: "RPR-Aircraft Mechanics And Service Technicians",
                                    7150: "RPR-Automotive Body And Related Repairers",
                                    7160: "RPR-Automotive Glass Installers And Repairers",
                                    7200: "RPR-Automotive Service Technicians And Mechanics",
                                    7210: "RPR-Bus And Truck Mechanics And Diesel Engine Specialists",
                                    7220: "RPR-Heavy Vehicle And Mobile Equipment Service Technicians And Mechanics",
                                    7240: "RPR-Small Engine Mechanics",
                                    7260: "RPR-Miscellaneous Vehicle And Mobile Equipment Mechanics, Installers, And Repairers",
                                    7300: "RPR-Control And Valve Installers And Repairers",
                                    7315: "RPR-Heating, Air Conditioning, And Refrigeration Mechanics And Installers",
                                    7320: "RPR-Home Appliance Repairers",
                                    7330: "RPR-Industrial And Refractory Machinery Mechanics",
                                    7340: "RPR-Maintenance And Repair Workers, General",
                                    7350: "RPR-Maintenance Workers, Machinery",
                                    7360: "RPR-Millwrights",
                                    7410: "RPR-Electrical Power-Line Installers And Repairers",
                                    7420: "RPR-Telecommunications Line Installers And Repairers",
                                    7430: "RPR-Precision Instrument And Equipment Repairers",
                                    7510: "RPR-Coin, Vending, And Amusement Machine Servicers And Repairers",
                                    7540: "RPR-Locksmiths And Safe Repairers",
                                    7560: "RPR-Riggers",
                                    7610: "RPR-Helpers--Installation, Maintenance, And Repair Workers",
                                    7640: "RPR-Other Installation, Maintenance, And Repair Workers",
                                    7700: "PRD-First-Line Supervisors Of Production And Operating Workers",
                                    7720: "PRD-Electrical, Electronics, And Electromechanical Assemblers",
                                    7730: "PRD-Engine And Other Machine Assemblers",
                                    7740: "PRD-Structural Metal Fabricators And Fitters",
                                    7750: "PRD-Other Assemblers And Fabricators",
                                    7800: "PRD-Bakers",
                                    7810: "PRD-Butchers And Other Meat, Poultry, And Fish Processing Workers",
                                    7830: "PRD-Food And Tobacco Roasting, Baking, And Drying Machine Operators And Tenders",
                                    7840: "PRD-Food Batchmakers",
                                    7850: "PRD-Food Cooking Machine Operators And Tenders",
                                    7855: "PRD-Food Processing Workers, All Other",
                                    7905: "PRD-Computer Numerically Controlled Tool Operators And Programmers",
                                    7925: "PRD-Forming Machine Setters, Operators, And Tenders, Metal And Plastic",
                                    7950: "PRD-Cutting, Punching, And Press Machine Setters, Operators, And Tenders, Metal And Plastic",
                                    8000: "Grinding, Lapping, Polishing, And Buffing Machine Tool",
                                    8025: "PRD-Other Machine Tool Setters, Operators, And Tenders, Metal and Plastic",
                                    8030: "PRD-Machinists",
                                    8040: "PRD-Metal Furnace Operators, Tenders, Pourers, And Casters",
                                    8100: "PRD-Model Makers, Patternmakers, And Molding Machine Setters, Metal And Plastic",
                                    8130: "PRD-Tool And Die Makers",
                                    8140: "PRD-Welding, Soldering, And Brazing Workers",
                                    8225: "PRD-Other Metal Workers And Plastic Workers",
                                    8250: "PRD-Prepress Technicians And Workers",
                                    8255: "PRD-Printing Press Operators",
                                    8256: "PRD-Print Binding And Finishing Workers",
                                    8300: "PRD-Laundry And Dry-Cleaning Workers",
                                    8310: "PRD-Pressers, Textile, Garment, And Related Materials",
                                    8320: "PRD-Sewing Machine Operators",
                                    8335: "PRD-Shoe And Leather Workers",
                                    8350: "PRD-Tailors, Dressmakers, And Sewers",
                                    8365: "PRD-Textile Machine Setters, Operators, And Tenders",
                                    8450: "PRD-Upholsterers",
                                    8465: "PRD-Other Textile, Apparel, And Furnishings Workers",
                                    8500: "PRD-Cabinetmakers And Bench Carpenters",
                                    8510: "PRD-Furniture Finishers",
                                    8530: "PRD-Sawing Machine Setters, Operators, And Tenders, Wood",
                                    8540: "PRD-Woodworking Machine Setters, Operators, And Tenders, Except Sawing",
                                    8555: "PRD-Other Woodworkers",
                                    8600: "PRD-Power Plant Operators, Distributors, And Dispatchers",
                                    8610: "PRD-Stationary Engineers And Boiler Operators",
                                    8620: "PRD-Water And Wastewater Treatment Plant And System Operators",
                                    8630: "PRD-Miscellaneous Plant And System Operators",
                                    8640: "PRD-Chemical Processing Machine Setters, Operators, And Tenders",
                                    8650: "PRD-Crushing, Grinding, Polishing, Mixing, And Blending Workers",
                                    8710: "PRD-Cutting Workers",
                                    8720: "PRD-Extruding, Forming, Pressing, And Compacting Machine Setters, Operators, And Tenders",
                                    8730: "PRD-Furnace, Kiln, Oven, Drier, And Kettle Operators And Tenders",
                                    8740: "PRD-Inspectors, Testers, Sorters, Samplers, And Weighers",
                                    8750: "PRD-Jewelers And Precious Stone And Metal Workers",
                                    8760: "PRD-Dental And Ophthalmic Laboratory Technicians And Medical Appliance Technicians",
                                    8800: "PRD-Packaging And Filling Machine Operators And Tenders",
                                    8810: "PRD-Painting Workers",
                                    8830: "PRD-Photographic Process Workers And Processing Machine Operators",
                                    8850: "PRD-Adhesive Bonding Machine Operators And Tenders",
                                    8910: "PRD-Etchers And Engravers",
                                    8920: "PRD-Molders, Shapers, And Casters, Except Metal And Plastic",
                                    8930: "PRD-Paper Goods Machine Setters, Operators, And Tenders",
                                    8940: "PRD-Tire Builders",
                                    8950: "PRD-Helpers-Production Workers",
                                    8990: "PRD-Miscellaneous Production Workers, Including Equipment Operators And Tenders",
                                    9005: "TRN-Supervisors Of Transportation And Material Moving Workers",
                                    9030: "TRN-Aircraft Pilots And Flight Engineers",
                                    9040: "TRN-Air Traffic Controllers And Airfield Operations Specialists",
                                    9050: "TRN-Flight Attendants",
                                    9110: "TRN-Ambulance Drivers And Attendants, Except Emergency Medical Technicians",
                                    9121: "TRN-Bus Drivers, School",
                                    9122: "TRN-Bus Drivers, Transit And Intercity",
                                    9130: "TRN-Driver/Sales Workers And Truck Drivers",
                                    9141: "TRN-Shuttle Drivers And Chauffeurs",
                                    9142: "TRN-Taxi Drivers",
                                    9150: "TRN-Motor Vehicle Operators, All Other",
                                    9210: "TRN-Locomotive Engineers And Operators",
                                    9240: "TRN-Railroad Conductors And Yardmasters",
                                    9265: "TRN-Other Rail Transportation Workers",
                                    9300: "TRN-Sailors And Marine Oilers, And Ship Engineers",
                                    9310: "TRN-Ship And Boat Captains And Operators",
                                    9350: "TRN-Parking Lot Attendants",
                                    9365: "TRN-Transportation Service Attendants",
                                    9410: "TRN-Transportation Inspectors",
                                    9415: "TRN-Passenger Attendants",
                                    9430: "TRN-Other Transportation Workers",
                                    9510: "TRN-Crane And Tower Operators",
                                    9570: "TRN-Conveyor, Dredge, And Hoist and Winch Operators",
                                    9600: "TRN-Industrial Truck And Tractor Operators",
                                    9610: "TRN-Cleaners Of Vehicles And Equipment",
                                    9620: "TRN-Laborers And Freight, Stock, And Material Movers, Hand",
                                    9630: "TRN-Machine Feeders And Offbearers",
                                    9640: "TRN-Packers And Packagers, Hand",
                                    9645: "TRN-Stockers And Order Fillers",
                                    9650: "TRN-Pumping Station Operators",
                                    9720: "TRN-Refuse And Recyclable Material Collectors",
                                    9760: "TRN-Other Material Moving Workers",
                                    9800: "MIL-Military Officer Special And Tactical Operations Leaders",
                                    9810: "MIL-First-Line Enlisted Military Supervisors",
                                    9825: "MIL-Military Enlisted Tactical Operations And Air/Weapons Specialists And Crew Members",
                                    9830: "MIL-Military, Rank Not Specified",
                                    9920: "Unemployed And Last Worked 5 Years Ago Or Earlier Or Never Worked"}, inplace=True)

# relationship
df_2019["relationship"].replace({0: "ref", 1: "spouse", 
                                     2: "child", 3: "adopted-child",
                                     4: "step-child", 5: "sibling", 
                                     6: "parent", 7: "grandchild", 
                                     8: "parent-inlaw", 9: "child-inlaw",
                                    10: "other-relative", 11: "boarder", 
                                    12: "house-room-mate", 13: "unmarried-partner", 
                                    14: "foster-child", 15: "non-relative-other",
                                    16: "institutional-group", 17: "noninstitutional-group"}, inplace=True)

# race
df_2019["race"].replace({1: "white-alone", 2: "black-aa-alone", 
                                     3: "american-indian-alone", 4: "alaska-native-alone",
                                     5: "indian-or-ak-native", 6: "asian-alone", 
                                     7: "native-hi-pac-isl", 8: "other-race-alone", 
                                     9: "two-or-more-races"}, inplace=True)


# sex
df_2019["sex"].replace({1: "male", 2: "female"}, inplace=True)

# field-of-degree
df_2019["field-of-degree"].replace({1100: 'General Agriculture',
                                      1101: 'Agriculture Production And Management',
                                      1102: 'Agricultural Economics',
                                      1103: 'Animal Sciences',
                                      1104: 'Food Science',
                                      1105: 'Plant Science And Agronomy',
                                      1106: 'Soil Science',
                                      1199: 'Miscellaneous Agriculture',
                                      1301: 'Environmental Science',
                                      1302: 'Forestry',
                                      1303: 'Natural Resources Management',
                                      1401: 'Architecture',
                                      1501: 'Area Ethnic And Civilization Studies',
                                      1901: 'Communications',
                                      1902: 'Journalism',
                                      1903: 'Mass Media',
                                      1904: 'Advertising And Public Relations',
                                      2001: 'Communication Technologies',
                                      2100: 'Computer And Information Systems',
                                      2101: 'Computer Programming And Data Processing',
                                      2102: 'Computer Science',
                                      2105: 'Information Sciences',
                                      2106: 'Computer Administration Management And Security',
                                      2107: 'Computer Networking And Telecommunications',
                                      2201: 'Cosmetology Services And Culinary Arts',
                                      2300: 'General Education',
                                      2301: 'Educational Administration And Supervision',
                                      2303: 'School Student Counseling',
                                      2304: 'Elementary Education',
                                      2305: 'Mathematics Teacher Education',
                                      2306: 'Physical And Health Education Teaching',
                                      2307: 'Early Childhood Education',
                                      2308: 'Science And Computer Teacher Education',
                                      2309: 'Secondary Teacher Education',
                                      2310: 'Special Needs Education',
                                      2311: 'Social Science Or History Teacher Education',
                                      2312: 'Teacher Education: Multiple Levels',
                                      2313: 'Language And Drama Education',
                                      2314: 'Art And Music Education',
                                      2399: 'Miscellaneous Education',
                                      2400: 'General Engineering',
                                      2401: 'Aerospace Engineering',
                                      2402: 'Biological Engineering',
                                      2403: 'Architectural Engineering',
                                      2404: 'Biomedical Engineering',
                                      2405: 'Chemical Engineering',
                                      2406: 'Civil Engineering',
                                      2407: 'Computer Engineering',
                                      2408: 'Electrical Engineering',
                                      2409: 'Engineering Mechanics Physics And Science',
                                      2410: 'Environmental Engineering',
                                      2411: 'Geological And Geophysical Engineering',
                                      2412: 'Industrial And Manufacturing Engineering',
                                      2413: 'Materials Engineering And Materials Science',
                                      2414: 'Mechanical Engineering',
                                      2415: 'Metallurgical Engineering',
                                      2416: 'Mining And Mineral Engineering',
                                      2417: 'Naval Architecture And Marine Engineering',
                                      2418: 'Nuclear Engineering',
                                      2419: 'Petroleum Engineering',
                                      2499: 'Miscellaneous Engineering',
                                      2500: 'Engineering Technologies',
                                      2501: 'Engineering And Industrial Management',
                                      2502: 'Electrical Engineering Technology',
                                      2503: 'Industrial Production Technologies',
                                      2504: 'Mechanical Engineering Related Technologies',
                                      2599: 'Miscellaneous Engineering Technologies',
                                      2601: 'Linguistics And Comparative Language And Literature',
                                      2602: 'French German Latin And Other Common Foreign Language',
                                      2603: 'Other Foreign Languages',
                                      2901: 'Family And Consumer Sciences',
                                      3201: 'Court Reporting',
                                      3202: 'Pre-Law And Legal Studies',
                                      3301: 'English Language And Literature',
                                      3302: 'Composition And Rhetoric',
                                      3401: 'Liberal Arts',
                                      3402: 'Humanities',
                                      3501: 'Library Science',
                                      3600: 'Biology',
                                      3601: 'Biochemical Sciences',
                                      3602: 'Botany',
                                      3603: 'Molecular Biology',
                                      3604: 'Ecology',
                                      3605: 'Genetics',
                                      3606: 'Microbiology',
                                      3607: 'Pharmacology',
                                      3608: 'Physiology',
                                      3609: 'Zoology',
                                      3611: 'Neuroscience',
                                      3699: 'Miscellaneous Biology',
                                      3700: 'Mathematics',
                                      3701: 'Applied Mathematics',
                                      3702: 'Statistics And Decision Science',
                                      3801: 'Military Technologies',
                                      4000: 'Multi/Interdisciplinary Studies',
                                      4001: 'Intercultural And International Studies',
                                      4002: 'Nutrition Sciences',
                                      4005: 'Mathematics And Computer Science',
                                      4006: 'Cognitive Science And Biopsychology',
                                      4007: 'Interdisciplinary Social Sciences',
                                      4101: 'Physical Fitness Parks Recreation And Leisure',
                                      4801: 'Philosophy And Religious Studies',
                                      4901: 'Theology And Religious Vocations',
                                      5000: 'Physical Sciences',
                                      5001: 'Astronomy And Astrophysics',
                                      5002: 'Atmospheric Sciences And Meteorology',
                                      5003: 'Chemistry',
                                      5004: 'Geology And Earth Science',
                                      5005: 'Geosciences',
                                      5006: 'Oceanography',
                                      5007: 'Physics',
                                      5008: 'Materials Science',
                                      5098: 'Multi-Disciplinary Or General Science',
                                      5102: 'Nuclear, Industrial Radiology, And Biological Technologies',
                                      5200: 'Psychology',
                                      5201: 'Educational Psychology',
                                      5202: 'Clinical Psychology',
                                      5203: 'Counseling Psychology',
                                      5205: 'Industrial And Organizational Psychology',
                                      5206: 'Social Psychology',                                    
                                      5299: 'Miscellaneous Psychology',
                                      5301: 'Criminal Justice And Fire Protection',
                                      5401: 'Public Administration',
                                      5402: 'Public Policy',
                                      5403: 'Human Services And Community Organization',
                                      5404: 'Social Work',
                                      5500: 'General Social Sciences',
                                      5501: 'Economics',
                                      5502: 'Anthropology And Archeology',
                                      5503: 'Criminology',
                                      5504: 'Geography',
                                      5505: 'International Relations',
                                      5506: 'Political Science And Government',
                                      5507: 'Sociology',
                                      5599: 'Miscellaneous Social Sciences',
                                      5601: 'Construction Services',
                                      5701: 'Electrical, Mechanical, And Precision Technologies And',
                                      5901: 'Transportation Sciences And Technologies',
                                      6000: 'Fine Arts',
                                      6001: 'Drama And Theater Arts',
                                      6002: 'Music',
                                      6003: 'Visual And Performing Arts',
                                      6004: 'Commercial Art And Graphic Design',
                                      6005: 'Film Video And Photographic Arts',
                                      6006: 'Art History And Criticism',
                                      6007: 'Studio Arts',
                                      6099: 'Miscellaneous Fine Arts',
                                      6100: 'General Medical And Health Services',
                                      6102: 'Communication Disorders Sciences And Services',
                                      6103: 'Health And Medical Administrative Services',
                                      6104: 'Medical Assisting Services',
                                      6105: 'Medical Technologies Technicians',
                                      6106: 'Health And Medical Preparatory Programs',
                                      6107: 'Nursing',
                                      6108: 'Pharmacy Pharmaceutical Sciences And Administration',
                                      6109: 'Treatment Therapy Professions',
                                      6110: 'Community And Public Health',
                                      6199: 'Miscellaneous Health Medical Professions',
                                      6200: 'General Business',
                                      6201: 'Accounting',
                                      6202: 'Actuarial Science',
                                      6203: 'Business Management And Administration',
                                      6204: 'Operations Logistics And E-Commerce',
                                      6205: 'Business Economics',
                                      6206: 'Marketing And Marketing Research',
                                      6207: 'Finance',
                                      6209: 'Human Resources And Personnel Management',
                                      6210: 'International Business',
                                      6211: 'Hospitality Management',
                                      6212: 'Management Information Systems And Statistics',
                                      6299: 'Miscellaneous Business & Medical Administration',
                                      6402: 'History',
                                      6403: 'United States History'}, inplace=True)



# place of birth
df_2019["place-of-birth"].replace({1: "Alabama/AL",
                                    2: "Alaska/AK",
                                    4: "Arizona/AZ",
                                    5: "Arkansas/AR",
                                    6: "California/CA",
                                    8: "Colorado/CO",
                                    9: "Connecticut/CT",
                                    10: "Delaware/DE",
                                    11: "District of Columbia/DC",
                                    12: "Florida/FL",
                                    13: "Georgia/GA",
                                    15: "Hawaii/HI",
                                    16: "Idaho/ID",
                                    17: "Illinois/IL",
                                    18: "Indiana/IN",
                                    19: "Iowa/IA",
                                    20: "Kansas/KS",
                                    21: "Kentucky/KY",
                                    22: "Louisiana/LA",
                                    23: "Maine/ME",
                                    24: "Maryland/MD",
                                    25: "Massachusetts/MA",
                                    26: "Michigan/MI",
                                    27: "Minnesota/MN",
                                    28: "Mississippi/MS",
                                    29: "Missouri/MO",
                                    30: "Montana/MT",
                                    31: "Nebraska/NE",
                                    32: "Nevada/NV",
                                    33: "New Hampshire/NH",
                                    34: "New Jersey/NJ",
                                    35: "New Mexico/NM",
                                    36: "New York/NY",
                                    37: "North Carolina/NC",
                                    38: "North Dakota/ND",
                                    39: "Ohio/OH",
                                    40: "Oklahoma/OK",
                                    41: "Oregon/OR",
                                    42: "Pennsylvania/PA",
                                    44: "Rhode Island/RI",
                                    45: "South Carolina/SC",
                                    46: "South Dakota/SD",
                                    47: "Tennessee/TN",
                                    48: "Texas/TX",
                                    49: "Utah/UT",
                                    50: "Vermont/VT",
                                    51: "Virginia/VA",
                                    53: "Washington/WA",
                                    54: "West Virginia/WV",
                                    55: "Wisconsin/WI",
                                    56: "Wyoming/WY",
                                    60: "American Samoa",
                                    66: "Guam",
                                    69: "Commonwealth of the Northern Mariana Islands",
                                    72: "Puerto Rico",
                                    78: "US Virgin Islands",
                                    100: "Albania",
                                    102: "Austria",
                                    103: "Belgium",
                                    104: "Bulgaria",
                                    105: "Czechoslovakia",
                                    106: "Denmark",
                                    108: "Finland",
                                    109: "France",
                                    110: "Germany",
                                    116: "Greece",
                                    117: "Hungary",
                                    118: "Iceland",
                                    119: "Ireland",
                                    120: "Italy",
                                    126: "Netherlands",
                                    127: "Norway",
                                    128: "Poland",
                                    129: "Portugal",
                                    130: "Azores Islands",
                                    132: "Romania",
                                    134: "Spain",
                                    136: "Sweden",
                                    137: "Switzerland",
                                    138: "United Kingdom, Not Specified",
                                    139: "England",
                                    140: "Scotland",
                                    142: "Northern Ireland",
                                    147: "Yugoslavia",
                                    148: "Czech Republic",
                                    149: "Slovakia",
                                    150: "Bosnia and Herzegovina",
                                    151: "Croatia",
                                    152: "Macedonia",
                                    154: "Serbia",
                                    156: "Latvia",
                                    157: "Lithuania",
                                    158: "Armenia",
                                    159: "Azerbaijan",
                                    160: "Belarus",
                                    161: "Georgia",
                                    162: "Moldova",
                                    163: "Russia",
                                    164: "Ukraine",
                                    165: "USSR",
                                    166: "Europe",
                                    167: "Kosovo",
                                    168: "Montenegro",
                                    169: "Other Europe, Not Specified",
                                    200: "Afghanistan",
                                    202: "Bangladesh",
                                    203: "Bhutan",
                                    205: "Myanmar",
                                    206: "Cambodia",
                                    207: "China",
                                    209: "Hong Kong",
                                    210: "India",
                                    211: "Indonesia",
                                    212: "Iran",
                                    213: "Iraq",
                                    214: "Israel",
                                    215: "Japan",
                                    216: "Jordan",
                                    217: "Korea",
                                    218: "Kazakhstan",
                                    219: "Kyrgyzstan",
                                    222: "Kuwait",
                                    223: "Laos",
                                    224: "Lebanon",
                                    226: "Malaysia",
                                    228: "Mongolia",
                                    229: "Nepal",
                                    231: "Pakistan",
                                    233: "Philippines",
                                    235: "Saudi Arabia",
                                    236: "Singapore",
                                    238: "Sri Lanka",
                                    239: "Syria",
                                    240: "Taiwan",
                                    242: "Thailand",
                                    243: "Turkey",
                                    245: "United Arab Emirates",
                                    246: "Uzbekistan",
                                    247: "Vietnam",
                                    248: "Yemen",
                                    249: "Asia",
                                    253: "South Central Asia, Not Specified",
                                    254: "Other Asia, Not Specified",
                                    300: "Bermuda",
                                    301: "Canada",
                                    303: "Mexico",
                                    310: "Belize",
                                    311: "Costa Rica",
                                    312: "El Salvador",
                                    313: "Guatemala",
                                    314: "Honduras",
                                    315: "Nicaragua",
                                    316: "Panama",
                                    321: "Antigua & Barbuda",
                                    323: "Bahamas",
                                    324: "Barbados",
                                    327: "Cuba",
                                    328: "Dominica",
                                    329: "Dominican Republic",
                                    330: "Grenada",
                                    332: "Haiti",
                                    333: "Jamaica",
                                    338: "St. Kitts-Nevis",
                                    339: "St. Lucia",
                                    340: "St. Vincent & the Grenadines",
                                    341: "Trinidad & Tobago",
                                    343: "West Indies",
                                    344: "Caribbean, Not Specified",
                                    360: "Argentina",
                                    361: "Bolivia",
                                    362: "Brazil",
                                    363: "Chile",
                                    364: "Colombia",
                                    365: "Ecuador",
                                    368: "Guyana",
                                    369: "Paraguay",
                                    370: "Peru",
                                    372: "Uruguay",
                                    373: "Venezuela",
                                    374: "South America",
                                    399: "Americas, Not Specified",
                                    400: "Algeria",
                                    407: "Cameroon",
                                    408: "Cabo Verde",
                                    412: "Congo",
                                    414: "Egypt",
                                    416: "Ethiopia",
                                    417: "Eritrea",
                                    420: "Gambia",
                                    421: "Ghana",
                                    423: "Guinea",
                                    425: "Ivory Coast",
                                    427: "Kenya",
                                    429: "Liberia",
                                    430: "Libya",
                                    436: "Morocco",
                                    440: "Nigeria",
                                    442: "Rwanda",
                                    444: "Senegal",
                                    447: "Sierra Leone",
                                    448: "Somalia",
                                    449: "South Africa",
                                    451: "Sudan",
                                    453: "Tanzania",
                                    454: "Togo",
                                    456: "Tunisia",
                                    457: "Uganda",
                                    459: "Democratic Republic of Congo (Zaire)",
                                    460: "Zambia",
                                    461: "Zimbabwe",
                                    462: "Africa",
                                    463: "South Sudan",
                                    464: "Northern Africa, Not Specified",
                                    467: "Western Africa, Not Specified",
                                    468: "Other Africa, Not Specified",
                                    469: "Eastern Africa, Not Specified",
                                    501: "Australia",
                                    508: "Fiji",
                                    511: "Marshall Islands",
                                    512: "Micronesia",
                                    515: "New Zealand",
                                    523: "Tonga",
                                    527: "Samoa",
                                    554: "Other US Island Areas, Oceania, Not Specified, or at Sea"}, inplace=True)



# stem-degree
df_2019["stem-degree"].replace({1: "Yes", 2: "No"}, inplace=True)


df_2019.head()

Unnamed: 0,Geographic-division,area-code,region,state,RT,person-weight,age,citizenship-status,class-worker,education,...,occupation,relationship,race,sex,extra-income,hours-per-week,field-of-degree,place-of-birth,income,stem-degree
0,East South Central,1100,South,AL,P,20,19,Born-US,Private-nonprof,some-college-great1,...,OFF-Customer Service Representatives,38,black-aa-alone,male,0.0,32.0,,Alabama/AL,1400.0,
1,East South Central,2100,South,AL,P,61,39,Born-US,State-gov,bachelor,...,HLS-Personal Care Aides,37,white-alone,female,0.0,40.0,English Language And Literature,Alabama/AL,60000.0,No
2,East South Central,301,South,AL,P,89,19,Born-US,Private-prof,some-college-great1,...,SAL-Cashiers,38,black-aa-alone,male,0.0,31.0,,Alabama/AL,390.0,
3,East South Central,2100,South,AL,P,61,39,Born-US,State-gov,bachelor,...,HLS-Personal Care Aides,37,white-alone,female,0.0,40.0,English Language And Literature,Alabama/AL,60000.0,No
4,East South Central,2000,South,AL,P,49,21,Born-US,Private-prof,some-college-great1,...,"EAT-Hosts And Hostesses, Restaurant, Lounge, A...",38,white-alone,male,0.0,12.0,,New York/NY,4440.0,


In [5]:
# create new csv of processed data
# this creates a csv that is ~150 MB - too big for Git
# df_2019.to_csv('2019_new-var_mapped_processed.csv',index=False)