In [1]:
import pandas as pd

In [2]:
# Read the raw CSV extract into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
file_path = "ESS10SC-subset.csv"
job_sat = pd.read_csv(filepath_or_buffer=file_path)


In [3]:
# Discard the columns listed below. `drop` returns a new frame, so the raw
# `job_sat` frame itself is not modified.
clean_job_sat = job_sat.drop(
    labels=[
        "idno", "dweight", "pweight", "pspwght", "name", "essround",
        "edition", "proddate", "edlvhpl", "edlvdse", "prob", "stratum",
        "psu", "yrbrn", "crpdwk", "pdjobyr", "njbspv", "emplno",
    ],
    axis="columns",
)

In [4]:
# Value filters: column name -> exclusive upper bound.
# The filtering loop keeps a row only when its value is strictly below the
# bound for every column listed here.
# NOTE(review): the bounds presumably sit just under the survey's
# "not applicable / refusal / don't know" codes -- confirm against the codebook.
filters = dict(
    stfmjob=66, happy=66, inprdsc=66,
    health=7, hlthhmp=7,
    rlgdgr=66,
    brncntr=3, gndr=3,
    agea=666,
    rshpsts=66,
    domicil=7,
    edulvlb=5555,
    eduyrs=66,
    emplrel=6, wrkctra=6, estsz=6,
    wkdcorga=66,
    wkhtot=666, nacer2=666,
    tporgwk=66,
    uemp3m=6,
    hincsrca=66, hinctnta=66,
    emprelp=6,
    atncrse=7,
    trdawrk=6, jbprtfp=6, pfmfdjba=6, dcsfwrka=6,
)

In [5]:
# Build one boolean mask covering every filtered column, then subset once.
# A row survives only if each listed column is strictly below its bound;
# NaN values compare as False and are therefore dropped as well.
rows_to_keep = pd.Series(True, index=clean_job_sat.index)
for col, threshold in filters.items():
    rows_to_keep &= clean_job_sat[col] < threshold
clean_job_sat = clean_job_sat[rows_to_keep]

In [6]:
# Store the 'cntry' column with pandas' categorical dtype
country_as_category = clean_job_sat["cntry"].astype("category")
clean_job_sat["cntry"] = country_as_category

In [7]:
# Display the filtered frame (last expression of the cell -> rich output)
clean_job_sat

Unnamed: 0,cntry,anweight,happy,inprdsc,health,hlthhmp,rlgdgr,brncntr,gndr,agea,...,uemp3m,hincsrca,hinctnta,emprelp,atncrse,stfmjob,trdawrk,jbprtfp,pfmfdjba,dcsfwrka
0,DE,0.843071,8,2,3,2,8,1,1,56,...,2,1,9,1,2,8,4,4,4,2
4,DE,0.620281,9,4,1,3,3,1,1,41,...,2,2,9,2,1,6,2,3,2,3
9,DE,0.531075,8,3,2,3,5,1,2,58,...,2,2,9,2,1,7,3,3,3,2
19,DE,0.874986,8,2,4,1,0,1,1,55,...,2,1,8,2,1,8,4,1,1,2
21,DE,0.914222,7,4,1,3,8,1,2,44,...,2,1,7,1,1,10,3,1,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13054,SE,0.576273,6,4,2,3,7,1,2,45,...,1,1,9,1,1,9,3,4,2,3
13056,SE,0.403964,6,1,3,2,0,1,2,46,...,1,1,7,1,1,8,5,4,2,2
13063,SE,0.506704,7,3,2,3,0,1,1,49,...,1,1,7,1,2,8,3,2,2,2
13067,SE,0.553192,6,3,1,2,6,1,1,21,...,1,3,4,1,2,8,3,1,1,2


In [8]:
# Relationship status: replace numeric codes 1-6 with text labels and store
# the result as a categorical column.
# Codes missing from the table become NaN, so re-running this cell on the
# already-labelled column would blank it out.
rshpsts_labels = dict(enumerate(
    (
        "Legally married",
        "In a legally registered civil union",
        "Living with my partner - not legally recognised",
        "Living with my partner - legally recognised",
        "Legally separated",
        "Legally divorced/Civil union dissolved",
    ),
    start=1,
))
clean_job_sat["rshpsts"] = (
    clean_job_sat["rshpsts"].map(rshpsts_labels).astype("category")
)

In [9]:
# Domicile: replace numeric codes 1-5 with text labels and store the result
# as a categorical column.
# Codes missing from the table become NaN, so re-running this cell on the
# already-labelled column would blank it out.
domicil_labels = dict(enumerate(
    (
        "A big city",
        "Suburbs or outskirts of big city",
        "Town or small city",
        "Country village",
        "Farm or home in countryside",
    ),
    start=1,
))
clean_job_sat["domicil"] = (
    clean_job_sat["domicil"].map(domicil_labels).astype("category")
)

In [10]:
# Work contract: replace numeric codes 1-3 with text labels and store the
# result as a categorical column.
# Codes missing from the table become NaN, so re-running this cell on the
# already-labelled column would blank it out.
wrkctra_labels = dict(enumerate(
    ("Unlimited", "Limited", "No contract"),
    start=1,
))
clean_job_sat["wrkctra"] = (
    clean_job_sat["wrkctra"].map(wrkctra_labels).astype("category")
)

In [11]:

# Employment relation: replace numeric codes 1-3 with text labels and store
# the result as a categorical column.
# Codes missing from the table become NaN, so re-running this cell on the
# already-labelled column would blank it out.
emplrel_labels = dict(enumerate(
    ("Employee", "Self-employed", "Working for own family business"),
    start=1,
))
clean_job_sat["emplrel"] = (
    clean_job_sat["emplrel"].map(emplrel_labels).astype("category")
)

In [12]:
# NACER2 occupational category recoding
def recode_nacer2(val):
    if val in [1, 2, 3, 5, 6, 7, 8, 9, 41, 42, 43, 49, 50, 51, 52, 53, 80, 81]:
        return "Physical Work"
    elif val in range(10, 34):
        return "Manufacturing"
    elif val in list(range(58, 76)) + list(range(85, 89)):
        return "Intellectual Work"
    elif val in [35, 36, 37, 38, 39, 45, 46, 47, 55, 56, 57, 77, 78, 79, 82, 84] + list(range(90, 100)):
        return "Service & Administration"
    else:
        return "Missing/Other"

# Classify every NACER2 code, then store the labels as a categorical column
nacer2_group_labels = clean_job_sat["nacer2"].apply(recode_nacer2)
clean_job_sat["nacer2"] = nacer2_group_labels.astype("category")