In [1]:
import numpy as np
import pandas as pd

# Fix the legacy global NumPy RNG so every run reproduces the same dataset.
np.random.seed(42)

# Number of synthetic rows to generate.
n = 200

# Build the feature columns one at a time. The draw order matters: all columns
# share the single seeded global RNG stream, so reordering these statements
# would change every generated value.
feature_columns = {}
feature_columns['experience'] = np.random.randint(0, 21, size=n)            # 0–20 years
feature_columns['primary_skill'] = np.random.randint(0, 5, size=n)          # Encoded skills
feature_columns['certification'] = np.random.randint(0, 2, size=n)          # 0 = No, 1 = Yes
feature_columns['upskilling_last_year'] = np.random.randint(0, 2, size=n)   # 0 = No, 1 = Yes
feature_columns['industry'] = np.random.randint(0, 5, size=n)               # Encoded industries
feature_columns['skill_demand'] = np.random.randint(1, 11, size=n)          # 1–10
# Uniform draw in [0, 1), rounded to two decimals.
feature_columns['industry_layoff_risk'] = np.random.uniform(0, 1, size=n).round(2)
feature_columns['role_demand'] = np.random.randint(0, 3, size=n)            # 0 = Low, 1 = Medium, 2 = High
feature_columns['company_size'] = np.random.randint(0, 3, size=n)           # 0 = Small, 1 = Mid, 2 = Large
feature_columns['salary_band'] = np.random.randint(0, 3, size=n)            # 0 = Low, 1 = Mid, 2 = High

# Dict insertion order becomes the DataFrame column order.
df = pd.DataFrame(feature_columns)

# ---- CREATE TARGET VARIABLE ----
# One boolean flag per risk factor; a row's risk_score is the number of
# factors that apply to it (an integer from 0 to 8).
risk_flags = [
    df['experience'].lt(3),
    df['certification'].eq(0),
    df['upskilling_last_year'].eq(0),
    df['skill_demand'].le(4),
    df['industry_layoff_risk'].gt(0.6),
    df['role_demand'].eq(0),
    df['company_size'].eq(0),
    df['salary_band'].eq(0),
]
risk_score = sum(flag.astype(int) for flag in risk_flags)

# Map the score onto three classes: 0 = Low, 1 = Medium, 2 = High.
# The bands (<= 2, 3–4, > 4) cover every possible integer score.
risk_levels = {
    0: risk_score.le(2),
    1: risk_score.between(3, 4),
    2: risk_score.gt(4),
}
df['Layoff_Risk'] = np.select(
    list(risk_levels.values()),
    list(risk_levels.keys()),
)

# ---- SAVE DATASET ----
# Write the frame to CSV in the working directory, omitting the row index.
df.to_csv("layoff_synthetic_200.csv", index=False)

# ---- VERIFY ----
# Only the last expression of a cell is rendered automatically, so the preview
# has to be shown explicitly with display() (provided by the Jupyter/IPython
# kernel); a bare df.head() here would be evaluated and silently discarded.
display(df.head())
# Class counts of the target column, rendered as the cell's output.
df['Layoff_Risk'].value_counts()


Layoff_Risk
1    110
0     63
2     27
Name: count, dtype: int64