In [18]:
import csv
from itertools import product
import hashlib

In [19]:
# Define ranges for parameters
grades = [1, 2, 3, 4]
total_working_years = [1, 2, 4, 9, 14, 20]
years_at_company = [1, 2, 4, 9, 14, 20]
performance_ratings = [1, 2, 3, 4, 5]
average_ratings = [1, 2, 3, 4, 5]
years_since_last_promotion = [0, 2, 3, 5]
previous_hike_percent = [0, 5, 10, 14]
max_perf_rating = (max(performance_ratings))
min_perf_rating = (min(performance_ratings))            

In [20]:
# Generate all combinations
combinations = list(product(
    grades, 
    total_working_years, 
    years_at_company, 
    performance_ratings, 
    average_ratings, 
    years_since_last_promotion, 
    previous_hike_percent
))

In [21]:
# Generate a deterministic EmployeeID

def generate_employee_id(index):
    # Use a hash of the index for consistent but unique IDs
    hash_value = hashlib.sha256(str(index).encode()).hexdigest()
    return int(hash_value[:6], 16) % 1000000  # 6-digit ID

In [22]:
# Filter combinations to ensure logical consistency
def is_valid_combination(comb):
    grade, total_years, years_at_company, perf_rating, avg_rating, years_promo, prev_hike = comb
    # Years at company cannot exceed total working years
    if years_at_company > total_years:
        return False

    if grade == 1 and total_years > 4:
        return False
    if grade == 2 and total_years < 2 and total_years > 9:
        return False
    if grade == 3 and total_years < 9 and total_years > 14:
        return False
    if grade == 4 and total_years < 14:
        return False
    
    #years since last promotion should not exceed the number of years at company
    if (years_promo >= years_at_company):
        return False

    if years_at_company == 1 and avg_rating != perf_rating:
        return False
    if years_at_company > 1:
        if ((avg_rating * years_at_company) - perf_rating) >= max_perf_rating * (years_at_company -1):
            return False
        if ((avg_rating * years_at_company) - perf_rating) <= min_perf_rating *(years_at_company -1):
            return False

    if years_at_company == 1 and prev_hike > 0:
        return False

    if grade == 1 and prev_hike > 10:
        return False

    if grade == 2 and prev_hike > 14:
        return False

    if grade == 3 and prev_hike > 14:
        return False

    if grade == 4 and prev_hike > 14:
        return False

    return True

In [23]:
valid_combinations = [comb for comb in combinations if is_valid_combination(comb)]

In [24]:
# Write combinations to CSV
output_file = "employee_details.csv"
headers = [
    "EmployeeID","Grade", "TotalWorkingYears", "YearsAtCompany", "PerformanceRating", "AverageRating", 
    "YearsSinceLastPromotion", "PreviousHikePercent"
]

with open(output_file, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(headers)
    for idx, comb in enumerate(valid_combinations):
        employee_id = generate_employee_id(idx)
        writer.writerow([employee_id] + list(comb))

print(f"CSV file '{output_file}' with {len(valid_combinations)} rows generated successfully!")

CSV file 'employee_details.csv' with 6220 rows generated successfully!
