In [4]:
import pandas as pd
import numpy as np

# Load the dataset
file_path = '/content/challenge_dataset.csv'  # Replace with your file path
df = pd.read_csv(file_path)

# Remove the first row and first column
df = df.drop(df.index[0])  # Drop the first row
df = df.drop(df.columns[0], axis=1)  # Drop the first column

# Convert remaining data to numeric
df = df.apply(pd.to_numeric, errors='coerce')

# Convert DataFrame to NumPy array
data = df.to_numpy()

# Number of employees and skills
num_employees, num_skills = data.shape

# Initialize variables
group_assignment = np.random.rand(num_employees, num_skills)  # Continuous values between 0 and 1

# Gradient Descent parameters
learning_rate = 0.01
num_iterations = 1000

# Objective function: imbalance in skill totals
def objective_function(assignments):
    group1_totals = np.sum(assignments * data, axis=0)
    group2_totals = np.sum((1 - assignments) * data, axis=0)
    imbalance = np.sum(np.abs(group1_totals - group2_totals))
    return imbalance

# Gradient computation
def compute_gradients(assignments):
    group1_totals = np.sum(assignments * data, axis=0)
    group2_totals = np.sum((1 - assignments) * data, axis=0)
    gradient = -2 * (data * (group1_totals - group2_totals)) / num_employees
    return gradient

# Gradient Descent loop
for iteration in range(num_iterations):
    grad = compute_gradients(group_assignment)
    group_assignment -= learning_rate * grad
    group_assignment = np.clip(group_assignment, 0, 1)  # Ensure assignments are in [0, 1]

    # Print progress
    if iteration % 100 == 0:
        current_objective = objective_function(group_assignment)
        print(f"Iteration {iteration}, Objective Function Value: {current_objective}")

# Determine final groups
group1_assignment = (group_assignment > 0.5).astype(int)
group2_assignment = 1 - group1_assignment

group1_indices = np.where(np.sum(group1_assignment, axis=1) > 0)[0]
group2_indices = np.where(np.sum(group2_assignment, axis=1) > 0)[0]

print("Group 1 indices:", group1_indices)
print("Group 2 indices:", group2_indices)

# Validate the results
group1_data = data[group1_indices, :]
group2_data = data[group2_indices, :]

group1_totals = np.sum(group1_data, axis=0)
group2_totals = np.sum(group2_data, axis=0)

print("Group 1 totals for each skill:")
print(group1_totals)

print("Group 2 totals for each skill:")
print(group2_totals)

if np.allclose(group1_totals, group2_totals):
    print("The groups are approximately balanced in terms of skill totals.")
else:
    print("The groups are not perfectly balanced. Further adjustments may be needed.")


Iteration 0, Objective Function Value: 44313.323094156745
Iteration 100, Objective Function Value: 49347.0
Iteration 200, Objective Function Value: 49347.0
Iteration 300, Objective Function Value: 49347.0
Iteration 400, Objective Function Value: 49347.0
Iteration 500, Objective Function Value: 49347.0
Iteration 600, Objective Function Value: 49347.0
Iteration 700, Objective Function Value: 49347.0
Iteration 800, Objective Function Value: 49347.0
Iteration 900, Objective Function Value: 49347.0
Group 1 indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98]
Group 2 indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53