<a href="https://colab.research.google.com/github/jinjingZ/Nonlinear_Productivity_Response_Model/blob/main/01_Model_Estimation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
from scipy.optimize import curve_fit

In [2]:
# Step 1 Creating fake data for modeling
def contribution_scoring(hours, coeff_elasticity, coeff_shape):
    """Map hours worked to a contribution score with a saturating curve.

    Computes ``coeff_elasticity * (coeff_shape * hours) / (1 + coeff_shape * hours)``.
    Works element-wise when ``hours`` is an array or a pandas Series.

    Parameters
    ----------
    hours : scalar or array-like
        Hours worked.
    coeff_elasticity : float
        Multiplier applied to the saturating term.
    coeff_shape : float
        Coefficient that scales ``hours`` inside the saturating term.

    Returns
    -------
    Same kind of object as ``hours`` (scalar, array or Series).
    """
    scaled_hours = coeff_shape * hours
    # Multiply first, then divide, to keep the original evaluation order.
    return coeff_elasticity * scaled_hours / (1 + scaled_hours)

np.random.seed(123)

# Simulate `days` daily records of hours worked, drawn uniformly between 5 and 10.
# The draw order (Mary, John, Jane) fixes which random numbers each person receives.
days = 30
data = {
    f'{person} Hours': np.random.uniform(5, 10, days)
    for person in ('Mary', 'John', 'Jane')
}
df = pd.DataFrame(data)

# Ground-truth [elasticity, shape] pair for each person
true_params = {
    'Mary': [12, 4],
    'John': [16, 0.4],
    'Jane': [13, 1.5],
}

# Individual contributions implied by the true parameters
for person, (elasticity, shape) in true_params.items():
    df[f'{person} Contribution'] = contribution_scoring(df[f'{person} Hours'], elasticity, shape)

# Team total = row-wise sum of all individual contribution columns
# (the team column does not exist yet, so it cannot be included in its own sum)
df['Team Contribution'] = df.filter(like='Contribution').sum(axis=1)

In [5]:
# Step 2 Prepare data for modeling
# Keep only the hours and the team total: individual contributions are dropped
# because we assume they are unknown to the modeler.
df1 = (
    df.drop(columns=[col for col in df.columns
                     if 'Contribution' in col and 'Team' not in col])
      .assign(Date=pd.date_range(start='2025-01-01', periods=days, freq='D'))
)
# Persist the modeling table
df1.to_parquet('data.parquet')

In [9]:
# Optional, Colab only: uncomment the next two lines to download 'data.parquet'.
# from google.colab import files
# files.download('data.parquet')
df1.head()

Unnamed: 0,Mary Hours,John Hours,Jane Hours,Team Contribution,Date
0,8.482346,5.460525,8.346569,34.670106,2025-01-01
1,6.430697,7.168506,7.929683,35.405621,2025-01-02
2,6.134257,7.154314,8.124518,35.401032,2025-01-03
3,7.756574,7.468425,8.373445,35.653948,2025-01-04
4,8.597345,7.129151,9.211712,35.629526,2025-01-05


In [17]:
# Regressors: every column whose name contains 'Hours'
X = df1[[col for col in df1.columns if 'Hours' in col]]
# Transpose so each row holds one person's hours and each column one day
X_data = X.values.T
# Target: the observed team total
y_data = df1['Team Contribution'].to_numpy()

In [10]:
# Step 3 Define model for estimation
def total_contribution_model(X, *params):
    """Predict the team's total contribution from each person's hours.

    Person ``i`` contributes ``elasticity_i * (curve_i * h / (1 + curve_i * h))``
    for ``h`` hours, and the contributions of all people are summed per
    observation.

    Parameters
    ----------
    X : array-like, shape (num_people, num_observations)
        Hours per person (rows) and observation (columns). A 1-D input is
        treated as a single person.
    *params : float
        Flattened ``(elasticity, curve)`` pairs, one pair per row of ``X``:
        ``elasticity_0, curve_0, elasticity_1, curve_1, ...``.

    Returns
    -------
    numpy.ndarray, shape (num_observations,)
        Summed contribution for each observation.

    Raises
    ------
    ValueError
        If the number of parameters is not exactly ``2 * num_people``.
    """
    # Accept lists and 1-D input as well as the 2-D ndarray used for fitting.
    X = np.atleast_2d(np.asarray(X))
    num_people = X.shape[0]

    # Fail loudly on a mismatch instead of silently ignoring surplus parameters
    # or raising an opaque IndexError when some are missing.
    if len(params) != 2 * num_people:
        raise ValueError(
            f"expected {2 * num_people} parameters (2 per person for "
            f"{num_people} people), got {len(params)}"
        )

    contributions = np.zeros(X.shape[1])  # running total, one entry per observation

    # Even positions hold elasticities, odd positions hold curve coefficients.
    for hours, elasticity, curve in zip(X, params[0::2], params[1::2]):
        scaled = curve * hours
        contributions += elasticity * (scaled / (1 + scaled))

    return contributions

In [23]:
# Step 4 Estimation
# Employee names are the hours-column headers without the ' Hours' suffix
employee_lst = [column.replace(' Hours', '') for column in X.columns]
num_employee = X.shape[1]

# Same starting point for everyone: elasticity 10, curve 1
initial_guesses = [10, 1] * num_employee

# Fit the model (the covariance matrix returned by curve_fit is not used)
popt, _ = curve_fit(total_contribution_model, X_data, y_data, p0=initial_guesses)

# popt is laid out as elasticity_0, curve_0, elasticity_1, curve_1, ...
estimated_params = {
    name: {"elasticity_coefficient": elasticity, "curve_coefficient": curve}
    for name, elasticity, curve in zip(employee_lst, popt[0::2], popt[1::2])
}

estimated_params

{'Mary': {'elasticity_coefficient': 12.000000000001265,
  'curve_coefficient': 4.000000000000456},
 'John': {'elasticity_coefficient': 15.999999999999641,
  'curve_coefficient': 0.3999999999999816},
 'Jane': {'elasticity_coefficient': 12.999999999999154,
  'curve_coefficient': 1.499999999999871}}