In [93]:
import glob
import random
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import pandas as pd
import joblib
from numpy.lib.stride_tricks import sliding_window_view

## Add ratio control to dataset builder

In [285]:
def empGen(num_shifts,num_emps=False,ratio=False):
    if ratio:
        #e.g (3,5)
        lower= round(num_shifts / ratio[1])
        upper= round(num_shifts / ratio[0])

        i = 0
        employee_id = []
        e = random.randint(lower, upper)
        if num_emps:
            for i in range(num_emps):
                employee_id.append(''.join(np.random.randint(9,size=(6)).astype('str')))
        else:
            for i in range(e):
                employee_id.append(''.join(np.random.randint(9,size=(6)).astype('str')))

        if len(employee_id) == 1:
            employee_id.append(''.join(np.random.randint(9,size=(6)).astype('str')))
        
    else:    
        i = 0
        lower = round(num_shifts / 2.8)
        upper = round(num_shifts / 1.2)
        employee_id = []
        e = random.randint(lower, upper)
        if num_emps:
            for i in range(num_emps):
                employee_id.append(''.join(np.random.randint(9,size=(6)).astype('str')))
        else:
            for i in range(e):
                employee_id.append(''.join(np.random.randint(9,size=(6)).astype('str')))
    
    return employee_id

In [305]:
shifts = 10
emps = empGen(shifts,num_emps=False,ratio=(3,5))

print(shifts / len(emps))
print(emps)

3.3333333333333335
['781505', '764723', '510627']


In [None]:
def buildTestSet(n,min_shifts,max_shifts,num_emps=False,ratio=False):
    while i < n:
        try:
            schedule, pool = randomProblem(min_shifts=min_shifts,max_shifts=max_shifts,num_emps=num_emps,ratio=ratio)

            tstst = sorted(glob.glob("scheduling_problems/test_set/*.csv"))

            if tstst:
                id = int(tstst[-1].split('/')[2].split('_')[1].split('.')[0]) + 1
        
            else:
                id = 1

            ratio = len(schedule) / len(pool)

            if ratio > 1 and ratio < 3:
                i += 1
                schedule, pool = schedule.to_csv(f'scheduling_problems/test_set/schedule_{str(id).zfill(2)}.csv',index=False), \
                            pool.to_csv(f'scheduling_problems/test_set/pool_{str(id).zfill(2)}.csv',index=False) 
        
            else:
                continue
        
        except ValueError as verror:
            print(verror)
            break

        except NameError as nerror:
            print(nerror)
            break

        except:
            continue

In [208]:
num_shifts = 4
lower = round(num_shifts / 5)
upper = round(num_shifts / 2)
print(lower, upper)

1 2


## Fix test set lists

In [141]:
shifts_easy_ratio_mixed = [1,2,3,4,5]

test_set_lists = ['shifts_easy_ratio_mixed']

for i in test_set_lists:
    print(i)
    print(sum(eval(i)))

shifts_easy_ratio_mixed
15


In [138]:
subdir='shifts_medium_ratio_mixed'
tstst = sorted(glob.glob(f"scheduling_problems/test_set/{subdir}/*.csv"))

tstst[0]

'scheduling_problems/test_set/shifts_medium_ratio_mixed/pool_01.csv'

In [139]:
tstst = sorted(glob.glob(f"scheduling_problems/test_set/{subdir}/*.csv"))

tpi  = pd.DataFrame(columns=['Schedule','shifts', 'Pool', 'employees','nodes','ratio'])

for i in range(int(len(tstst)/2)):
    pool = tstst[i]
    schedule = tstst[i+(int(len(tstst) / 2))]

    pool, schedule = pd.read_csv(pool,dtype={'employee_id':'str'}), \
                pd.read_csv(schedule,dtype={'shift_id':'str'})
    
    Schedule = tstst[i].split('/')[3].split('_')[1].split('.')[0]
    shifts = int(len(schedule))
    Pool = tstst[i+(int(len(tstst) / 2))].split('/')[3].split('_')[1].split('.')[0]
    employees = int(len(pool))
    nodes = shifts + employees 
    ratio = shifts / employees

    tpi.loc[0 if pd.isnull(tpi.index.max()) else tpi.index.max() + 1] = [Schedule] + [shifts] + [Pool] + [employees] + [nodes] + [ratio]

tpi

Unnamed: 0,Schedule,shifts,Pool,employees,nodes,ratio
0,1,13,1,5,18,2.6
1,2,11,2,4,15,2.75
2,3,14,3,8,22,1.75
3,4,9,4,7,16,1.285714
4,5,12,5,7,19,1.714286
5,6,9,6,7,16,1.285714
6,7,11,7,6,17,1.833333
7,8,10,8,4,14,2.5
8,9,11,9,6,17,1.833333
9,10,13,10,5,18,2.6


In [130]:
max_shifts_per_day = 2
shift_day_of_week = []
days = ['Monday','Tuesday','Wednesday','Thursday','Friday']

shift_id = [1,2,3,4]
i=0
while i < len(shift_id):
#for i in range(len(shift_id)):
    choice = random.choice(days)
    print(choice)
    if shift_day_of_week.count(choice) < max_shifts_per_day + 1:
        shift_day_of_week.append(choice)
        i+=1
    print(i)


shift_day_of_week

Tuesday
1
Thursday
2
Friday
3
Thursday
4


['Tuesday', 'Thursday', 'Friday', 'Thursday']

In [116]:
test_data  = pd.DataFrame(columns=['problem','reward', 'model', 'seed'])
test_data.to_csv('test_data/test_data.csv',index=False)


## ENV TESTING

In [160]:
class SchedulingEnv(Env):
    """A personnel scheduling environment for OpenAI gym"""

    def __init__(self, pool, schedule, reward_type):
        sfEncodings = joblib.load('shiftFeatureEncoding.joblib')
        shifts = pd.get_dummies(schedule[['shift_id']],drop_first=True)
        sfEncoded =  sfEncodings.transform(schedule[['shift_day_of_week','shift_type']])
        shift_features = pd.DataFrame(sfEncoded, columns=sfEncodings.get_feature_names_out())
        schedule = pd.merge(shifts, shift_features, left_index=True, right_index=True)

        self.shift_features = schedule.shape[1]
        # OR self.shift_features = shift_features.shape[1]

        for i in pd.get_dummies(pool).columns.to_list():
            schedule[i] = 0
        
        self.schedule = schedule
        self.state = self.schedule.to_numpy()

        # sf_index may not be needed here
        sf_start = len(schedule)-1
        sf_end = schedule.shape[1] - len(pool)
        self.sf_index = (sf_start,sf_end)

        self.count_workers = len(pool)
        self.count_shifts = len(schedule)
        self.shift_number = 0
        self.reward_type = reward_type
        self.reward_step = 0
        self.cummulative_reward = 0
        self.cummulative_violations = 0
        
        # action space: Employees we can assign to shifts
        self.action_space = Discrete(self.count_workers)
        # observation space: the latest state matrix
        self.observation_space = Box(low=0, high=1, shape=(self.state.shape[0], self.state.shape[1]),\
                                     dtype=np.float64)


    def evaluateStep(self):
        """Check the last 2 shift assignments for constraint violations.
        For each employee, compare successive shifts.
        If an employee is assigned to b2b shifts, look up the relevant features.
        If the shifts are on the same day, record a constraint violation
        If the shifts are on successive days, evening then morning, records a constraint  violation.
        Else, no constraint violation.

        :param state: state matrix
        :type state: numpy.array
        :return: count of constraint violations
        :rtype: int
        """
        count_b2b_violation = 0

        for i in range(self.count_workers):
            assignments = self.state[self.reward_step-1:self.reward_step+1,self.shift_features:][:,i]
            if sum(assignments) > 1:
                shift_feats = self.state[self.reward_step-1:self.reward_step+1,self.count_shifts-1:self.shift_features]

                # just the features for day of week
                day_feats_1 = self.state[self.reward_step-1,self.count_shifts-1:self.shift_features-1]
                day_feats_2 = self.state[self.reward_step,self.count_shifts-1:self.shift_features-1]
                day1 = [np.where(day_feats_1==1)[0].item() + 2 if np.where(day_feats_1==1)[0].size != 0 else 1][0]
                day2 = [np.where(day_feats_2==1)[0].item() + 2 if np.where(day_feats_2==1)[0].size != 0 else 1][0]
                
                # shifts are on the same day = violation
                if day1 == day2:
                    count_b2b_violation += 1
                    #print(f"shift:{self.reward_step},employee:{i}, constraint1 violated")
                
                # if shifts are on successive days, evening -> morning = violation
                if day2 == day1+1:
                    # if shift 1 type = evening and shift 2 type = morning, record violation
                    count_b2b_violation += [1 if shift_feats[:,4][0] == 1 and shift_feats[:,4][1] == 0 else 0][0]
                    #if [1 if shift_feats[:,4][0] == 1 and shift_feats[:,4][1] == 0 else 0][0] == 1:
                        #print(f"shift:{self.reward_step},employee:{i}, constraint2 violated")

        return count_b2b_violation

    def evaluateSchedule(self):
        """Check a completed schedule for constraint violations.
        For each employee, apply sliding window to compare successive shifts.
        If an employee is assigned to b2b shifts, look up the relevant features.
        If the shifts are on the same day, record a constraint violation
        If the shifts are on successive days, evening then morning, records a constraint  violation.
        Else, no constraint violation.

        :param state: state matrix
        :type state: numpy.array
        :return: count of constraint violations
        :rtype: int
        """
        count_b2b_violation = 0

        for i in range(self.count_workers):
            # using sliding window to compare successive shifts
            # checks = a list of pairwise binary shift assignment features, from last - first
            checks = sliding_window_view(self.state[:,self.shift_features+i], 2)[::-1]
            #print(checks)
            # for each check
            for j,k in enumerate(checks):
                shift_id = abs(j - len(checks))-1
                # check for b2b shifts
                # 1 = assigned, 0 = not assigned, 2 = b2b
                if sum(k) > 1:
                    #print(f"employee:{i}, shift:{shift_id},{k}")
                    # get features for b2b shifts
                    shift_feats = self.state[shift_id:shift_id+2,self.count_shifts-1:self.shift_features]

                    # just the features for day of week
                    day_feats_1 = self.state[shift_id,self.count_shifts-1:self.shift_features-1]
                    day_feats_2 = self.state[shift_id+1,self.count_shifts-1:self.shift_features-1]
                    day1 = [np.where(day_feats_1==1)[0].item() + 2 if np.where(day_feats_1==1)[0].size != 0 else 1][0]
                    day2 = [np.where(day_feats_2==1)[0].item() + 2 if np.where(day_feats_2==1)[0].size != 0 else 1][0]
                    
                    # shifts are on the same day = violation
                    if day1 == day2:
                        count_b2b_violation += 1
                        #print(f"shift:{shift_id+1},employee:{i}, constraint1 violated")
                    
                    # if shifts are on successive days, evening -> morning = violation
                    if day2 == day1+1:
                        # if shift 1 type = evening and shift 2 type = morning, record violation
                        count_b2b_violation += [1 if shift_feats[:,4][0] == 1 and shift_feats[:,4][1] == 0 else 0][0]
                        #if [1 if shift_feats[:,4][0] == 1 and shift_feats[:,4][1] == 0 else 0][0] == 1:
                            #print(f"shift:{shift_id+1},employee:{i}, constraint2 violated")

        return count_b2b_violation


In [189]:
i = ('scheduling_problems/test_set/shifts_extrahard_ratio_mixed/pool_39.csv', 'scheduling_problems/test_set/shifts_extrahard_ratio_mixed/schedule_39.csv')

pool, schedule = pd.read_csv(f'{i[0]}',dtype={'employee_id':'str'}), \
                pd.read_csv(f'{i[1]}',dtype={'shift_id':'str'})

print(len(pool))
print(len(schedule))

14
19


In [190]:
schedule['shift_day_of_week'] = schedule['shift_day_of_week'].replace(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],[1, 2, 3, 4, 5])
schedule['shift_type'] = schedule['shift_type'].replace(['Morning', 'Evening'],[1, 2])

env = SchedulingEnv(pool, schedule, reward_type='Step_Bonus')

In [191]:
env.state

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [192]:
env.state = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,],
                      [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,],
                      [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,],
                      [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,],
                      [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,],
                      [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,],
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,]])

In [193]:
count_b2b_violation = env.evaluateSchedule()
count_b2b_violation

0

In [168]:
env.count_shifts

7

In [171]:
reward = 1 - (count_b2b_violation / (env.count_shifts-1))
reward

-0.16666666666666674

In [184]:
reward = (.5/(self.count_shifts-1)) - (count_b2b_violation * (1/(self.count_shifts-1)))

0.16666666666666666

In [185]:
(.5/(7-1))

0.08333333333333333

In [188]:
1 * (1/(7-1))

0.16666666666666666

In [174]:
total=0
for i in range(count_b2b_violation):
    reward = (1/(env.count_shifts-1)) - (2 * (1/(env.count_shifts-1)))
    print(reward)
    total+=reward

total

-0.16666666666666666
-0.16666666666666666
-0.16666666666666666
-0.16666666666666666


-0.6666666666666666

## test problem list

In [83]:
def testProbList(subset):
    test_set = sorted(glob.glob(f"scheduling_problems/test_set/{subset}/*.csv"))
    test_index = pd.read_csv(f"scheduling_problems/test_set_indexes/testproblemindex_{subset}.csv")
    problist = []

    for i in range(len(test_index)):
        s = str(int(test_index.iloc[i]['Schedule'])).zfill(2)
        p = str(int(test_index.iloc[i]['Pool'])).zfill(2)

        sp = f"scheduling_problems/test_set/{subset}/schedule_{s}.csv"
        pp = f"scheduling_problems/test_set/{subset}/pool_{p}.csv"

        problist.append((sp,pp))

    return problist


In [117]:
str(i[0].split('_')[-1].split('.')[0] + i[1].split('_')[-1].split('.')[0])

'0101'

In [84]:
for i in testProbList('shifts_easy_ratio_mixed'):
    print(i[0])

scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_01
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_02
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_03
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_04
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_05
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_06
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_07
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_08
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_09
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_10
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_11
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_12
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_13
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_14
scheduling_problems/test_set/shifts_easy_ratio_mixed/schedule_15
scheduling_problems/test_

In [72]:
test_index.iloc[0]#['Schedule']
s = str(int(test_index.iloc[0]['Schedule'])).zfill(2)
p = str(int(test_index.iloc[0]['Pool'])).zfill(2)

dir=''
sp = f"scheduling_problems/test_set/{dir}/schedule_{s}"
pp = f"scheduling_problems/test_set/{dir}/pool_{p}"

(sp,pp)


'scheduling_problems/test_set//schedule_01'

In [69]:
test_set[51]

'scheduling_problems/test_set/schedule_02.csv'

In [54]:
test_set = sorted(glob.glob("scheduling_problems/test_set/*.csv"))
test_index = pd.read_csv("scheduling_problems/testproblemindex.csv")
test_index

for i in range(len(test_index):
    test_index.iloc
    {str(id).zfill(2)}

Unnamed: 0,Schedule,shifts,Pool,employees,nodes,ratio
0,1,3,1,2,5,1.5
1,2,3,2,2,5,1.5
2,3,7,3,3,10,2.333333
3,4,3,4,2,5,1.5
4,5,7,5,4,11,1.75
5,6,5,6,4,9,1.25
6,7,6,7,3,9,2.0
7,8,8,8,3,11,2.666667
8,9,5,9,4,9,1.25
9,10,8,10,4,12,2.0


In [53]:
loadTestProblem(num_shifts=False)

('scheduling_problems/test_set/pool_01.csv',
 'scheduling_problems/test_set/schedule_01.csv')

In [50]:
def loadTestProblem(n=False, num_shifts=False):
    tstst = sorted(glob.glob("scheduling_problems/test_set/*.csv"))

    try:
        if num_shifts:
            shfts = 0
            while not shfts == num_shifts:
                n = random.randint(1, (len(tstst) / 2))
                p = tstst[n-1]
                s = tstst[n+(int(len(tstst) / 2)-1)]

                schedule = pd.read_csv(f'{tstst[n+(int(len(tstst) / 2)-1)]}',dtype={'shift_id':'str'})
                if len(schedule) == num_shifts:
                    shfts = num_shifts

        else:
            n = random.randint(1, (len(tstst) / 2))
            p = tstst[n-1]
            s = tstst[n+(int(len(tstst) / 2)-1)]

        return (p,s)

    except ValueError as verror:
        print(verror)
        

    except NameError as nerror:
        print(nerror)
        

    except:
        print(f"No problem with {num_shifts} shifts.")

In [23]:
def testProblemIndex():
    tstst = sorted(glob.glob("scheduling_problems/test_set/*.csv"))

    tpi  = pd.DataFrame(columns=['Schedule','shifts', 'Pool', 'employees','nodes','ratio'])

    for i in range(int(len(tstst)/2)):
        pool = tstst[i]
        schedule = tstst[i+(int(len(tstst) / 2))]

        pool, schedule = pd.read_csv(pool,dtype={'employee_id':'str'}), \
                    pd.read_csv(schedule,dtype={'shift_id':'str'})
        
        Schedule = tstst[i].split('/')[2].split('_')[1].split('.')[0]
        shifts = int(len(schedule))
        Pool = tstst[i+(int(len(tstst) / 2))].split('/')[2].split('_')[1].split('.')[0]
        employees = int(len(pool))
        nodes = shifts + employees 
        ratio = shifts / employees

        tpi.loc[0 if pd.isnull(tpi.index.max()) else tpi.index.max() + 1] = [Schedule] + [shifts] + [Pool] + [employees] + [nodes] + [ratio]

    sdf = df.groupby(['shifts']).agg({
    'shifts': 'count',
    'nodes': 'mean',
    'ratio': 'mean',
    'employees': 'mean'
        })

    sdf.columns =['count','avg_nodes','avg_ratio','avg_employees']


    return tpi, sdf


In [39]:
 df = testProblemIndex()
 df

Unnamed: 0,Schedule,shifts,Pool,employees,nodes,ratio
0,1,3,1,2,5,1.5
1,2,3,2,2,5,1.5
2,3,7,3,3,10,2.333333
3,4,3,4,2,5,1.5
4,5,7,5,4,11,1.75
5,6,5,6,4,9,1.25
6,7,6,7,3,9,2.0
7,8,8,8,3,11,2.666667
8,9,5,9,4,9,1.25
9,10,8,10,4,12,2.0
