In [None]:
import pandas as pd
# pd.options.mode.copy_on_write = False # from Pandas 3 on, this is True as default
import seaborn as sns
from pathlib import Path
#from cycler import cycler

from serv.servCore import user_scores, get_schedule_frame
from serv.servCore import get_b_e_quad_factor, get_b_e_step_factor, get_b_e_factor
from serv.servInterface import load_user_with_function, load_service_from_csv, export_service_to_csv
from serv.servTest import get_test_user
from serv.config import trans_dict, set_config

# ToDo logging

#default_cycler = (cycler(color=['r', 'g', 'b', 'y']) + cycler(linestyle=['-', '--', ':', '-.']))
sns.set_theme(rc={'figure.figsize':(10,6)})

#pd.__version__

# Definitions

1. functions.csv is the leading (master) document for people and their service functions
    a. Mitglieder.csv (from www.vereinsflieger.de) is NOT the master document

## ToDo

1. Introduce logging
2. Code structure: review the functions outsourced to core

In [None]:
# input locations (all paths are relative)
vac_path = Path('vacation/*.*') # directory plus glob pattern of the vacation files
user_path = Path('vf_sources/functions.csv') # input of load_user_with_function
# one vereinsflieger csv export per service function; the file stems are the values of trans_dict
# (a single file would be e. g. Path('vf_sources/Fluglehrer.csv'))
serv_paths = [Path(f'vf_sources/{file_stem}.csv') for file_stem in trans_dict.values()]

# users per function; for a test user setup use get_test_user(simple=True) instead
users = load_user_with_function(user_path)
service_functions = list(users) # keys of users, expected to equal list(trans_dict.keys())

# Schedule

In [None]:
# schedule frame with the dates between season start and season end
season_start = '2025-03-28'
season_end = '2025-11-02'
df_schedule = get_schedule_frame(start_date=season_start, end_date=season_end)

# one service column per function (taken from the user definition), initially empty
for service_function in service_functions:
    df_schedule[service_function] = ''

# hint: df_schedule.to_excel('vacation.xlsx') writes a template for the vacation dates

In [None]:
# get vacation files (data)
# Path.glob yields its matches in arbitrary order; sorting keeps the order of the vacation
# files (and of everything built from them) the same on every run
vac_files = sorted(vac_path.parent.glob(vac_path.name))
print('{} vacation files found:'.format(len(vac_files)), *vac_files, sep='\n')

In [None]:
#debug
#idx=0
#vac_files
#df_vac = pd.read_csv(vac_files[idx], sep=';', usecols=[0, 2])
#df_vac

In [None]:
# read vacation data from files
# each file is ';'-separated and only its columns 0 and 2 are read: one of them must be named
# 'Date' (it becomes the index), the header of the other one is used as column label
for vac_file in vac_files:
    df_vac = pd.read_csv(vac_file, sep=';', usecols=[0, 2]).set_index('Date')
    vac_name = df_vac.columns[0] # the only column left once 'Date' is the index

    # add vacation data as one column per name, stored under the tuple label ('Vacation', <name>)
    df_schedule['Vacation', vac_name] = df_vac[vac_name]

In [None]:
# read all (predefined) service csv files from vereinsflieger and collect them in a list
# serv_paths is built from trans_dict.values(), so keys and paths line up one to one
df_services_in = [
    load_service_from_csv(serv_file, function=function_name)
    for function_name, serv_file in zip(trans_dict.keys(), serv_paths)
]

In [None]:
# set user data frame
# one frame per function: index = entries of users[function], single column 'score' set to 0.0
user_frames = [
    pd.DataFrame(data=0.0, index=members, columns=['score'])
    for members in users.values()
]
# stack the frames under a two-level row index (Function, Name)
df_user = pd.concat(user_frames, keys=service_functions, names=['Function', 'Name'])

# one further column per entry of user_scores (key = column label, value = column content)
for score_column, score_default in user_scores.items():
    df_user[score_column] = score_default

In [None]:
# configuration
# Manual test modifications for single users can be placed here, before set_config is applied.
# Write through one .loc call with the (Function, Name) row key; the chained form
# df_user.loc['Winch'].at['Sand', 'max'] = 5 does not reach df_user under pandas copy-on-write.
#df_user.loc[('Winch', 'Sand'), 'max'] = 5
#df_user.loc[('Winch', 'Mas'), 'max'] = 8
#df_user.loc[('Winch', 'Mas'), 'weight'] = 0.8
#df_user.loc[('Winch', 'Mas'), 'pref_day'] = 'Sun'
#df_user.loc[('Winch', 'Col'), 'b-e'] = 0.7
#df_user.loc['Pilot', 'max'] = 5.0 # all users of function 'Pilot'

# apply the project configuration (serv.config.set_config) and show the resulting user frame
df_user = set_config(df_user)
df_user

In [None]:
# add predefined services to schedule df
# the first column label of every loaded service frame names the schedule column it is written to
for df_fct in df_services_in:
    fct_column = df_fct.columns[0]
    print(fct_column)
    display(df_fct)
    df_schedule[fct_column] = df_fct

In [None]:
# analyse data frame
# bookkeeping for one function: one column per user of that function, one row per schedule row
func_analyse = 'Pilot'
analyse_users = df_user.loc[func_analyse]['score'].index
analyse_rows = list(range(0, df_schedule.shape[0])) # row positions instead of df_schedule.index
df_analyse = pd.DataFrame(data=0.0, columns=analyse_users, index=analyse_rows)

In [None]:
# show the schedule as it is before the fill loop below runs
df_schedule

In [None]:
# fill schedule
# For every day (row) and every function the first eligible user in ascending score order gets
# the service; the user's score is then raised by an increment, which spreads the services.
# criteria: vacation, double service, pref_day, comp_we, max, b-e, weight
# Teacher can be skipped, hard coded
day = 0 # 1-based counter of the processed schedule rows
last_idx = ''
predef2date = pd.to_datetime('04.05.2025', dayfirst=True) # date up to which the schedule is predefined, e. g. from a first run
pilot_last_date = pd.to_datetime('31.08.2025', dayfirst=True) # no Pilot service is assigned after this date

# go through each day / row
# index is date
for idx, row in df_schedule.iterrows():

    day = day + 1
    pos = day - 1 # 0-based position of the current row
    # first column read by position via .iloc[row, col]; .iloc[row][0] depends on the deprecated
    # positional fallback of Series.__getitem__
    weekday = df_schedule.iloc[pos, 0]
    # the row before only exists from the second day on; a negative position would silently
    # wrap around to the last row of the schedule
    weekday_before = df_schedule.iloc[pos - 1, 0] if day >= 2 else None
    current_date = pd.to_datetime(idx, dayfirst=True) # set current date

    print('DEBUG: (1. loop: day) {} {} | day = {}'.format(weekday, idx, day))
    if day >= 2:
        print('DEBUG: day before {} was {}'.format(weekday, weekday_before))

    # go through functions
    for function in df_user.index.levels[0]:
        print('DEBUG: (2. loop: function)', function)

        # predefined date, fill from here (if available)
        if current_date <= predef2date:
            print('DEBUG: predefined schedule till: {} (current date = {})'.format(predef2date.date(), current_date.date()))
            continue

        # pilot end date, stop filling from here
        if current_date > pilot_last_date:
            print('DEBUG: pilot schedule ended from here: {} (current date = {})'.format(pilot_last_date.date(), current_date.date()))
            if 'Pilot' in function:
                continue

        if 'Teacher' in function: # complete season: schedule for function Teacher is predefined
            continue

        # users of this function; only read from here, every write goes to df_user directly.
        # df_user is not modified inside the name loop before a break, so the values stay current.
        fct_users = df_user.loc[function]

        # get user list with lowest score first
        for name in fct_users.sort_values('score').index:
            print('DEBUG: criteria lowest score (3. loop: name)', name)

            increment = 1.0

            # check if vacation information is available and if user is on vacation
            if ('Vacation', name) in df_schedule.columns and row[('Vacation', name)] >= 0.0: # 1 => vacation = True
                print('INFO: {}: User {} for function {} is on vacation.'.format(idx, name, function))
                continue

            # check double service (read live: earlier functions may have filled this day already)
            if name in df_schedule.loc[idx].values:
                print(name, ' in', df_schedule.loc[idx].values)
                print('INFO: {}: User {} for function {} is already in service.'.format(idx, name, function))
                continue

            # check if day is preferred day
            pref_day = fct_users.at[name, 'pref_day']
            if pref_day is not None:
                if df_schedule.loc[idx, 'Weekday'] != pref_day:
                    print('INFO: continue for {}'.format(name))
                    continue

            # check if user is willing to do a complete weekend (Sat & Sun)
            if not fct_users.at[name, 'comp_we'] and weekday_before == 'Sat':
                if df_schedule[function].iloc[pos - 1] == name:
                    print('INFO: continue for {}'.format(name))
                    continue

            # check max definition per user and modify score
            user_max = fct_users.at[name, 'max']
            if user_max is not None:
                user_stat = fct_users.at[name, 'stat']
                if user_max <= user_stat:
                    # NOTE(review): the sums below are hard coded to 'Pilot' for every function - confirm
                    if user_max == 0.0 or df_user.loc['Pilot', 'max'].sum() > df_user.loc['Pilot', 'stat'].sum():
                        print('INFO: continue for {}'.format(name))
                        continue
                    else:
                        df_schedule.at[idx, function] = '' # empty entry
                        print('INFO: Max number {} reached for user {} at function {}'.format(user_stat, name, function))
                        print('DEBUG: (max num break)')
                        break
                else:
                    print('INFO: User {} for function {} has max = {} definition.'.format(name, function, user_max))
                    users_without_max = fct_users['max'].isna().sum()
                    if users_without_max >= 1:
                        # ((number of all days - sum of all max days of users) / number of users without max day definition + 1.0) / number of max day definition of user
                        increment = ((df_schedule.shape[0] - fct_users['max'].sum()) / users_without_max + 1.0) / user_max
                        print('INFO: Increment for user {} modified from 1.0 to {} (max function)'.format(name, increment))

            # check b-e (begin-end) definition for unequal distribution of services
            b_e = fct_users.at[name, 'b-e']
            if b_e is not None:
                # function input: day, base, max days, option level=1.0
                factor = get_b_e_factor(pos, b_e, df_schedule.shape[0], window=0.5)
                if (factor != 1.0):
                    increment = increment * factor
                    print('INFO: Increment for user {} modified to {} (b-e function, factor = {})'.format(name, increment, factor))

            # set weight factor
            weight = 1.0 / fct_users.at[name, 'weight']
            increment = increment * weight
            if (weight != 1.0):
                print('INFO: Increment for user {} modified to {} (weight function, factor = {})'.format(name, increment, weight))

            # set correction from mean value (disabled)
#            if df_user.loc[function].shape[0] < (0.5 * day):
#                corr = (df_user.loc[function]['stat'].mean() / df_user.loc[function].at[name, 'stat'])
#                if corr < 1.0:
#                    increment = increment * corr
#                    print('INFO: Correction factor = {} for user {} at function {}'.format(corr, name, function))

            # debug analyse: df_analyse is indexed by 0-based row position, so use pos (not day)
            if function == func_analyse:
                df_analyse.at[pos, '{}_inc'.format(name)] = increment
                df_analyse.at[pos, name] = df_analyse.at[pos, name] + 1

            # write data with a single .loc call on df_user; a chained
            # df_user.loc[function].at[...] = ... does not reach df_user under copy-on-write
            user_key = (function, name)
            df_user.loc[user_key, 'score'] = fct_users.at[name, 'score'] + increment # increment score
            df_user.loc[user_key, 'stat'] = fct_users.at[name, 'stat'] + 1
            print(idx, function, name, '({})'.format(df_user.loc[user_key, 'stat']))
            df_schedule.at[idx, function] = name # set user to service at date
            print('DEBUG: (4. break)')
            break
    

# Analyse

In [None]:
# cumulated number of services per user of the analysed function
analyse_names = df_user.loc[func_analyse]['score'].index
df_analyse[analyse_names].cumsum().plot(grid=True)

# increment per user and schedule row; a '<name>_inc' column only exists for users that were
# assigned at least once, so reindex adds the missing ones (filled with 0.0) instead of
# failing with a KeyError
inc_columns = ['{}_inc'.format(n) for n in analyse_names]
df_analyse.reindex(columns=inc_columns).fillna(0.0).plot(grid=True)

In [None]:
# how often each entry occurs in a function's schedule column, one table per function
for func in service_functions:
    service_counts = df_schedule[func].value_counts()
    display(service_counts)

In [None]:
# write the schedule to 'output.xlsx' and show it; an I/O failure is printed, not raised
try:
    df_schedule.to_excel('output.xlsx')
    display(df_schedule)
except OSError as err: # IOError is an alias of OSError
    print(f"An error occurred: {err}")

In [None]:
# hand the filled schedule and all service function names to the project export helper
# (presumably writes csv files - see serv.servInterface.export_service_to_csv)
export_service_to_csv(df_schedule, *service_functions)

# Testing

In [None]:
#df_schedule.to_csv('output.csv')
#df_schedule.to_excel('output.xlsx')

In [None]:
#df_user_hist = df_schedule[func_analyse].hist(bins=df_user.loc[func_analyse].shape[0], xlabelsize=5)

In [None]:
# print the analysis frame row by row (rows addressed by position)
for row_pos in range(len(df_analyse)):
    print(df_analyse.iloc[row_pos])