In [1]:
import pandas as pd
import numpy as np

# People

In [2]:
# Load the raw people table; the file is semicolon-delimited.
people = pd.read_csv("data/people.csv", sep=";")

In [3]:
# Clean the people table in one chained pass; each step yields a new frame.
people = (
    people
    # rows that only delimit the comp. / master / rec groups contain NaN
    .dropna()
    # fold the 'HH' class label into 'H'
    .replace({'Class': {'HH': 'H'}})
    # Level is stored as a 32-bit integer
    .astype({'Level': 'int32'})
    # names become the row index
    .set_index('Name')
)

In [4]:
# Tally every Class label, counting NaN as its own bucket, to eyeball the cleaned values.
people['Class'].value_counts(dropna=False)

M     38
H     14
MH    13
L     10
Name: Class, dtype: int64

In [5]:
# Tally every Level value, counting NaN as its own bucket, to eyeball the cleaned values.
people['Level'].value_counts(dropna=False)

1    31
2    28
3    11
0     5
Name: Level, dtype: int64

In [6]:
# Write the cleaned table to disk; to_csv keeps the index by default,
# so the Name index is saved as the first column.
people.to_csv('data_processed/people.csv')

# Boats

In [7]:
# Load the boat sheet (semicolon-delimited). header=[1] takes the column
# names from row 1 (0-based) of the file. The owner column is discarded and
# the boat name becomes the row index.
boats = (
    pd.read_csv('data/boats.csv', sep=";", header=[1])
    .drop(columns=["owner"])
    .set_index('name')
)

In [8]:
# In the L / M / MH / H columns, turn an empty cell (NaN) into 0 and an 'x' into 1.
replacments = {np.nan: 0, 'x': 1}
# Every listed column shares the same mapping.
boats = boats.replace(dict.fromkeys(['L', 'M', 'MH', 'H'], replacments))

In [9]:
# Write the encoded boat table to disk; to_csv keeps the index by default,
# so the boat name is saved as the first column.
boats.to_csv('data_processed/boats.csv')

# Competitive

## Prefs

In [10]:
# Load the raw time-preference sheet (semicolon-delimited) and discard the
# Level and Class columns.
prefs = pd.read_csv('data/comp/time_prefs.csv', sep=";")
prefs = prefs.drop(columns=['Level', 'Class'])

# repeat names on every row of a person (presumably 3 rows each, with the
# name given only on the first row -- confirm against the sheet)
# The result is assigned back rather than calling
# `prefs['Name'].fillna(..., inplace=True)`: an in-place call on a column
# selection acts on a temporary under pandas Copy-on-Write and would leave
# `prefs` unfilled; `fillna(method=...)` is also deprecated in favour of ffill().
prefs['Name'] = prefs['Name'].ffill()
# multi-index cols with (day, time of day)
prefs = prefs.pivot(index='Name', columns='Preff', values=['M', 'T', 'W', 'Th', 'F', 'S'])
# drop people that have not given any prefs
prefs = prefs.dropna(how='all')
# replace with preference values (-1 for a secondary choice, 0 for a first choice)
prefs = prefs.replace({'x': 0, 'X': 0, '0': -1, 'o': -1, 'O': -1})

In [11]:
# Label the two column levels produced by the pivot: weekday first, then time of day.
prefs.columns.names = ['day', 'time']

In [12]:
# Write the preference table to disk. The columns are a two-level index,
# so the CSV gets one header row per level, and the Name index is saved
# as the first column.
prefs.to_csv('data_processed/comp/time_prefs.csv')