In [1]:
import pandas
from datetime import datetime


# Prepare

In [2]:
def get_employee_data(source_path: str) -> pandas.DataFrame:
    """Load the employee CSV into a DataFrame.

    The columns 'Emp-id', 'Emp-name' and 'Emp-lastname' are read with
    explicit dtypes (int, str, str) instead of relying on type inference.

    Args:
        source_path (str): Location of the employee CSV file.

    Returns:
        pandas.DataFrame: One row per record of the CSV file.
    """
    column_types = {'Emp-id': int, 'Emp-name': str, 'Emp-lastname': str}

    return pandas.read_csv(source_path, dtype=column_types)

In [3]:
def get_team_data(source_path: str) -> pandas.DataFrame:
    """Load the team CSV into a DataFrame.

    The columns 'name', 'type' and 'desc' are all read as strings.

    Args:
        source_path (str): Location of the team CSV file.

    Returns:
        pandas.DataFrame: One row per record of the CSV file.
    """
    column_types = dict.fromkeys(('name', 'type', 'desc'), str)

    return pandas.read_csv(source_path, dtype=column_types)

In [4]:
def get_team_history_data(source_path: str) -> pandas.DataFrame:
    """Load the team-history CSV and parse its date columns.

    The file is first read with explicit dtypes ('Start-date', 'End-date',
    'type' and 'team' as strings, 'Emp-id' as int). 'Start-date' and
    'End-date' are then converted to datetimes using the strict
    'YYYY-MM-DD' format.

    Args:
        source_path (str): Location of the team-history CSV file.

    Returns:
        pandas.DataFrame: The team history with datetime 'Start-date' and
        'End-date' columns.
    """
    column_types = {
        'Start-date': str,
        'End-date': str,
        'type': str,
        'team': str,
        'Emp-id': int,
    }
    raw_history = pandas.read_csv(source_path, dtype=column_types)

    # Build the parsed columns first, then swap them in under the same names.
    parsed_dates = {
        column: pandas.to_datetime(raw_history[column], format='%Y-%m-%d')
        for column in ('Start-date', 'End-date')
    }

    return raw_history.assign(**parsed_dates)

In [5]:
def get_calendar_df(start_date: str, end_date: str) -> pandas.DataFrame:
    """Build a one-column calendar covering start_date through end_date.

    Args:
        start_date (str): First day of the calendar, formatted 'YYYY-MM-DD'.
        end_date (str): Last day of the calendar, formatted 'YYYY-MM-DD'.

    Returns:
        pandas.DataFrame: A DataFrame with a single 'date' column holding one
        row per day from start_date to end_date (both included).

    Raises:
        ValueError: If either argument does not match 'YYYY-MM-DD'.
    """
    date_format = "%Y-%m-%d"
    first_day, last_day = (
        datetime.strptime(text, date_format) for text in (start_date, end_date)
    )

    return pandas.DataFrame({'date': pandas.date_range(first_day, last_day)})

In [6]:
def fill_na_history_df(history_df: pandas.DataFrame) -> pandas.DataFrame:
    """Fills NA values in the 'End-date' column of the history DataFrame.

    The rows are sorted by 'Emp-id' and 'Start-date'. A missing 'End-date' is
    then replaced by the 'Start-date' of the same employee's next record
    minus one day, so consecutive assignments of one employee do not overlap.

    The look-ahead is done per employee: the last record of an employee has
    no successor, so a missing 'End-date' there is left as NaT rather than
    being derived from the first record of a different employee.

    Args:
        history_df (pandas.DataFrame): The history DataFrame with 'Emp-id',
            'Start-date', and 'End-date' columns. It is not modified.

    Returns:
        pandas.DataFrame: A sorted copy of the history DataFrame with NA
        values in 'End-date' filled where a following record exists. The
        original index labels are kept.
    """
    cleaned_history_df = history_df.sort_values(by=['Emp-id', 'Start-date'])

    # Shift inside each employee's group so the "next row" never belongs to
    # another employee; the result stays aligned on the original index.
    next_start_date = cleaned_history_df.groupby('Emp-id')['Start-date'].shift(-1)

    cleaned_history_df['End-date'] = cleaned_history_df['End-date'].fillna(
        next_start_date - pandas.Timedelta(days=1)
    )

    return cleaned_history_df

In [7]:
def join_employee_team_history_df(
    employee_df: pandas.DataFrame, 
    team_df: pandas.DataFrame, 
    team_history_df: pandas.DataFrame
) -> pandas.DataFrame:
    """Enrich the team history with team details and employee details.

    Two inner joins are applied in sequence: the history's 'team' column is
    matched against the team frame's 'name' column, and the outcome is then
    matched against the employee frame on 'Emp-id'. History rows without a
    matching team or employee are dropped by the inner joins.

    Args:
        employee_df (pandas.DataFrame): DataFrame containing employee data.
        team_df (pandas.DataFrame): DataFrame containing team data.
        team_history_df (pandas.DataFrame): DataFrame containing team history data.

    Returns:
        pandas.DataFrame: The team history joined with team and employee columns.
    """
    history_with_team = pandas.merge(
        team_history_df,
        team_df,
        left_on="team",
        right_on="name",
    )

    return pandas.merge(
        history_with_team,
        employee_df,
        left_on="Emp-id",
        right_on="Emp-id",
    )

In [8]:
def cross_join_date(df: pandas.DataFrame, date_df: pandas.DataFrame) -> pandas.DataFrame:
    """Performs a cross join between the given DataFrame and a date DataFrame.

    Every row of date_df is paired with every row of df. The columns of
    date_df come first in the result, followed by the columns of df, and the
    rows are ordered by date_df row, then by df row.

    Neither input is modified: no helper join column is written into the
    caller's frames, so they can be reused safely (for example when the
    notebook cell is re-run).

    Args:
        df (pandas.DataFrame): The DataFrame to be cross-joined with the date DataFrame.
        date_df (pandas.DataFrame): The date DataFrame to be cross-joined with the given DataFrame.

    Returns:
        pandas.DataFrame: A DataFrame with len(date_df) * len(df) rows
        resulting from the cross join of the input DataFrames.
    """
    # how='cross' builds the cartesian product directly, so no temporary
    # constant 'key' column has to be added to (and removed from) the inputs.
    cross_date_df = date_df.merge(df, how='cross')

    return cross_date_df

In [9]:
def filter_date_between_start_end(df: pandas.DataFrame) -> pandas.DataFrame:
    """Keep only the rows whose 'date' lies within ['Start-date', 'End-date'].

    Both bounds are inclusive. A row with a missing bound never satisfies
    the comparison and is therefore dropped.

    Args:
        df (pandas.DataFrame): Frame with 'date', 'Start-date' and 'End-date' columns.

    Returns:
        pandas.DataFrame: The subset of rows for which
        'Start-date' <= 'date' <= 'End-date', with the original index labels.
    """
    on_or_after_start = df['date'].ge(df['Start-date'])
    on_or_before_end = df['date'].le(df['End-date'])

    return df[on_or_after_start & on_or_before_end]

In [10]:
def display_result_df(df: pandas.DataFrame) -> pandas.DataFrame:
    """Print the final report and hand it back to the caller.

    The report keeps the columns 'team', 'Emp-id', 'type', 'desc',
    'Emp-name', 'Emp-lastname' and 'date' (in that order), is ordered by
    'date' then 'Emp-id', and gets a fresh 0..n-1 index before it is printed.

    Args:
        df (pandas.DataFrame): Frame containing at least the report columns.

    Returns:
        pandas.DataFrame: The printed report.
    """
    report_columns = ['team', 'Emp-id', 'type', 'desc', 'Emp-name', 'Emp-lastname', 'date']
    report = (
        df[report_columns]
        .sort_values(by=['date', 'Emp-id'])
        .reset_index(drop=True)
    )
    print(report)

    return report

# Execution

In [12]:
# Pipeline driver: load the three source CSV files, build a calendar, expand
# the team history to one row per employee per day, and print the result.

# Input and output locations (relative paths).
source_employee_file_path = "../dataset/emp.csv"
source_team_file_path = "../dataset/team.csv"
source_team_history_file_path = "../dataset/team-history.csv"
target_file_location = "../dataset/result-employee-team-by-date.csv"
# First and last day of the generated calendar, in 'YYYY-MM-DD' format.
start_date = "2020-01-01"
end_date = "2020-01-10"

# Read the source data and build the calendar frame.
employee_df = get_employee_data(source_path=source_employee_file_path)
team_df = get_team_data(source_path=source_team_file_path)
team_history_df = get_team_history_data(source_path=source_team_history_file_path)
date_df = get_calendar_df(start_date=start_date, end_date=end_date)

# Cleaning: fill missing 'End-date' values in the team history.
cleaned_team_history_df = fill_na_history_df(history_df=team_history_df)

# Transform: attach team and employee details, pair every history row with
# every calendar day, then keep only the days inside each row's date range.
joined_df = join_employee_team_history_df(employee_df=employee_df, team_df=team_df, team_history_df=cleaned_team_history_df)
cross_join_date_df = cross_join_date(df=joined_df, date_df=date_df)
filtered_df = filter_date_between_start_end(df=cross_join_date_df)

# Display: select the report columns, sort, print, and keep the result.
result_df = display_result_df(df=filtered_df)

# Save result (currently disabled; uncomment the line below to write the CSV).
# result_df.to_csv(target_file_location, header=True)

       team  Emp-id  type     desc Emp-name Emp-lastname       date
0      TU-1       1    TU     ซ่อม        a          aaa 2020-01-01
1      TU-1       2    TU     ซ่อม        b          bbb 2020-01-01
2      TU-2       3    TU     ซ่อม        c          ccc 2020-01-01
3      TU-2       4    TU     ซ่อม        d          ddd 2020-01-01
4      DW-1       5    DW  ติดตั้ง        e          eee 2020-01-01
..      ...     ...   ...      ...      ...          ...        ...
115    DW-1       8    DW  ติดตั้ง        h          hhh 2020-01-10
116    DW-2       9    DW  ติดตั้ง        I          iii 2020-01-10
117    DW-1      10    DW  ติดตั้ง        j          jjj 2020-01-10
118  SALE-2      11  SALE      ขาย        k          kkk 2020-01-10
119  SALE-2      12  SALE      ขาย        l          lll 2020-01-10

[120 rows x 7 columns]
