In [None]:
import pandas as pd
import numpy as np
import re
import os
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
"""
Teamwork Time Entries Processor & Visualizer
--------------------------------------------

This script processes a Teamwork time log CSV file and generates per-person
CSV reports and scatter plots, showing daily time entries by project.

Steps Performed:
----------------
1. Reads time entry CSV (e.g., Teamwork export).
2. Strips timezone data from the date column.
3. Converts and sorts date values chronologically.
4. Builds a list of unique people and creates a time-filled dataframe per person.
5. Fills in missing days with zero-hours rows for consistent timelines.
6. Generates per-user CSV files and Seaborn scatter plots of daily time entry hours.
7. Saves results (CSV + PNG) in an export folder (cleared on each run).

Assumptions:
------------
- The CSV must have at least these columns: ['date', 'person-id', 'person-first-name',
  'person-last-name', 'hoursDecimal', 'project-name'].
- The file name format is time_entries_YYYY_MM_DD_*.csv.
- Time entries are for a single quarter or known time range.

Output:
-------
- CSV and PNG scatter plots stored per user in the `save_dir` path.

Author: Gabe McWilliams
"""


In [None]:
# Folder that receives the per-person CSV and PNG exports.
save_dir = "d:/exports/time_entries/q1/src"

# Raw Teamwork time-entry export.
# NOTE(review): ".csv" looks like a placeholder path - confirm the real file name.
df = pd.read_csv(filepath_or_buffer=".csv")

In [None]:
def remove_tz(date_string: str) -> "str | None":
    """Return the leading date portion of a timestamp string.

    Everything from the first character that is neither a digit nor a hyphen
    is dropped, so ``"2024-01-15T10:30:00Z"`` becomes ``"2024-01-15"``.

    Args:
        date_string: Raw timestamp text, e.g. a value from the ``date`` column.

    Returns:
        The leading run of digits and hyphens, or ``None`` when the value is
        not a string or does not start with a digit or hyphen.
    """
    # Missing CSV cells are read by pandas as float NaN; re.match would raise
    # TypeError on those, so treat every non-string like an unparseable value.
    if not isinstance(date_string, str):
        return None

    result = re.match(r'[\d-]+', date_string)
    return result.group(0) if result else None

In [None]:
def quarterly_dates(quarter_num: int, year: int = 2024) -> "dict | None":
    """Return the first and last calendar day of a quarter.

    Args:
        quarter_num: Quarter to look up; must be 1, 2, 3 or 4.
        year: Calendar year the quarter belongs to. Defaults to 2024, the
            year this function used before it became a parameter.

    Returns:
        ``{"start": Timestamp, "end": Timestamp}`` for a valid quarter.
        For any other value a hint is printed and ``None`` is returned.
    """
    # quarter -> (start month, end month, last day of end month).
    # No quarter ends in February, so the end day never depends on leap years.
    bounds = {
        1: (1, 3, 31),
        2: (4, 6, 30),
        3: (7, 9, 30),
        4: (10, 12, 31),
    }

    if quarter_num not in bounds:
        print("Must Choose: [1,2,3,4]")
        return None

    start_month, end_month, end_day = bounds[quarter_num]
    return {
        "start": pd.Timestamp(year=year, month=start_month, day=1),
        "end": pd.Timestamp(year=year, month=end_month, day=end_day),
    }

In [None]:
# Reduce every raw timestamp string to its leading digits-and-hyphens part
# (the calendar date) before parsing.
df["date"] = df["date"].map(remove_tz)

In [None]:
# Replace the column outright. `df.loc[:, 'date'] = ...` writes the values into
# the existing object-dtype column in place on pandas >= 2.0, so the column
# would stay `object` instead of becoming datetime64.
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Newest entries first.
df = df.sort_values(by="date", ascending=False)

In [None]:
# Calendar-day label (YYYY-MM-DD) for every entry. The vectorized .dt accessor
# replaces the per-row lambda and yields NaN for unparseable dates instead of
# raising on NaT.
df["day"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d")

In [None]:
# 'day' holds YYYY-MM-DD strings, which sort lexicographically in calendar
# order, so plain max/min give the newest and oldest day in the export.
day_labels = df["day"]
max_df_day = day_labels.max()
min_df_day = day_labels.min()

print(max_df_day, min_df_day, sep="\n")

In [None]:
# Unique person ids in order of first appearance. Unlike list(set(...)) the
# order is deterministic, and rows without an id are dropped because a NaN id
# never matches any row when the frame is filtered by id later on.
person_id_list = df["person-id"].dropna().unique().tolist()

In [None]:
# One record per person: id, display name and that person's time entries.
person_data_list = []

for person_id in person_id_list:
    # Filter the full frame once per person (not once per field) and take a
    # copy, so later per-person edits never write into a slice of `df`.
    person_rows = df[df['person-id'] == person_id].copy()

    if person_rows.empty:
        # An id that matches no row (e.g. NaN) has no name to read; skip it
        # rather than fail with an IndexError.
        continue

    first_name = person_rows['person-first-name'].iloc[0]
    last_name = person_rows['person-last-name'].iloc[0]

    person_data_list.append({
        "person_id": person_id,
        "person_name": f"{first_name} {last_name}",
        "df": person_rows
    })

In [None]:
# Make sure the export folder exists, then clear files left by earlier runs so
# stale reports never sit next to fresh ones.
os.makedirs(save_dir, exist_ok=True)
for root, dirs, files in os.walk(save_dir):
    for f in files:
        os.unlink(os.path.join(root, f))

# Zero-hour filler rows, one per calendar day, shared by every person.
# pd.date_range includes BOTH end points (np.arange dropped the last day), and
# the rows are ordered newest-first to match the sort order of the entries.
timeline = pd.date_range(start=min_df_day, end=max_df_day, freq="D")[::-1]
df_xticks = pd.DataFrame({
    "day": timeline.strftime("%Y-%m-%d"),
    "hoursDecimal": 0.00,
    "date": timeline,
})
all_days = list(df_xticks["day"])

# iterate over all people to create data points
for person in person_data_list:
    # Normalise the person's 'day' labels to YYYY-MM-DD strings so they share
    # one type with the filler rows; mixing strings and datetimes in the same
    # column breaks the categorical x axis. `assign` returns a new frame, so
    # the frame stored in person_data_list is left untouched.
    person_entries = person["df"].assign(
        day=lambda d: pd.to_datetime(d["day"]).dt.strftime("%Y-%m-%d")
    )

    # Filler rows go first so every day of the range is present on the x axis
    # even when the person logged nothing that day.
    df_user = pd.concat([df_xticks, person_entries], ignore_index=True)
    df_user["hoursDecimal"] = df_user["hoursDecimal"].astype("float")
    df_user = df_user.set_index("day")

    # Shared path prefix for this person's CSV and PNG.
    file_stem = f"{save_dir}/teamwork_time_entries_{person['person_id']}_{(person['person_name']).lower()}_{min_df_day}_{max_df_day}"

    # export csv data per agent (index=False: the 'day' index is not written;
    # the 'date' column is exported instead)
    df_user.to_csv(f"{file_stem}.csv", index=False)

    fig, ax = plt.subplots(figsize=(60, 10))
    sns.scatterplot(data=df_user.reset_index(),
                    x='day',
                    y='hoursDecimal',
                    hue='project-name',
                    size='hoursDecimal',
                    sizes=(50, 300),
                    ax=ax)

    # Show a tick for every day in the range, not only days with entries.
    ax.set_xticks(all_days)
    ax.tick_params(axis='x', labelrotation=45)

    ax.set_xlabel("Day of the Month",
                  fontsize=10,
                  fontweight='bold',
                  labelpad=2)

    ax.set_ylabel("Hours in Decimal Form",
                  fontsize=10,
                  fontweight='bold',
                  labelpad=2)

    ax.set_title(
        f"Teamwork - Time Entries - {person['person_id']} - {person['person_name']} - {min_df_day} to {max_df_day}",
        pad=10,
        fontsize=20,
        fontweight='bold')

    # Park the legend just outside the right edge of the axes.
    ax.legend(bbox_to_anchor=(1.12, .49), loc='right', borderaxespad=0)

    fig.savefig(f"{file_stem}.png")
    # Close this specific figure so figures do not pile up across the loop.
    plt.close(fig)

In [None]:
# Display the frame left in `df_user` by the last iteration of the export loop
# (filler rows plus that person's entries, indexed by 'day').
df_user