In [71]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets

In [72]:
correct_hours = pd.read_csv('data.csv')

bad_name_columns = ['Unnamed: 13', 'Unnamed: 12', 'Unnamed: 11', 'Unnamed: 10','Unnamed: 7','Please specify why the above dates and times were not entered correctly in UD Time.  For any request over your approved contract hours, your faculty adviser should email krystalp@udel.edu and jabrow@udel.edu noting approval for non-standard hours.', 'Please sign your name verifying that your hours are approved by your faculty advisor and are in compliance with UD policy and the Department of Labor ', 'Please list Date(s) Time(s) that you need corrected in UD Time (example: 2/20/21 from 8:00am - 1:00pm)','Bi-Weekly Pay Period Needing Correction (example: 1/11-25/2020)']
correct_hours['Reason'] = correct_hours['Please specify why the above dates and times were not entered correctly in UD Time.  For any request over your approved contract hours, your faculty adviser should email krystalp@udel.edu and jabrow@udel.edu noting approval for non-standard hours.']
correct_hours['Signature'] = correct_hours['Please sign your name verifying that your hours are approved by your faculty advisor and are in compliance with UD policy and the Department of Labor ']
correct_hours['Correction_Time'] = correct_hours['Please list Date(s) Time(s) that you need corrected in UD Time (example: 2/20/21 from 8:00am - 1:00pm)']
correct_hours['Bi_Weekly_Period_Correction'] = correct_hours['Bi-Weekly Pay Period Needing Correction (example: 1/11-25/2020)']

correct_hours.drop(columns=bad_name_columns, inplace=True)
correct_hours.columns

Index(['Timestamp', 'Email Address', 'Student Name:', 'Student UD ID Number:',
       'Student Work Assignment:', 'Reason', 'Signature', 'Correction_Time',
       'Bi_Weekly_Period_Correction'],
      dtype='object')

In [73]:
correct_hours.dtypes

Timestamp                      object
Email Address                  object
Student Name:                  object
Student UD ID Number:          object
Student Work Assignment:       object
Reason                         object
Signature                      object
Correction_Time                object
Bi_Weekly_Period_Correction    object
dtype: object

In [74]:
def process_timestamp(timestamp: str | float | pd.Timestamp) -> pd.Timestamp | float:
    if type(timestamp) is pd.Timestamp:
        return timestamp
    elif type(timestamp) is float:
        return np.nan
    else:
        [date_time, specific_time] = timestamp.split(" ")
        [year, month, day] = date_time.split('-')
        [hour, minute, second] = specific_time.split(":")
        return pd.Timestamp(year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute), second=int(float(second)))

In [75]:
correct_hours['Timestamp'] = correct_hours['Timestamp'].apply(process_timestamp)
correct_hours.head()

Unnamed: 0,Timestamp,Email Address,Student Name:,Student UD ID Number:,Student Work Assignment:,Reason,Signature,Correction_Time,Bi_Weekly_Period_Correction
0,2022-12-12 23:49:33,3773f0bf855e507e6e7492336e4ee5bd89f6af0b7da3e8...,61b8ab63d1420b410aa08cf5f407b86ff870d55b1424ce...,74c2d693c171f330b0642aca401d49a4325c0c8e50b490...,Student Worker Class II,forgot to clock out on workforce,61b8ab63d1420b410aa08cf5f407b86ff870d55b1424ce...,12/02/22 2:03pm - 3:45 pm,11/27/1022 - 12/10/2022
1,2022-12-18 13:26:22,26c4a808ae8889b8fb2e8e47fbedf9087ccfdd05e5b92d...,059d42167f14f9360ae62eee2b2e941ca6072ff79af76b...,06cb0487f6ecfa5471a6daed9a371e1ad1634d958e8bba...,Student Worker Class II,Forgot to clock out,059d42167f14f9360ae62eee2b2e941ca6072ff79af76b...,12/06/22 9:45AM - 2:00PM,11/27/2022 - 12/10/2022
2,2022-12-18 17:33:59,6cb4453e4f367fd20dc79f9f2f833a1d80da9848abafd8...,b17b291c9834bbbdde35ad4b7fff49f8d533db47ffa16d...,a10c86ce87e1593ee6b55da4adce75966159fa85243026...,CISC 220 TA,Forgot to clock out both days.,adb6c6989028b739a3e470ec86877863c11413592d89cc...,12/05/22 5:15pm - 8:30pm 12/05/22 3:30pm - 5:30pm,11/27/2022 - 12/10/2022
3,2022-12-18 19:28:45,9e89060ae6a7a51e01c0d18420f144357a32ff89beb38a...,5c6632ce29bc301dc9b371bc70a016fe6edf1cf7de68b5...,1d02878b0247dad3c6dfb98f67bb116423879d17338e94...,Excusing/extending projects for students in need,"I clocked in, but forgot to clock out when I w...",5c6632ce29bc301dc9b371bc70a016fe6edf1cf7de68b5...,12/17/22 from 2:03am - 2:40am,12/11-24/2022
4,2022-12-18 20:26:31,dbefd7772cd313291b9de37b3cf174fbd065ffcb9769f8...,21abb140ad59cfc4947b0a9f816de8ac626e85df6d8914...,9a67c831cec80403be893e02fe74cbab334e300fb9e4ea...,CISC Teaching Assistant 2022-Computer & Info S...,Forgo to clock out,21abb140ad59cfc4947b0a9f816de8ac626e85df6d8914...,12/15/2022 from 6:30pm - 9:30pm\n12/17/2022 fr...,12/11/2022 - 12/24/2022


In [76]:
q1 = correct_hours.copy()
q1['Month_Submitted'] = q1['Timestamp'].apply(lambda x: x.month)

month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

months = {
    1: 0,
    2: 0,
    3: 0,
    4: 0,
    5: 0,
    6: 0,
    7: 0,
    8: 0,
    9: 0,
    10: 0,
    11: 0,
    12: 0
}

for each_month in months.keys():
    months[each_month] = len(q1[q1['Month_Submitted'] == each_month])

px.pie(values=months.values(), names=month_names, title="Submissions Per Months")

In [77]:
q2 = q1.copy()

all_people = q2['Student UD ID Number:']
all_people_set = set(all_people)

people_count = {}

for each_person in all_people:
    if each_person in all_people_set:
        if each_person in people_count:
            people_count[each_person] += 1
        else:
            people_count[each_person] = 0

sorted_people_by_submission_count: list[tuple[str, int]] = sorted(people_count.items(), key=lambda x: x[1], reverse=True)

top_10_people = [x[0] for x in sorted_people_by_submission_count[:10]]
top_10_values = [x[1] for x in sorted_people_by_submission_count[:10]]

px.pie(values=top_10_values, names=top_10_people, title="Top 10 Form Submitters")

In [78]:
from datetime import datetime

q2_5 = q2.copy()

today = datetime.now()
today_datetime = pd.to_datetime(pd.Timestamp(year=today.year, month=today.month, day=today.day, hour=today.hour, minute=today.minute, second=today.second))
q2_5['Months_Since'] = q2_5['Timestamp'].apply(lambda x: (today_datetime.year - x.year) * 12 + (today_datetime.month - x.month))

num_slider = widgets.IntSlider(value=0, min=0, max=np.max(q2_5['Months_Since']), description='Last X Months', readout=True, readout_format='d', orientation='horizontal')

output = widgets.Output()

def update_slider(change):
    new_change = change['new']
    if type(new_change) is dict and 'value' in new_change:
        value = new_change['value']
        found_form_submissions = q2_5[q2_5['Months_Since'] <= value]['Student UD ID Number:']
        with output:
            output.clear_output()
            for each_ud_id in found_form_submissions:
                print(each_ud_id)
    elif type(new_change) is int:
        found_form_submissions = q2_5[q2_5['Months_Since'] <= new_change]['Student UD ID Number:']
        with output:
            output.clear_output()
            for each_ud_id in found_form_submissions:
                print(each_ud_id)

num_slider.observe(update_slider)
container = widgets.VBox([num_slider, output])
container.layout.height = '200px'
display(container)

VBox(children=(IntSlider(value=0, description='Last X Months', max=6), Output()), layout=Layout(height='200px'…

In [121]:
q3 = q2_5.copy()

last_month_select = widgets.Select(options=['Overall'] + list(range(1, np.max(q3['Months_Since']))), description="Months Since", layout=widgets.Layout(width='1000px'))
x_submissions_select = widgets.IntSlider(min=0, max=np.max([x[1] for x in sorted_people_by_submission_count]), description="< X Submissions", layout=widgets.Layout(width='1000px'))

output = widgets.Output(layout=widgets.Layout(height='100px', width='1000px'))

def process_people_submission_count(x_submissions: int, last_months = None):
    if last_months is None:
        # Overall, find all submissions less than x
        found_people = list(filter(lambda x: x[1] <= x_submissions, sorted_people_by_submission_count))
        with output:
            output.clear_output()
            for each_person in found_people:
                print(each_person[0])
    else:
        last_month_submissions = q3[q3['Months_Since'] <= last_months]
        people_who_submitted = last_month_submissions['Student UD ID Number:']
        submission_count = {}
        for each_person in people_who_submitted:
            if each_person in submission_count:
                submission_count[each_person] += 1
            else:
                submission_count[each_person] = 1
        filtered_people = list(filter(lambda x: x[1] <= x_submissions, submission_count.items()))
        with output:
            output.clear_output()
            for each_person in filtered_people:
                print(each_person[0])

def onchange(change):
    selected_value_1 = last_month_select.value
    slider_value = x_submissions_select.value
    process_people_submission_count(int(str(slider_value)), None if selected_value_1 == "Overall" else int(str(selected_value_1)))

last_month_select.observe(onchange)
x_submissions_select.observe(onchange)

vbox_layout = widgets.Layout(display='flex', flex_flow='column', align_items='stretch', height='300px', justify_content='space-between')
inputs = widgets.HBox([last_month_select, x_submissions_select])
container = widgets.VBox([inputs, output], layout=vbox_layout)
container.layout.height = '200px'
display(container)

VBox(children=(HBox(children=(Select(description='Months Since', layout=Layout(width='1000px'), options=('Over…

In [137]:
q4 = q3.copy()

last_month_select = widgets.Select(options=['Overall'] + list(range(1, np.max(q3['Months_Since']))), description="Months Since", layout=widgets.Layout(width='1000px'))
vbox_layout = widgets.Layout(display='flex', flex_flow='column', align_items='stretch', height='300px', justify_content='space-between')
output = widgets.Output(layout=widgets.Layout(height='100px', width='1000px'))

def process_months_since(months_since: int | str):
    if months_since == 'Overall':
        total_counts = [x[1] for x in sorted_people_by_submission_count]
        min_value = np.min(total_counts)
        max_value = np.max(total_counts)
        median_value = np.median(total_counts)
        avg_value = np.average(total_counts)

        with output:
            output.clear_output()
            print(f'MIN: {min_value}\nMAX: {max_value}\nMEDIAN: {median_value}\nAVERAGE: {round(avg_value, 2)}')
    else:
        number_of_submissions_since = q4[q4['Months_Since'] <= months_since]
        people_submitting = number_of_submissions_since['Student UD ID Number:']
        people_submitting_count = {}
        for each_person in people_submitting:
            if each_person in people_submitting_count:
                people_submitting_count[each_person] += 1
            else:
                people_submitting_count[each_person] = 1
        total_counts = list(people_submitting_count.values())
        min_value = np.min(total_counts)
        max_value = np.max(total_counts)
        median_value = np.median(total_counts)
        avg_value = np.average(total_counts)
        with output:
            output.clear_output()
            print(f'MIN: {min_value}\nMAX: {max_value}\nMEDIAN: {median_value}\nAVERAGE: {round(avg_value, 2)}')


def onchange(change):
    selected_value_1 = last_month_select.value
    if selected_value_1 == 'Overall':
        process_months_since(str(selected_value_1))
    else:
        process_months_since(int(str(selected_value_1)))

last_month_select.observe(onchange)
container = widgets.VBox([last_month_select, output], layout=vbox_layout)
container.layout.height = '200px'
display(container)

VBox(children=(Select(description='Months Since', layout=Layout(width='1000px'), options=('Overall', 1, 2, 3, …

In [147]:
q5 = [x[1] for x in sorted_people_by_submission_count[::-1]]
median_ind = len(q5) // 2
median_lower_half = median_ind // 2
median_upper_half = median_ind + (median_ind // 2)
iqr = q5[median_upper_half] - q5[median_lower_half]
one_half_iqr = 1.5 * iqr
upper_one_half_iqr = q5[median_upper_half] + one_half_iqr
lower_one_half_iqr = q5[median_lower_half] - one_half_iqr

outliers = list(filter(lambda x: x <= lower_one_half_iqr or x >= upper_one_half_iqr, q5))
print(outliers)

[8, 9, 10, 10]
