In [43]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import math

In [44]:
def process_timestamp_to_pd_timestamp(date: str) -> pd.Timestamp | float:
    """
    Takes in a date string from the dataframe, and processes it into a pandas timestamp

    Args:
        date (str): The string of the date to parse

    Returns:
        pd.Timestamp: The generated timestamp from the string
    """
    if type(date) is float:
        return math.nan
    [date_info, time_info] = date.strip().split(" ")
    [year, month, day] = date_info.split("-")
    [hour, minute, second] = time_info.split(":")
    return pd.Timestamp(year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute), second=int(float(second)))

In [52]:
key_data = pd.read_csv('data.csv')
key_data['Approving_Faculty'] = key_data['Name(s) of the Faculty who is approving my key(s) (i.e. please list each faculty if different for each key)']
key_data['Sent_Request'] = key_data['I have asked my Faculty(s), who is approving my key(s), and requested him/her to send jabrow@udel.edu their approval.']
key_data['Building_Requested'] = key_data['What building(s) # and room(s) # are you requesting a key? (i.e. 101-H Smith Hall)']
key_data['Has_Lost'] = key_data['I have lost CIS key(s) and will explain the details below']
key_data['Requester_Student_Status'] = key_data['Status (i.e. undergraduate student, graduate student)']
key_data['Expected_Departure'] = key_data['Expected departure date from CIS Dept or UD graduation date']
key_data['Received Key'] = key_data[' Received Key']
key_data['Timestamp'] = key_data['Timestamp'].apply(lambda x: math.nan if type(x) is float or len(x.strip()) == 0 else x)
key_data.drop(columns=['Name(s) of the Faculty who is approving my key(s) (i.e. please list each faculty if different for each key)', 'I have asked my Faculty(s), who is approving my key(s), and requested him/her to send jabrow@udel.edu their approval.', 'What building(s) # and room(s) # are you requesting a key? (i.e. 101-H Smith Hall)', 'I have lost CIS key(s) and will explain the details below','Status (i.e. undergraduate student, graduate student)','Expected departure date from CIS Dept or UD graduation date',' Received Key'], inplace=True)

key_data['Timestamp'] = key_data['Timestamp'].apply(process_timestamp_to_pd_timestamp)
key_data['Timestamp[MS]'] = key_data['Timestamp'].apply(lambda x: x.timestamp() if type(x) is pd.Timestamp else x)
key_data.head(2)

Unnamed: 0,Timestamp,Email Address,Last Name,First Name,Email Address.1,UDID #,Comments or questions?,Date,Unnamed: 15,Approving_Faculty,Sent_Request,Building_Requested,Has_Lost,Requester_Student_Status,Expected_Departure,Received Key,Timestamp[MS]
0,2020-07-17 17:55:56,cc9f1d1a8f8b671f4f543185cb931e4544a23753c90287...,1bfe1d5b2f9d5b7a3ad74ef1f86f51364aeb11239fc45f...,ee286ba3370879a600b35ef67cf90c68d88e01e3fbf618...,cc9f1d1a8f8b671f4f543185cb931e4544a23753c90287...,5156c08dc3e446c6ea03b067b00b4358b508591e365cca...,I already have this key.,,,Dr. Li Liao,No,218.0,,graduate,2021-12-31 00:00:00,Done,1595008556.0
1,2020-07-17 17:57:54,2929495bc54a7ec367e00337dc2a61a1a991ab808c7de6...,00c1e468e4e4b1d39812fd02c8c5a0cba81828a5682ece...,00e4537e5c99c2628979d0db9697b4b6dd3efe480875e4...,2929495bc54a7ec367e00337dc2a61a1a991ab808c7de6...,dfc1028b2fe9013269de00c694bd06fddb8128a1644909...,,,,James Clause,Yes,"Room 213 and 203, Smith Hall building key",,graduate student,2022-06-01 00:00:00,Done,1595008674.0


In [46]:
key_data.dtypes

Timestamp                   datetime64[ns]
Email Address                       object
Last Name                           object
First Name                          object
Email Address.1                     object
UDID #                              object
Comments or questions?              object
Date                                object
Unnamed: 15                         object
Approving_Faculty                   object
Sent_Request                        object
Building_Requested                  object
Has_Lost                            object
Requester_Student_Status            object
Expected_Departure                  object
Received Key                        object
dtype: object

In [47]:
key_data.columns

Index(['Timestamp', 'Email Address', 'Last Name', 'First Name',
       'Email Address.1', 'UDID #', 'Comments or questions?', 'Date',
       'Unnamed: 15', 'Approving_Faculty', 'Sent_Request',
       'Building_Requested', 'Has_Lost', 'Requester_Student_Status',
       'Expected_Departure', 'Received Key'],
      dtype='object')

In [90]:
impute_clone = key_data.copy()
mean = np.mean(impute_clone['Timestamp[MS]'])
print(mean)
impute_clone['Timestamp[MS]'] = impute_clone['Timestamp[MS]'].astype(object).where(impute_clone['Timestamp[MS]'].notnull(), np.nan)
impute_clone.fillna(value=float(mean), inplace=True)
impute_clone['Timestamp'] = impute_clone['Timestamp[MS]'].apply(lambda x: pd.Timestamp.fromtimestamp(x))
impute_clone['Timestamp'].unique()

1641126960.1740742


<DatetimeArray>
[       '2020-07-17 13:55:56',        '2020-07-17 13:57:54',
        '2020-07-17 14:00:06',        '2020-07-17 14:01:45',
        '2020-07-17 14:10:56',        '2020-07-17 14:14:39',
        '2020-07-17 14:16:37',        '2020-07-17 14:24:40',
        '2020-07-17 14:43:09',        '2020-07-17 14:59:12',
 ...
        '2023-04-19 09:14:02',        '2023-04-26 09:47:06',
        '2023-05-09 07:43:36',        '2023-05-10 15:49:07',
        '2023-06-05 16:53:34',        '2023-06-09 10:56:47',
        '2023-06-12 09:20:40',        '2023-06-12 09:26:56',
        '2023-05-15 06:37:00', '2022-01-02 07:36:00.174074']
Length: 270, dtype: datetime64[ns]

In [93]:
not_received_key = impute_clone[key_data['Received Key'].isna()]

q1 = pd.DataFrame(not_received_key[['First Name', 'Last Name', 'Email Address', 'Timestamp']]).sort_values('Timestamp')

q1

Unnamed: 0,First Name,Last Name,Email Address,Timestamp
46,cdfc4d16d4d6b87c1a2bdbb01aa47ebb5faac6f01ab824...,f31de62e4765a23bba385fe88d2b8f01fe71c3e6a5b05d...,bb6c2037223e8584c4fe3815cbc9e4308d16d4f330e2a8...,2021-06-16 10:31:38
55,9e60deee6af9ee74df162fe7858dc30e0183ca55761144...,b4672cb87ff350ef828cd72685a9fb34310508bd6b7821...,270c8600cf096099a24bc5b00d233339a5ea46f7b8c73f...,2021-07-14 20:00:00
67,cb14bf5073ebaf6d9d04b63164b7017b2011d3558fb2f8...,b9ae62ede2dad179198540d5a84bf5e432f8f36c370d90...,70c3430c92aa5234fc9b8ed874273f469a98f2f2c41e97...,2021-08-16 12:49:22
73,a435e2d26d33b5972b6638363c8dedb38f25166b363b27...,d9aa89fdd15ad5c41d9c128feffe9e07dc828b83f85296...,9b5882f7299bf684b366d5d42bfdd392b6c604769ec2c1...,2021-08-29 06:01:29
76,edd2916124c93479ced1dd30f618d002478a35eeec25f6...,c3a0de68a2e6d41be6bc64ba1f04e427e1012866900b00...,267322a731d4fbbb2e30ff0baf29a1d8809cb4edac5aec...,2021-08-31 05:54:12
...,...,...,...,...
269,093de0b0eeb1edce1def56262d4775111263d9a0a36bb4...,8ad4bcb926dc9dbc2f3a2fd89fca3e602d7a4ce217bc11...,7d870b52d7cf9bb9dfa04906d214695009508381d2ea26...,2023-05-15 06:37:00
265,3090ad7f5b83a40b050aad6e04d2f663049aca5cf0253e...,6b17e3543fd927d1a98501b267f894050b3d4a56c1607a...,95aeac91012f2eac509741293be01d8bf6c75ad2a7b874...,2023-06-05 16:53:34
266,190eb3ebae2b41124493ac98ac49717fbd290156ca2e0d...,f581a0ce4c142795193b7b06909ca18b84471012379d54...,96e3c4bbb492c9070922f9af92c77ba7ba418cedbff594...,2023-06-09 10:56:47
267,5d33424039b4446e006ca13c5f68eaf1fea9d14ad8c605...,e6942dcf79e29371040e00a9c577ea4246477b76f982ae...,f3b26c4350de31d052f8776d1af1eef9147744dc765e30...,2023-06-12 09:20:40


In [114]:
q2 = impute_clone.copy()

q2['Month_Submitted'] = q2['Timestamp'].apply(lambda x: x.month)

months = {
    1: 0,
    2: 0,
    3: 0,
    4: 0,
    5: 0,
    6: 0,
    7: 0,
    8: 0,
    9: 0,
    10: 0,
    11: 0,
    12: 0
}

for each_month in months.keys():
    months[each_month] = len(q2[q2['Month_Submitted'] == each_month])

month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

px.pie(values=months.values(), names=month_names, title="Building Key Requests Organized by Months", template="plotly_dark")

In [125]:
q3 = q2.copy()

all_ids = q3['UDID #'].tolist()
ids_set = set(all_ids)

ids_count = {}

for each_name in all_ids:
    if each_name in ids_set:
        if each_name in ids_count:
            ids_count[each_name] += 1
        else:
            ids_count[each_name] = 1

top_10 = sorted(ids_count.items(), key=lambda x: x[1], reverse=True)[:10]
top_10 = [x[0] for x in top_10]
top_10_values = [x[1] for x in top_10]

px.pie(values=top_10_values, names=top_10, title="People with the most building key requests")

In [142]:
q4 = q3.copy()

approving_faculty_count = {}
all_faculty = list(filter(lambda x: len(x) > 0, [x.strip() if type(x) is str else '' for x in q4['Approving_Faculty'].tolist()]))
all_faculty_set = set(all_faculty)

for each_faculty_id in all_faculty:
    if each_faculty_id in all_faculty_set:
        if each_faculty_id in approving_faculty_count:
            approving_faculty_count[each_faculty_id] += 1
        else:
            approving_faculty_count[each_faculty_id] = 1


sorted_faculty = sorted(approving_faculty_count.items(), key=lambda x: x[1], reverse=True)

top_10 = sorted_faculty[:10]
top_10_names = [x[0] for x in top_10]
top_10_values = [x[1] for x in top_10]

px.pie(values=top_10_values, names=top_10_names, title="Top 5 Faculty Approvers")

In [143]:
sorted_faculty

[('Sunita Chandrasekaran', 25),
 ('Matthew Saponaro', 12),
 ('Chandra Kambhamettu', 10),
 ('Debra Yarrington', 10),
 ('Lori Pollock', 8),
 ('Dr. Bart', 7),
 ('Rui Zhang', 6),
 ('Xi Peng', 6),
 ('Keith Decker', 5),
 ('Chien-Chung Shen', 5),
 ('Ilya Safro', 4),
 ('Jackie Brown', 4),
 ('Jacqueline Brown', 4),
 ('Matt Saponaro', 4),
 ('Prof. Chien-Chung Shen', 3),
 ('Kathy McCoy', 3),
 ('Dr. Austin Bart', 3),
 ('Li Liao', 3),
 ('Dr. Yarrington', 3),
 ('Matt', 3),
 ('Dr. Li Liao', 2),
 ('Leila Barmaki', 2),
 ('Lena Mashayekhy', 2),
 ('Rahmat Beheshti', 2),
 ('Lena Mashayekhy, Ph.D.', 2),
 ('Dr. Xi Peng', 2),
 ('Dr. Matthew Louis Mauriello', 2),
 ('Prof. Sunita Chandrasekaran', 2),
 ('Dr. Ilya Safro', 2),
 ('Dr. Chandrasekaran', 2),
 ('Austin Cory Bart', 2),
 ('Dr. James Clause', 2),
 ('Xing Gao', 2),
 ('Bryan Youse', 2),
 ('Prof. Andrew Roosen', 2),
 ('Dr. Harvey', 2),
 ('Professor Roosen', 2),
 ('Andrew Roosen', 2),
 ('Professor Xing Gao', 2),
 ('James Clause', 1),
 ('Decker?', 1),
 ('Hagi