In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import pytz
from datetime import datetime

import matplotlib.pyplot as plt

%matplotlib qt

In [2]:
# Read csv file into a pandas dataframe
ci_df = pd.read_csv("dailycheckins.csv")
ci_df

Unnamed: 0,user,timestamp,hours,project
0,ned,2019-09-27 00:00:00 UTC,8.00,bizdev
1,robert,09/27/2019 12:00 AM,8.00,bizdev
2,ned,26 сентября 2019 00:00,4.00,bizdev
3,ned,2019-09-26 00:00:00 UTC,1.00,cultureandmanagement
4,ned,2019-09-26 00:00:00 UTC,1.50,project-00
5,ned,2019-09-26 00:00:00 UTC,1.00,project-43
6,jaime,12/21/2018 12:00 AM,2.00,project-00
7,jaime,2018-12-21 00:00:00 UTC,0.50,project-47
8,jaime,2018-12-21 00:00:00 UTC,3.50,project-47
9,jaime,2018-12-20 00:00:00 UTC,1.50,project-00


In [3]:
# Knowing which rows have null values
ci_df[ci_df['user'].isna()]

Unnamed: 0,user,timestamp,hours,project
15797,,2017-12-27 10:36:14.000121 UTC,4.0,project-40
15798,,2017-12-27 10:36:14.000121 UTC,3.0,learning
17572,,2017-10-12 10:31:44.000227 UTC,2.75,project-47
17573,,2017-10-12 10:31:44.000227 UTC,4.0,bizdev
17574,,2017-10-12 10:31:44.000227 UTC,1.0,transit


In [4]:
ci_df.project.unique()

array(['bizdev', 'cultureandmanagement', 'project-00', 'project-43',
       'project-47', 'project-30', 'opsandadmin', 'security', 'blogideas',
       'project-10', 'internal', 'project-51', 'transit', 'learning',
       'products', 'project-64', 'project-26', 'project-66', 'events',
       'project-25', 'project-32', 'project-31', 'finance', 'project-40',
       'engineering', 'project-09', 'project-65', 'project-28',
       'project-27', 'website', 'marketing', 'blog-ideas', 'hiring',
       'project-46', 'project-06', 'pm', 'project-07', 'legal',
       'project-33', 'strategy', 'project-05', 'project-62',
       'datastrategy', 'project-22', 'project-70', 'project-69',
       'project-04', 'opdandadmin', 'project-38', 'internals', 'leave',
       'recruitment', 'clutureandmanagement', 'project-01',
       'datastorytelling', 'project-08', 'project-19', 'project-73',
       'lunch', 'sqool', 'project-20', 'onboarding', 'workshops',
       'project-36', 'data', 'machine-learning', 'w

In [5]:
ci_df.project.value_counts()

opsandadmin             2824
bizdev                  1784
cultureandmanagement    1273
learning                1179
project-00               946
project-10               672
website                  639
project-40               612
blogideas                567
project-43               526
project-68               485
project-64               484
project-25               467
project-65               465
project-20               456
hiring                   446
project-13               385
project-06               329
project-55               321
project-73               310
transit                  293
workshops                285
engineering              271
project-30               268
project-16               246
project-47               225
events                   219
project-51               215
project-27               206
project-32               200
                        ... 
dogood                     1
opandadmin                 1
project-60                 1
overthinking  

In [6]:
ci_df.user.unique()

array(['ned', 'robert', 'jaime', 'catelyn', 'cersei', 'daenerys', 'jorah',
       'viserys', 'jon', 'sansa', 'arya', 'robb', 'theon', 'bran',
       'joffrey', 'hound', 'tyrion', 'khal', 'littlefinger', 'davos',
       'samwell', 'stannis', 'melisandre', 'jeor', 'bronn', 'varys',
       'shae', 'margaery', 'tywin', 'talisa', 'ygritte', 'gendry',
       'tormund', 'brienne', nan, 'ramsay', 'gilly', 'daario',
       'missandei', 'ellaria', 'tommen'], dtype=object)

In [7]:
ci_df[ci_df['project']=='misc']

Unnamed: 0,user,timestamp,hours,project
7965,robb,2018-08-07 12:15:26.000233 UTC,1.0,misc
8217,robb,2018-08-03 00:00:00 UTC,4.0,misc
8242,robb,2 августа 2018 14:22,1.0,misc
8382,robb,2018-08-01 10:16:51.00001 UTC,0.5,misc
8445,robb,2018-07-31 12:52:43.000424 UTC,4.0,misc
8565,robb,2018-07-30 10:20:52.000009 UTC,2.0,misc
8794,joffrey,2018-07-26 00:00:00 UTC,1.0,misc
8800,joffrey,2018-07-26 00:00:00 UTC,0.25,misc
19826,tyrion,2017-07-28 02:15:31.518069 UTC,0.5,misc
19977,tommen,2017-07-21 10:53:12.388865 UTC,1.0,misc


In [8]:
def project_cleaner(proj):
    if proj[:2]=="op":
        return "opsandadmin"
    elif proj[:4]=="cult":
        return "cultureandmanagement"
    elif proj[:4]=="hirn":
        return "hiring"
    else:
        return proj

ci_df.project = ci_df.project.apply(lambda x: project_cleaner(x))

In [9]:
ci_df.project.unique()

array(['bizdev', 'cultureandmanagement', 'project-00', 'project-43',
       'project-47', 'project-30', 'opsandadmin', 'security', 'blogideas',
       'project-10', 'internal', 'project-51', 'transit', 'learning',
       'products', 'project-64', 'project-26', 'project-66', 'events',
       'project-25', 'project-32', 'project-31', 'finance', 'project-40',
       'engineering', 'project-09', 'project-65', 'project-28',
       'project-27', 'website', 'marketing', 'blog-ideas', 'hiring',
       'project-46', 'project-06', 'pm', 'project-07', 'legal',
       'project-33', 'strategy', 'project-05', 'project-62',
       'datastrategy', 'project-22', 'project-70', 'project-69',
       'project-04', 'project-38', 'internals', 'leave', 'recruitment',
       'clutureandmanagement', 'project-01', 'datastorytelling',
       'project-08', 'project-19', 'project-73', 'lunch', 'sqool',
       'project-20', 'onboarding', 'workshops', 'project-36', 'data',
       'machine-learning', 'weeklygoals', 'q

In [10]:
ci_df.project.value_counts()

opsandadmin             2831
bizdev                  1784
cultureandmanagement    1278
learning                1179
project-00               946
project-10               672
website                  639
project-40               612
blogideas                567
project-43               526
project-68               485
project-64               484
project-25               467
project-65               465
project-20               456
hiring                   447
project-13               385
project-06               329
project-55               321
project-73               310
transit                  293
workshops                285
engineering              271
project-30               268
project-16               246
project-47               225
events                   219
project-51               215
project-27               206
project-32               200
                        ... 
internals                  2
project-24                 2
pandas                     2
internal      

In [27]:
ci_df[ci_df['user']=='shae']

Unnamed: 0,user,timestamp,hours,project
1837,shae,2018-10-26 00:00:00 UTC,10.0,project-43
1838,shae,2018-10-26 00:00:00 UTC,20.0,project-30
1839,shae,2018-10-26 00:00:00 UTC,2.0,project-10
1840,shae,2018-10-26 00:00:00 UTC,1.0,project-43
1841,shae,10/26/2018 12:00 AM,6.0,project-30
2818,shae,2018-10-15 00:00:00 UTC,0.5,cultureandmanagement
2819,shae,2018-10-15 00:00:00 UTC,3.0,project-30
2820,shae,2018-10-15 00:00:00 UTC,4.5,project-43
2821,shae,2018-10-15 00:00:00 UTC,3.0,project-10
2822,shae,2018-10-15 00:00:00 UTC,13.0,project-43


In [31]:
ci_df[ci_df['project']=='project-43']

Unnamed: 0,user,timestamp,hours,project
5,ned,2019-09-26 00:00:00 UTC,1.00,project-43
17,catelyn,2018-11-26 14:47:36.0429 UTC,0.68,project-43
19,catelyn,2018-11-26 14:47:36.0429 UTC,1.53,project-43
33,jorah,2018-11-26 12:49:17.0405 UTC,1.50,project-43
75,theon,2018-11-26 09:00:55.0212 UTC,1.00,project-43
110,tyrion,2018-11-23 00:00:00 UTC,3.08,project-43
118,theon,2018-11-23 00:00:00 UTC,1.00,project-43
157,theon,2018-11-22 15:45:27.0511 UTC,3.00,project-43
224,tyrion,2018-11-22 00:00:00 UTC,3.00,project-43
245,melisandre,2018-11-22 00:00:00 UTC,3.45,project-43


In [32]:
ci_df[ci_df['project']=='fatnesscheckin']

Unnamed: 0,user,timestamp,hours,project
18189,davos,2017-09-22 11:09:54.000052 UTC,2.5,fatnesscheckin
