In [1]:
import pandas as pd

df = pd.read_csv('bookings20191113.csv', na_values='', skiprows=1)
#df.head()

# convert the 'Date' column to datetime format 
df['Start']= pd.to_datetime(df['Start']) 
df['End']= pd.to_datetime(df['End']) 
df['Duration'] = df['End'] - df['Start']

df['DurationH'] = df.apply(lambda row: row.Duration.total_seconds() / 3600, axis=1)
df['DurationH'] = df['DurationH'].round(decimals=1)
# Check the format of 'Date' column 


# https://stackoverflow.com/questions/35321812/move-column-in-pandas-dataframe/35322540
from pandas import DataFrame

def move_columns(df: DataFrame, cols_to_move: list, new_index: int) -> DataFrame:
    """
    This method re-arranges the columns in a dataframe to place the desired columns at the desired index.
    ex Usage: df = move_columns(df, ['Rev'], 2)   
    :param df:
    :param cols_to_move: The names of the columns to move. They must be a list
    :param new_index: The 0-based location to place the columns.
    :return: Return a dataframe with the columns re-arranged
    """
    other = [c for c in df if c not in cols_to_move]
    start = other[0:new_index]
    end = other[new_index:]
    return df[start + cols_to_move + end]

df = move_columns(df,['DurationH'], 4)

#df.info() 
#df.head()

In [2]:
# check which 3I entries have price info
tmp = df[df.Resource.str.startswith("3I Marianas") & (~df.Charges.isnull()) ]
print(tmp["Resource"].unique())

['3I Marianas no lasers ']


In [3]:
# find bookings that are longer than 1 day
day = pd.Timedelta("1 day")
df[df["Duration"] > day]

Unnamed: 0,Date of booking,Start,End,Duration,DurationH,Resource,Type,Provider,Title,Comments,...,Cost center,Request ID,Operator,Products,Label,Setup,BookedBy,Project,Form,Form (JSON)


In [4]:
addons = [ \
          #"3I Marianas base with lasers", "3I Marianas no lasers ", 
          "3I Marianas", \
          "3I 405", "3I 488", "3I 561", "3I 640", "3I marianas no laser add-on","3I marianas no lasers", \
          "Zeiss Z.1 LightSheet Lasers", "Light sheet 405", "Light sheet 445", "Light sheet 488", "Light sheet 514", "Light sheet 561", "Light sheet 640", "Light sheet no laser add on", "Zeiss Z.1 LightSheet Data management", \
          "LSM700 405", "LSM700 488","LSM700 555","LSM700 639", \
          "SP5 HCS-A 405", "SP5 HCS-A Argon","SP5 HCS-A 561","SP5 HCS-A 633", \
          "SP5 MP 405", "SP5 MP Argon", "SP5 MP 561", "SP5 MP 594", "SP5 MP 633", "SP5 MP Laser MP", \
          "SP8  STED 592 STED", "SP8 STED 405", "SP8 STED Argon","SP8 STED 561","SP8 STED 633", \
          "SP8 upright 405", "SP8 upright 488 ", "SP8 upright 552", "SP8 upright 638", \
          "No laser (admin only)", \
          "Sheep (TESTING AND DEVELOPMENT PURPOSE", "Super testers practice instrument"]

ignore_groups = ["Group Raimi research inc", "TEST Viktor"]
ignore_statuses = ["Canceled", "Upcoming", "Undefined"]

#print(addons)

# remove lines where resource is an add-on
df = df[~df['Resource'].isin(addons)]

# remove lines with booking status that should be ignored
df = df[~df['Status'].isin(ignore_statuses)]

df = df[~df['Group'].isin(ignore_groups)]

df = df[~df['BookedBy'].isin(['iris@science-it.ch'])]

# drop timedelta column
df = df.drop(columns=['Duration'])

# rename hours column
df = df.rename(columns={"DurationH": "Duration"})

# save maintenance bookings
df2 = df[df['Type'].str.contains("Maintenance")]

# remove maintenance bookings from original
df = df[~df['Type'].str.contains("Maintenance")]


In [5]:
# save as CSV

df.to_csv('bookings2019-edited.csv', index=False, na_rep='')
df2.to_csv('bookings2019-maintenance.csv', index=False, na_rep='')


In [6]:
print(sorted(df.Resource.unique()))
print(sorted(df.Status.unique()))

['3D Workstation', '3I Marianas no lasers ', 'Aurox Clarity', 'HELMI 3D VM', 'LS Workstation', 'Leica SP8 STED', 'Leica SP8 upright', 'MolecularDevices Nano', 'SP8 upright 488', 'Zeiss LSM700']
['Elapsed']


In [7]:
# calculate group totals

df2 = df.groupby(["Group"]).sum()
df2['Duration'] = df2['Duration'].round(decimals=0)
df2.index.names = ['IRIS group']
df2 = df2.rename(columns={"Duration": "IRIS total hours"})
df2 = df2.drop(columns=['Request ID','Operator','Products', 'Project'])
#df2.info()
df2.to_csv('bookings2019-group-totals.csv', index=True, na_rep='')


#df3 = pd.DataFrame()
#df3['IRIS_group'] = sorted(df.Group.unique())
#df3['IRIS_total_hours'] = df3.apply(lambda row: row.IRIS_group, axis=1)


In [8]:
# find bookings with no WBS

lmu_staff = ["Harri.Jaalinoja@helsinki.fi", "marko.crivaro@helsinki.fi", "kimmo.tanhuanpaa@helsinki.fi", "mika.molin@helsinki.fi", "viktor.raimi@helsinki.fi"]
previous_billing_date = '2019-5-23'

df[df["Cost center"].isnull() & ((df.Start > previous_billing_date))]


Unnamed: 0,Date of booking,Start,End,Duration,Resource,Type,Provider,Title,Comments,User,...,Cost center,Request ID,Operator,Products,Label,Setup,BookedBy,Project,Form,Form (JSON)
