In [58]:
import pandas as pd

In [59]:
# Read the library-system CSV export into `df` and preview its first five rows
df = pd.read_csv(filepath_or_buffer='03_Library Systembook.csv')
df.head()


Unnamed: 0,Id,Books,Book checkout,Book Returned,Days allowed to borrow,Customer ID
0,1.0,Catcher in the Rye,"""20/02/2023""",25/02/2023,2 weeks,1.0
1,2.0,Lord of the rings the two towers,"""24/03/2023""",21/03/2023,2 weeks,2.0
2,3.0,Lord of the rings the return of the kind,"""29/03/2023""",25/03/2023,2 weeks,3.0
3,4.0,The hobbit,"""02/04/2023""",25/03/2023,2 weeks,4.0
4,5.0,Dune,"""02/04/2023""",25/03/2023,2 weeks,5.0


In [60]:
# Convert both ID columns to the 'Int64' integer dtype in a single astype call
df = df.astype({'Id': 'Int64', 'Customer ID': 'Int64'})

In [61]:
# Parse both date columns as day-first dates; values that cannot be parsed are coerced to NaT.
# The checkout values carry literal double quotes in the file, so those are stripped first.
df = df.assign(**{
    'Book checkout': lambda frame: pd.to_datetime(
        frame['Book checkout'].str.replace('"', ''), errors='coerce', dayfirst=True
    ),
    'Book Returned': lambda frame: pd.to_datetime(
        frame['Book Returned'], errors='coerce', dayfirst=True
    ),
})

In [62]:
# Remove rows in which every column is missing
df = df.dropna(axis='index', how='all')

In [63]:
# Flag loans whose return date is earlier than their checkout date
df['Incorrect Date'] = df['Book Returned'].lt(df['Book checkout'])

In [64]:
# Normalise book titles: trim surrounding whitespace, then apply Title Case
df = df.assign(Books=df['Books'].str.strip().str.title())
df.head(n=10)

Unnamed: 0,Id,Books,Book checkout,Book Returned,Days allowed to borrow,Customer ID,Incorrect Date
0,1,Catcher In The Rye,2023-02-20,2023-02-25,2 weeks,1,False
1,2,Lord Of The Rings The Two Towers,2023-03-24,2023-03-21,2 weeks,2,True
2,3,Lord Of The Rings The Return Of The Kind,2023-03-29,2023-03-25,2 weeks,3,True
3,4,The Hobbit,2023-04-02,2023-03-25,2 weeks,4,True
4,5,Dune,2023-04-02,2023-03-25,2 weeks,5,True
5,6,Little Women,2023-04-02,2023-05-01,2 weeks,1,False
6,7,It,2063-04-10,2023-04-03,2 weeks,6,True
7,8,Misery,2023-04-15,2023-04-03,2 weeks,7,True
8,9,Catch 22,2023-04-15,2023-04-16,2 weeks,7,False
9,10,Animal Farm,2023-04-20,2023-04-24,2 weeks,2,False


In [65]:
# Collect each distinct title once so misspellings and missing entries are easy to spot
unique_books = df.loc[:, 'Books'].unique()
print(unique_books)

['Catcher In The Rye' 'Lord Of The Rings The Two Towers'
 'Lord Of The Rings The Return Of The Kind' 'The Hobbit' 'Dune'
 'Little Women' 'It' 'Misery' 'Catch 22' 'Animal Farm' '1984'
 'East Of Eden' 'America Is In The Heart' 'Wuthering Heights' 'Dark Tales'
 'The Bloody Chamber' 'Les Miserables' 'Dracula' 'Frankenstein' nan]


In [66]:
# Fix the misspelt title ("... Return Of The Kind" -> "... Return Of The King").
# The replacement is written in Title Case so the corrected title stays consistent
# with the Title Case formatting applied to every other book name.
# The match is case-insensitive, and regex=True is passed explicitly because the
# default value of `regex` differs between pandas versions.
df['Books'] = df['Books'].str.replace(
    'return of the kind', 'Return Of The King', case=False, regex=True
)

In [67]:
# Replace missing book titles with 'Unknown'.
# Only the text column is filled: a blanket df.fillna(0) would also write the
# integer 0 into the date and ID columns, so missing dates would stop being
# missing values and missing titles would become the number 0 instead of text.
df = df.fillna({'Books': 'Unknown'})

In [68]:
# Convert the borrowing allowance to a number of days and derive the due date.
# Make sure the checkout column is datetime-typed before doing date arithmetic.
df['Book checkout'] = pd.to_datetime(df['Book checkout'])
# Allowance text looks like "2 weeks": take the first run of digits and multiply by 7.
# NOTE(review): assumes every allowance is expressed in weeks — confirm against the data.
# The raw string avoids the invalid '\d' escape-sequence warning, and expand=False
# returns a Series (not a one-column DataFrame) for the single-column assignment.
df['Days allowed to borrow'] = (
    df['Days allowed to borrow'].str.extract(r'(\d+)', expand=False).astype(float) * 7
)
# Due date = checkout date + allowance in days (missing allowances yield NaT)
df['Due Date'] = df['Book checkout'] + pd.to_timedelta(df['Days allowed to borrow'], unit='D')

In [69]:
# Display the DataFrame to inspect the converted allowance and the new 'Due Date' column
df

Unnamed: 0,Id,Books,Book checkout,Book Returned,Days allowed to borrow,Customer ID,Incorrect Date,Due Date
0,1,Catcher In The Rye,2023-02-20,2023-02-25,14.0,1,False,2023-03-06
1,2,Lord Of The Rings The Two Towers,2023-03-24,2023-03-21,14.0,2,True,2023-04-07
2,3,Lord Of The Rings The Return of the King,2023-03-29,2023-03-25,14.0,3,True,2023-04-12
3,4,The Hobbit,2023-04-02,2023-03-25,14.0,4,True,2023-04-16
4,5,Dune,2023-04-02,2023-03-25,14.0,5,True,2023-04-16
5,6,Little Women,2023-04-02,2023-05-01,14.0,1,False,2023-04-16
6,7,It,2063-04-10,2023-04-03,14.0,6,True,2063-04-24
7,8,Misery,2023-04-15,2023-04-03,14.0,7,True,2023-04-29
8,9,Catch 22,2023-04-15,2023-04-16,14.0,7,False,2023-04-29
9,10,Animal Farm,2023-04-20,2023-04-24,14.0,2,False,2023-05-04
