In [1]:
pip install pandas Faker

Collecting Faker
  Downloading Faker-25.1.0-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Faker
Successfully installed Faker-25.1.0


In [7]:
import random
from datetime import timedelta

import pandas as pd
from faker import Faker

fake = Faker()

# Settings
# Fixed seed so that Restart & Run All regenerates the same dataset. Faker keeps
# its own generator, separate from the stdlib `random` module, so both are seeded.
# Dates are still drawn relative to the day the notebook runs, so they shift
# from one day to the next.
SEED = 42
Faker.seed(SEED)
random.seed(SEED)

num_libraries = 5            # rows in `libraries`
num_books_per_library = 20   # distinct titles generated per library
num_copies_per_book = 3      # physical copies generated per title
num_users = 50               # rows in `users`
num_loans = 100              # rows in `loans`
num_holds = 50               # rows in `holds`

# Generate Libraries
# One row per library: a sequential 1-based id, a Faker company name used as the
# library name, and a Faker address used as its location.
library_ids = range(1, num_libraries + 1)
library_names = [fake.company() for _ in library_ids]
library_locations = [fake.address() for _ in library_ids]

libraries = pd.DataFrame({
    'library_id': library_ids,
    'name': library_names,
    'location': library_locations,
})

# Generate Books
# The catalogue holds num_libraries * num_books_per_library titles with
# sequential 1-based ids. Titles are Faker sentences, authors are Faker names,
# and each book gets one category picked at random from a fixed list.
book_count = num_libraries * num_books_per_library
book_categories = ['Self-improvement', 'Biography', 'Fantasy', 'Romance', 'Science Fiction']

book_titles = [fake.sentence(nb_words=5) for _ in range(book_count)]
book_authors = [fake.name() for _ in range(book_count)]
book_category_picks = [random.choice(book_categories) for _ in range(book_count)]

books = pd.DataFrame({
    'book_id': range(1, book_count + 1),
    'title': book_titles,
    'author': book_authors,
    'category': book_category_picks,
})

# Generate Book Copies
# Every book gets `num_copies_per_book` consecutive copy rows, and libraries are
# assigned in contiguous runs of num_books_per_library * num_copies_per_book
# rows, so each library ends up holding all copies of its own block of books.
# Availability is an independent coin flip per copy.
copy_count = num_libraries * num_books_per_library * num_copies_per_book
copies_per_library = num_books_per_library * num_copies_per_book

book_copies = pd.DataFrame({
    'copy_id': range(1, copy_count + 1),
    # Nested comprehensions repeat each id in place; this avoids the quadratic
    # list re-copying of sum(list_of_lists, []).
    'book_id': [book_id for book_id in books['book_id'] for _ in range(num_copies_per_book)],
    'library_id': [
        library_id
        for library_id in range(1, num_libraries + 1)
        for _ in range(copies_per_library)
    ],
    'is_available': [random.choice([True, False]) for _ in range(copy_count)]
})

# Check if there are enough unavailable copies before attempting to create loan records
unavailable_copy_count = int(book_copies['is_available'].eq(False).sum())
if unavailable_copy_count < num_loans:
    raise ValueError("Not enough unavailable copies to generate the requested number of loans.")

# Generate Users
# Each user gets a sequential 1-based id, Faker-generated username / email /
# SHA-256 hex string, and a registration date drawn from the two years up to today.
user_ids = range(1, num_users + 1)
usernames = [fake.user_name() for _ in user_ids]
emails = [fake.email() for _ in user_ids]
password_hashes = [fake.sha256() for _ in user_ids]
registration_dates = [
    fake.date_between(start_date='-2y', end_date='today')
    for _ in user_ids
]

users = pd.DataFrame({
    'user_id': user_ids,
    'username': usernames,
    'email': emails,
    'password_hash': password_hashes,
    'registered_on': registration_dates,
})

# Generate Loans
# Each loan borrows one currently-unavailable copy for a random user. Dates are
# derived from the loan date so every row is internally consistent:
#   loan_date   - some day in the past year
#   due_date    - loan_date + loan_period_days
#   return_date - None (still out) or a day between loan_date and today
loan_period_days = 14

# Sample without replacement so a copy is never lent out twice; this is the
# case the "enough unavailable copies" guard protects. random.sample itself
# raises ValueError if fewer than num_loans copies are unavailable.
unavailable_copy_ids = book_copies.loc[book_copies['is_available'].eq(False), 'copy_id'].tolist()
loaned_copy_ids = random.sample(unavailable_copy_ids, k=num_loans)

loan_dates = [fake.date_between(start_date='-1y', end_date='today') for _ in range(num_loans)]

loans = pd.DataFrame({
    'loan_id': range(1, num_loans + 1),
    'copy_id': loaned_copy_ids,
    'user_id': random.choices(users['user_id'].tolist(), k=num_loans),
    'loan_date': loan_dates,
    'due_date': [loan_date + timedelta(days=loan_period_days) for loan_date in loan_dates],
    # A candidate return date is drawn for every loan and then kept or
    # replaced by None with equal probability.
    'return_date': [
        random.choice([None, fake.date_between(start_date=loan_date, end_date='today')])
        for loan_date in loan_dates
    ]
})

# Generate Holds
# Each hold pairs a random book with a random user (both drawn with
# replacement). The hold was placed within the last six months and expires
# one to two weeks from today.
holds = pd.DataFrame({
    'hold_id': range(1, num_holds + 1),
    # Plain lists, as in the loans section: random.choices indexes its
    # population by position, which on a Series only works by accident of the
    # default 0..n-1 index.
    'book_id': random.choices(books['book_id'].tolist(), k=num_holds),
    'user_id': random.choices(users['user_id'].tolist(), k=num_holds),
    # Faker offsets are case-sensitive: 'M' is months, 'm' is minutes. The
    # lowercase form made every hold_placed_date equal to today.
    'hold_placed_date': [fake.date_between(start_date='-6M', end_date='today') for _ in range(num_holds)],
    'expiration_date': [fake.date_between(start_date='+7d', end_date='+14d') for _ in range(num_holds)]
})