In [1]:
%pip install pandas faker

Collecting faker
  Downloading Faker-30.0.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-30.0.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: faker
Successfully installed faker-30.0.0
Note: you may need to restart the kernel to use updated packages.


In [21]:
import csv  
import random  
from datetime import datetime, timedelta  
  
# Sample authors
# Pool of 50 author names; one entry per line so additions/removals diff cleanly.
authors = [
    "Thomas H. Cormen",
    "Stuart Russell",
    "Andrew Hunt",
    "Bjarne Stroustrup",
    "Donald Knuth",
    "Martin Fowler",
    "Robert C. Martin",
    "Elon Musk",
    "Ada Lovelace",
    "Alan Turing",
    "Grace Hopper",
    "Linus Torvalds",
    "Dennis Ritchie",
    "Ken Thompson",
    "John McCarthy",
    "Peter Norvig",
    "Margaret Hamilton",
    "Barbara Liskov",
    "John von Neumann",
    "Tim Berners-Lee",
    "Vinton Cerf",
    "David Parnas",
    "John Backus",
    "Niklaus Wirth",
    "Guido van Rossum",
    "James Gosling",
    "John Carmack",
    "Brian Kernighan",
    "Richard Stallman",
    "Robert Morris",
    "Steve Wozniak",
    "Bill Gates",
    "Larry Page",
    "Sergey Brin",
    "Mark Zuckerberg",
    "Jeff Bezos",
    "Satya Nadella",
    "Tim Cook",
    "Sundar Pichai",
    "Marissa Mayer",
    "Sheryl Sandberg",
    "Susan Wojcicki",
    "Angela Ahrendts",
    "Meg Whitman",
    "Ginni Rometty",
    "Reshma Saujani",
    "Padmasree Warrior",
    "Diane Greene",
    "Safra Catz",
    "Jennifer Doudna",
]
  
# Function to generate unique titles
def generate_unique_titles(num_titles):
    """Return ``num_titles`` distinct book titles in random order.

    Every title has the shape ``"<prefix> <subject> <suffix>"``, built from the
    fixed word lists below. With 10 prefixes, 20 subjects and 10 suffixes there
    are 2000 possible titles in total.

    Args:
        num_titles: Number of distinct titles wanted (an integer). Values of
            zero or less yield an empty list.

    Returns:
        A list of ``num_titles`` unique title strings. The selection is driven
        only by the ``random`` module's state, so it is reproducible after
        ``random.seed(...)``.

    Raises:
        ValueError: If ``num_titles`` is larger than the number of distinct
            titles that can be built. (Drawing random titles until enough
            distinct ones were seen would never terminate in that case.)
    """
    subjects = [
        "Algorithms", "Artificial Intelligence", "Machine Learning", "Data Science",
        "Software Engineering", "Cybersecurity", "Computer Networking", "Quantum Computing",
        "Computer Vision", "Natural Language Processing", "Deep Learning", "Web Development",
        "Game Development", "Blockchain Technology", "Cloud Computing", "Mobile App Development",
        "Big Data", "Embedded Systems", "Augmented Reality", "Virtual Reality"
    ]

    prefixes = [
        "A Comprehensive Guide to", "Understanding", "The Future of", "Exploring",
        "Mastering", "The Essentials of", "Advanced Techniques in", "The Complete Handbook of",
        "Practical Insights into", "Innovations in"
    ]

    suffixes = [
        "for Beginners", "for Professionals", "in Practice", "with Python", "from Scratch",
        "and Applications", "in the Modern World", "and Beyond", "for Everyone",
        "and Future Trends"
    ]

    if num_titles <= 0:
        return []

    # Enumerate every combination once, in a fixed order. dict.fromkeys drops
    # any accidental duplicate strings while keeping that order stable.
    all_titles = list(dict.fromkeys(
        f"{prefix} {subject} {suffix}"
        for prefix in prefixes
        for subject in subjects
        for suffix in suffixes
    ))

    if num_titles > len(all_titles):
        raise ValueError(
            f"Cannot generate {num_titles} unique titles; "
            f"only {len(all_titles)} distinct combinations exist."
        )

    # Sampling without replacement guarantees uniqueness in a single pass.
    return random.sample(all_titles, num_titles)
  
# Generate a unique list of titles.
# The CSV rows pick from this pool with random.choice, so a given title can
# appear on more than one row even though the pool itself has no duplicates.
unique_titles = generate_unique_titles(1500)  # 1500 unique titles
  
# Publishers
# Pool of publisher names; one entry per line.
publishers = [
    "MIT Press",
    "Prentice Hall",
    "O'Reilly Media",
    "Springer",
    "Wiley",
    "Cambridge University Press",
    "McGraw-Hill",
    "Elsevier",
    "Harvard University Press",
    "Packt Publishing",
]
  
# Description templates
# Each entry is a str.format template whose only placeholder is {title}.
# Adjacent string literals are concatenated by Python, so every parenthesised
# group below is a single template string.
description_templates = [
    (
        "{title} provides a comprehensive overview of its subject matter. "
        "It delves into the intricacies of {title}, making it accessible to "
        "both beginners and advanced readers. "
        "With thorough explanations and practical examples, it aims to equip "
        "you with the knowledge necessary to excel in this field."
    ),
    (
        "In {title}, the author explores the fundamental principles and "
        "methodologies that define the field. "
        "This enlightening read not only highlights key concepts but also "
        "challenges the reader to think critically about the implications of "
        "{title} in the modern world."
    ),
    (
        "Discover the world of {title} through this enlightening book. "
        "It provides a detailed examination of critical theories and "
        "frameworks, helping readers to develop a robust understanding of "
        "the subject."
    ),
    (
        "With {title}, readers are invited to embark on an intellectual "
        "adventure. "
        "This book tackles the fundamental questions surrounding {title} "
        "with a fresh perspective."
    ),
    (
        "Are you ready to dive deep into the world of {title}? "
        "This book is designed to take you through the essentials while also "
        "expanding your understanding of the nuances involved."
    ),
    # Add more unique templates to ensure variety
]
  
# Function to generate a random date
def random_date(start, end):
    """Pick a random point between ``start`` and ``end``, both inclusive.

    The result is ``start`` shifted forward by a whole number of days drawn
    with ``random.randint`` from ``0`` to ``(end - start).days``.

    Args:
        start: Lower bound; any value supporting ``end - start`` and
            ``start + timedelta`` (e.g. ``datetime.date``).
        end: Upper bound of the same type as ``start``.

    Returns:
        A value of the same type as ``start``.
    """
    span_in_days = (end - start).days
    day_offset = random.randint(0, span_in_days)
    return start + timedelta(days=day_offset)
  
# Function to get the correct edition suffix
def get_edition_suffix(num):
    """Format ``num`` as an English ordinal string, e.g. ``1 -> "1st"``.

    Args:
        num: The edition number.

    Returns:
        ``num`` followed by "st", "nd", "rd" or "th".
    """
    last_two = num % 100
    if last_two < 10 or last_two > 20:
        # Outside 10-20 the final digit alone decides the ending.
        ending = {1: "st", 2: "nd", 3: "rd"}.get(num % 10, "th")
    else:
        # 10 through 20 (which includes 11th, 12th, 13th) always end in "th".
        ending = "th"
    return f"{num}{ending}"
  
# Generate CSV
# Writes NUM_RECORDS synthetic book rows to OUTPUT_PATH, overwriting any
# existing file. Every field is drawn independently from the `random` module,
# except firstPublishDate, which is constrained to be on or before publishDate.
NUM_RECORDS = 20000
OUTPUT_PATH = 'books.csv'


def _random_isbn13():
    """Return a random 13-digit ISBN string with a valid check digit.

    The result looks like ``978-0-306-40615-7``: the ``978`` prefix, nine
    random digits shown with a fixed 1-3-5 hyphenation, and a check digit
    computed from the first twelve digits. Each segment is zero-padded so the
    string always has the same length. The fixed 1-3-5 split is a
    simplification chosen for this synthetic data set.
    """
    group = f"{random.randint(0, 9)}"
    registrant = f"{random.randint(0, 999):03d}"
    publication = f"{random.randint(0, 99999):05d}"
    first_twelve = f"978{group}{registrant}{publication}"
    # ISBN-13 check digit: weights alternate 1, 3, 1, 3, ... from the left.
    weighted_sum = sum(
        int(digit) * (3 if position % 2 else 1)
        for position, digit in enumerate(first_twelve)
    )
    check_digit = (10 - weighted_sum % 10) % 10
    return f"978-{group}-{registrant}-{publication}-{check_digit}"


# Loop-invariant values, built once instead of on every row.
book_formats = ["Hardcover", "Paperback", "eBook"]
earliest_publish = datetime(1990, 1, 1).date()
latest_publish = datetime(2023, 12, 31).date()
earliest_first_publish = datetime(1950, 1, 1).date()

with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['bookId', 'title', 'author', 'rating', 'description', 'language',
                  'isbn', 'bookFormat', 'edition', 'pages', 'publisher', 'publishDate',
                  'firstPublishDate', 'likedPercent', 'price']

    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for bookId in range(1, NUM_RECORDS + 1):
        # Titles are drawn with replacement, so the same title can recur.
        title = random.choice(unique_titles)
        author = random.choice(authors)
        rating = round(random.uniform(1, 5), 1)

        # Generate a random description from the templates
        description = random.choice(description_templates).format(title=title)

        language = "English"
        isbn = _random_isbn13()
        bookFormat = random.choice(book_formats)
        edition = get_edition_suffix(random.randint(1, 5))  # e.g. "1st" .. "5th"
        pages = random.randint(100, 1200)
        publisher = random.choice(publishers)

        publishDate = random_date(earliest_publish, latest_publish)
        # The first publication can never be later than this edition's date.
        firstPublishDate = random_date(earliest_first_publish, publishDate)

        likedPercent = random.randint(70, 100)
        price = round(random.uniform(9.99, 99.99), 2)

        writer.writerow({
            'bookId': bookId,
            'title': title,
            'author': author,
            'rating': rating,
            'description': description,
            'language': language,
            'isbn': isbn,
            'bookFormat': bookFormat,
            'edition': edition,
            'pages': pages,
            'publisher': publisher,
            'publishDate': publishDate,
            'firstPublishDate': firstPublishDate,
            'likedPercent': likedPercent,
            'price': price
        })

print(f"CSV file '{OUTPUT_PATH}' created with {NUM_RECORDS:,} records.")


CSV file 'books.csv' created with 20,000 records.
