In [4]:
import csv
import psycopg2
import os
from faker import Faker
import random
from datetime import datetime, timedelta
from tabulate import tabulate  # Importing tabulate for printing tables

# Initialize Faker
fake = Faker()

# Database connection parameters (adjust as necessary).
# Every value can be overridden through the standard libpq environment
# variables so real credentials do not have to be stored in the notebook;
# the fallbacks are the original local-development defaults.
db_params = {
    'dbname': os.environ.get('PGDATABASE', 'customer_data'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'password'),
    'host': os.environ.get('PGHOST', 'localhost'),  # or your db host
    'port': os.environ.get('PGPORT', '5432'),
}

# Uncomment the following lines if you decide to connect to the database later
# try:
#     conn = psycopg2.connect(**db_params)
#     cur = conn.cursor()
# except Exception as e:
#     print(f"Error connecting to the database: {e}")
#     exit(1)

# Drop existing tables if they exist
# Commented out since we're not interacting with the database
# tables = ["posts", "threads", "homework", "discussions", "students"]
# for table in tables:
#     try:
#         cur.execute(f"DROP TABLE IF EXISTS {table} CASCADE;")
#     except Exception as e:
#         print(f"Error dropping table {table}: {e}")
# conn.commit()

# Define table schemas (commented out)
# cur.execute("""
#     CREATE TABLE students (
#         student_id SERIAL PRIMARY KEY,
#         first_name VARCHAR(50),
#         last_name VARCHAR(50),
#         email VARCHAR(100),
#         major VARCHAR(100),
#         enrollment_year INT
#     );
# """)

# cur.execute("""
#     CREATE TABLE homework (
#         hw_id SERIAL PRIMARY KEY,
#         hw_name VARCHAR(255),
#         hw_content TEXT,
#         due_date DATE,
#         grade INT,
#         course_name VARCHAR(255),
#         student_id INT REFERENCES students(student_id)
#     );
# """)

# cur.execute("""
#     CREATE TABLE threads (
#         thread_id SERIAL PRIMARY KEY,
#         title VARCHAR(255),
#         created_by INT REFERENCES students(student_id),
#         creation_date DATE
#     );
# """)

# cur.execute("""
#     CREATE TABLE posts (
#         post_id SERIAL PRIMARY KEY,
#         thread_id INT REFERENCES threads(thread_id),
#         author_id INT REFERENCES students(student_id),
#         content TEXT,
#         post_date DATE
#     );
# """)
# conn.commit()

# Create a data directory if it doesn't exist.
# exist_ok=True makes this a single call with no check-then-create race.
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)

# Generate synthetic data for `students`
def generate_students(num_students=20):
    """Return a list of synthetic student rows.

    Each row is ``[student_id, first_name, last_name, email, major,
    enrollment_year]``. Ids count up from 1 to ``num_students``; names come
    from the module-level ``fake`` generator, the major is picked at random
    from a fixed list and the enrollment year is drawn from 2018-2023.
    """
    major_pool = ["Computer Science", "Mathematics", "Literature", "Physics", "Biology",
                  "Chemistry", "History", "Economics", "Psychology", "Engineering"]
    roster = []
    for student_id in range(1, num_students + 1):
        given = fake.first_name()
        family = fake.last_name()
        row = [
            student_id,
            given,
            family,
            f"{given.lower()}.{family.lower()}@fakeuniversity.edu",
            random.choice(major_pool),
            random.randint(2018, 2023),
        ]
        roster.append(row)
    return roster

students_data = generate_students()

# Generate synthetic data for `homework`
def generate_homework(students, num_assignments=5):
    """Return synthetic homework rows, ``num_assignments`` per student.

    Each row is ``[hw_id, hw_name, hw_content, due_date, grade, course_name,
    student_id]``. One course is chosen at random per student and shared by
    all of that student's assignments. ``due_date`` is a ``YYYY-MM-DD``
    string within the next 60 days; ``grade`` is either ``None`` (ungraded)
    or an integer from 60 to 100.

    Args:
        students: Sequence of student rows whose first element is the id.
        num_assignments: How many assignments to generate for each student.
    """
    course_names = ["Intro to Programming", "Calculus I", "Literature Studies",
                    "Physics 101", "Advanced Mathematics", "Biology Basics",
                    "Chemistry Fundamentals", "World History", "Economics Principles",
                    "Psychology Introduction"]
    kinds = ["Essay", "Project", "Quiz", "Lab Report", "Programming Assignment"]
    rows = []
    for record in students:
        owner_id = record[0]
        course = random.choice(course_names)
        for number in range(1, num_assignments + 1):
            kind = kinds[number % len(kinds)].lower()
            title = f"Assignment {number}"
            body = f"Complete {kind} for {course}."
            # Due dates fall within the next 60 days
            due = fake.date_between(start_date='today', end_date='+60d')
            # A score is always drawn, then a coin flip decides whether to keep it
            score = random.randint(60, 100)
            grade = random.choice([None, score])
            # hw_id is simply the 1-based position of the row in the output
            rows.append([len(rows) + 1, title, body, due.strftime('%Y-%m-%d'),
                         grade, course, owner_id])
    return rows

homework_data = generate_homework(students_data)

# Generate synthetic data for `threads` and `posts`
def generate_discussions(students, num_threads=10, max_posts_per_thread=10):
    """Return synthetic discussion threads and their posts.

    Args:
        students: Sequence of student rows whose first element is the id;
            thread creators and post authors are drawn from it at random.
        num_threads: Number of threads to generate.
        max_posts_per_thread: Upper bound on posts per thread. Each thread
            normally gets between 2 and this many posts; values below 2
            lower the minimum accordingly instead of raising ``ValueError``,
            and values below 0 are treated as 0.

    Returns:
        A ``(threads, posts)`` tuple. Thread rows are
        ``[thread_id, title, created_by, creation_date]`` and post rows are
        ``[post_id, thread_id, author_id, content, post_date]``. Dates are
        ``YYYY-MM-DD`` strings and both id sequences start at 1.
    """
    threads = []
    posts = []
    post_id = 1
    # random.randint(a, b) requires a <= b, so clamp both ends of the range
    most_posts = max(0, max_posts_per_thread)
    fewest_posts = min(2, most_posts)
    for thread_id in range(1, num_threads + 1):
        title = fake.sentence(nb_words=6).rstrip('.')
        created_by = random.choice(students)[0]
        creation_date = fake.date_between(start_date='-30d', end_date='today')
        threads.append([thread_id, title, created_by, creation_date.strftime('%Y-%m-%d')])

        # Generate posts for the thread
        num_posts = random.randint(fewest_posts, most_posts)
        for _ in range(num_posts):
            author_id = random.choice(students)[0]
            content = fake.paragraph(nb_sentences=3)
            # Post dates are drawn between the thread's creation date and today
            post_date = fake.date_between(start_date=creation_date, end_date='today')
            posts.append([post_id, thread_id, author_id, content, post_date.strftime('%Y-%m-%d')])
            post_id += 1
    return threads, posts

threads_data, posts_data = generate_discussions(students_data)

# Helper function to write CSV files
def write_csv(filename, headers, data):
    """Write ``headers`` followed by every row of ``data`` to a CSV file.

    The file is created or overwritten, encoded as UTF-8, and opened with
    ``newline=''`` so the csv module controls line endings itself.
    """
    with open(filename, mode='w', encoding='utf-8', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(headers)
        for record in data:
            csv_out.writerow(record)

# Write each generated table to its own CSV file inside data_dir
for table_file, table_headers, table_rows in (
    ('students.csv',
     ["student_id", "first_name", "last_name", "email", "major", "enrollment_year"],
     students_data),
    ('homework.csv',
     ["hw_id", "hw_name", "hw_content", "due_date", "grade", "course_name", "student_id"],
     homework_data),
    ('threads.csv',
     ["thread_id", "title", "created_by", "creation_date"],
     threads_data),
    ('posts.csv',
     ["post_id", "thread_id", "author_id", "content", "post_date"],
     posts_data),
):
    write_csv(os.path.join(data_dir, table_file), table_headers, table_rows)

# Commented out the database uploading sections
# def load_csv_into_table(conn, table_name, csv_file, columns):
#     try:
#         with open(csv_file, 'r', encoding='utf-8') as f:
#             next(f)  # Skip header line
#             cur = conn.cursor()
#             copy_sql = f"COPY {table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
#             cur.copy_expert(copy_sql, f)
#             conn.commit()
#     except Exception as e:
#         print(f"Error loading data into {table_name}: {e}")
#         conn.rollback()

# # Load data into the `students` table
# load_csv_into_table(conn, "students", os.path.join(data_dir, "students.csv"),
#                     ["student_id", "first_name", "last_name", "email", "major", "enrollment_year"])

# # Load data into the `homework` table
# load_csv_into_table(conn, "homework", os.path.join(data_dir, "homework.csv"),
#                     ["hw_id", "hw_name", "hw_content", "due_date", "grade", "course_name", "student_id"])

# # Load data into the `threads` table
# load_csv_into_table(conn, "threads", os.path.join(data_dir, "threads.csv"),
#                     ["thread_id", "title", "created_by", "creation_date"])

# # Load data into the `posts` table
# load_csv_into_table(conn, "posts", os.path.join(data_dir, "posts.csv"),
#                     ["post_id", "thread_id", "author_id", "content", "post_date"])

# Print every generated table with tabulate, in psql format
for heading, rows_to_show, column_names in (
    ("\nSTUDENTS TABLE:", students_data,
     ["student_id", "first_name", "last_name", "email", "major", "enrollment_year"]),
    ("\nHOMEWORK TABLE:", homework_data,
     ["hw_id", "hw_name", "hw_content", "due_date", "grade", "course_name", "student_id"]),
    ("\nTHREADS TABLE:", threads_data,
     ["thread_id", "title", "created_by", "creation_date"]),
    ("\nPOSTS TABLE:", posts_data,
     ["post_id", "thread_id", "author_id", "content", "post_date"]),
):
    print(heading)
    print(tabulate(rows_to_show, headers=column_names, tablefmt="psql"))

# Close the database connection if it was opened
# cur.close()
# conn.close()

print("\nData generation complete. Data has been printed above and saved to CSV files in the 'data' directory.")



STUDENTS TABLE:
+--------------+--------------+-------------+--------------------------------------+-------------+-------------------+
|   student_id | first_name   | last_name   | email                                | major       |   enrollment_year |
|--------------+--------------+-------------+--------------------------------------+-------------+-------------------|
|            1 | Paul         | Keith       | paul.keith@fakeuniversity.edu        | Physics     |              2022 |
|            2 | Mary         | Webster     | mary.webster@fakeuniversity.edu      | Engineering |              2018 |
|            3 | Lacey        | Cohen       | lacey.cohen@fakeuniversity.edu       | History     |              2019 |
|            4 | Rhonda       | Long        | rhonda.long@fakeuniversity.edu       | Physics     |              2022 |
|            5 | Cassandra    | Welch       | cassandra.welch@fakeuniversity.edu   | Biology     |              2023 |
|            6 | April        |

In [3]:
!pip install faker

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting faker
  Downloading Faker-33.1.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-33.1.0-py3-none-any.whl (1.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 109.9 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-33.1.0

[notice] A new release of pip is available: 24.1.2 -> 24.3.1
[notice] To update, run: python -m pip install --upgrade pip


In [10]:
import csv
import os
from faker import Faker
import random
from datetime import datetime, timedelta
from tabulate import tabulate

fake = Faker()

# Output directory for the generated CSV files.
# exist_ok=True makes this a single call with no check-then-create race.
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)

def generate_students(num_students=20):
    """Return ``num_students`` synthetic student rows.

    Each row is ``[id, first name, last name, email, major, enrollment
    year]``. Ids start at 1, names come from the module-level ``fake``
    generator, and the email is built from the lower-cased names.
    """
    available_majors = [
        "Computer Science", "Mathematics", "Literature", "Physics", "Biology",
        "Chemistry", "History", "Economics", "Psychology", "Engineering"
    ]
    rows = []
    for sid in range(1, num_students + 1):
        given_name = fake.first_name()
        surname = fake.last_name()
        address = f"{given_name.lower()}.{surname.lower()}@fakeuniversity.edu"
        chosen_major = random.choice(available_majors)
        year = random.randint(2018, 2023)
        rows.append([sid, given_name, surname, address, chosen_major, year])
    return rows

students_data = generate_students()

def generate_homework(students, num_assignments=5):
    """Return homework rows for a single fixed course.

    Every student receives ``num_assignments`` rows of the form
    ``[id, name, content, due date, grade, course name, student id]``.
    Due dates are ``YYYY-MM-DD`` strings within the next 60 days and the
    grade is either ``None`` or an integer from 60 to 100.

    Args:
        students: Sequence of student rows whose first element is the id.
        num_assignments: How many assignments to generate for each student.
    """
    course_name = "Classical Mechanics"
    kinds = ["Essay", "Project", "Quiz", "Lab Report", "Problem Set"]
    rows = []
    for record in students:
        owner_id = record[0]
        for number in range(1, num_assignments + 1):
            kind = kinds[number % len(kinds)].lower()
            title = f"Homework {number}"
            body = f"Complete the {kind} for {course_name}."
            due = fake.date_between(start_date='today', end_date='+60d')
            # A score is always drawn, then a coin flip decides whether to keep it
            score = random.randint(60, 100)
            grade = random.choice([None, score])
            # The homework id is the 1-based position of the row in the output
            rows.append([
                len(rows) + 1, title, body, due.strftime('%Y-%m-%d'),
                grade, course_name, owner_id
            ])
    return rows

homework_data = generate_homework(students_data)

def generate_hardcoded_discussions():
    """Return a fixed set of discussion threads and replies as flat rows.

    Each row is ``[discussion_id, parent_discussion_id, title, author_id,
    content, date]``. A thread row has ``None`` as its parent and carries a
    title; a reply row has ``None`` as its title and points at the
    ``discussion_id`` of the thread it belongs to. Ids are assigned
    sequentially starting at 1, with every thread immediately followed by
    its replies.

    Returns:
        list[list]: 7 thread rows and 14 reply rows (21 in total).
    """
    discussions = []
    discussion_id = 1

    threads = [
        {
            "title": "Understanding Newton's Laws",
            "author_id": 5,
            "content": "I'd like to start a discussion on Newton's Laws of Motion. Specifically, how the third law applies in everyday scenarios.",
            "date": "2024-11-15"
        },
        {
            "title": "Conservation of Momentum",
            "author_id": 2,
            "content": "Let's delve into the conservation of momentum. How does it play a role in collision scenarios?",
            "date": "2024-11-10"
        },
        {
            "title": "Energy Conservation in Mechanics",
            "author_id": 10,
            "content": "Discussing the principles of energy conservation within mechanical systems. Any insights or examples?",
            "date": "2024-11-08"
        },
        {
            "title": "Thermodynamics Basics",
            "author_id": 12,
            "content": "Starting a thread on the basics of thermodynamics. How do the laws apply to real-world engineering problems?",
            "date": "2024-11-12"
        },
        {
            "title": "Quantum Mechanics Introduction",
            "author_id": 4,
            "content": "Introducing the fundamentals of Quantum Mechanics. How does it differ from classical mechanics in explaining particle behavior?",
            "date": "2024-11-14"
        },
        {
            "title": "Relativity Theory Overview",
            "author_id": 7,
            "content": "A brief overview of Einstein's Theory of Relativity. How does it impact our understanding of space and time?",
            "date": "2024-11-13"
        },
        {
            "title": "Fluid Dynamics Applications",
            "author_id": 9,
            "content": "Exploring the applications of fluid dynamics in modern engineering. Any interesting projects or case studies?",
            "date": "2024-11-16"
        }
    ]

    # Keyed by the 1-based position of the thread in `threads` above,
    # NOT by the discussion_id the thread ends up with in the output.
    replies = {
        1: [
            {
                "author_id": 3,
                "content": "Newton's third law is evident when you push against a wall. The wall pushes back with equal force, even though it doesn't move.",
                "date": "2024-11-16"
            },
            {
                "author_id": 7,
                "content": "Absolutely! Another example is when swimming. Your hands push water backward, and the water pushes you forward.",
                "date": "2024-11-17"
            }
        ],
        2: [
            {
                "author_id": 8,
                "content": "In car collisions, conservation of momentum helps in understanding the resulting movements of the vehicles involved.",
                "date": "2024-11-11"
            },
            {
                "author_id": 6,
                "content": "Yes, and it's also crucial in analyzing rocket propulsion where expelled gases carry momentum.",
                "date": "2024-11-12"
            }
        ],
        3: [
            {
                "author_id": 1,
                "content": "Energy conservation allows us to predict how energy transforms from potential to kinetic in a roller coaster ride.",
                "date": "2024-11-09"
            },
            {
                "author_id": 9,
                "content": "Exactly, and it also helps in designing efficient mechanical systems by minimizing energy loss.",
                "date": "2024-11-10"
            }
        ],
        4: [
            {
                "author_id": 11,
                "content": "Thermodynamics principles are essential for understanding how engines convert heat into work efficiently.",
                "date": "2024-11-13"
            },
            {
                "author_id": 5,
                "content": "They also play a role in HVAC systems, ensuring optimal temperature control in buildings.",
                "date": "2024-11-14"
            }
        ],
        5: [
            {
                "author_id": 2,
                "content": "Quantum Mechanics introduces probabilities in particle positions, which is a significant shift from deterministic classical mechanics.",
                "date": "2024-11-15"
            },
            {
                "author_id": 14,
                "content": "It's fascinating how particles can exist in multiple states simultaneously, thanks to superposition.",
                "date": "2024-11-16"
            }
        ],
        6: [
            {
                "author_id": 10,
                "content": "Relativity has revolutionized our understanding of gravity, showing it as the curvature of spacetime caused by mass.",
                "date": "2024-11-14"
            },
            {
                "author_id": 13,
                "content": "And the concept of time dilation has profound implications for high-speed travel and cosmology.",
                "date": "2024-11-15"
            }
        ],
        7: [
            {
                "author_id": 4,
                "content": "Fluid dynamics is crucial in designing efficient pipelines and understanding weather patterns.",
                "date": "2024-11-17"
            },
            {
                "author_id": 12,
                "content": "Absolutely, and it's also vital in aerospace engineering for optimizing aircraft and spacecraft designs.",
                "date": "2024-11-18"
            }
        ]
    }

    for thread_number, thread in enumerate(threads, start=1):
        # Remember the id given to this thread so its replies can point at it
        current_thread_id = discussion_id
        discussions.append([
            current_thread_id,
            None,
            thread["title"],
            thread["author_id"],
            thread["content"],
            thread["date"]
        ])
        discussion_id += 1

        # Look replies up by thread position: discussion ids also advance
        # for every reply, so from the second thread on they no longer
        # line up with the keys of `replies`.
        for reply in replies.get(thread_number, []):
            discussions.append([
                discussion_id,
                current_thread_id,
                None,
                reply["author_id"],
                reply["content"],
                reply["date"]
            ])
            discussion_id += 1

    return discussions

discussions_data = generate_hardcoded_discussions()

def write_csv(filename, headers, data):
    """Create or overwrite ``filename`` with a header row plus ``data`` rows.

    The output is UTF-8 encoded CSV; ``newline=''`` leaves line-ending
    handling to the csv writer.
    """
    with open(filename, 'w', encoding='utf-8', newline='') as handle:
        emit_row = csv.writer(handle).writerow
        emit_row(headers)
        for row in data:
            emit_row(row)

# Write each generated table to its own CSV file inside data_dir
for table_file, table_headers, table_rows in (
    ('students.csv',
     ["StudentID", "FirstName", "LastName", "Email", "Major", "EnrollmentYear"],
     students_data),
    ('homework.csv',
     ["HomeworkID", "HomeworkName", "HomeworkContent", "DueDate", "Grade", "CourseName", "StudentID"],
     homework_data),
    ('discussions.csv',
     ["DiscussionID", "ParentDiscussionID", "Title", "AuthorID", "Content", "DateCreated"],
     discussions_data),
):
    write_csv(os.path.join(data_dir, table_file), table_headers, table_rows)

# Print every generated table with tabulate, in psql format
for heading, rows_to_show, column_names in (
    ("\nSTUDENTS TABLE:", students_data,
     ["StudentID", "FirstName", "LastName", "Email", "Major", "EnrollmentYear"]),
    ("\nHOMEWORK TABLE:", homework_data,
     ["HomeworkID", "HomeworkName", "HomeworkContent", "DueDate", "Grade", "CourseName", "StudentID"]),
    ("\nDISCUSSIONS TABLE:", discussions_data,
     ["DiscussionID", "ParentDiscussionID", "Title", "AuthorID", "Content", "DateCreated"]),
):
    print(heading)
    print(tabulate(rows_to_show, headers=column_names, tablefmt="psql"))

print("\nData generation complete. Data has been printed above and saved to CSV files in the 'data' directory.")



STUDENTS TABLE:
+-------------+-------------+------------+----------------------------------------+------------------+------------------+
|   StudentID | FirstName   | LastName   | Email                                  | Major            |   EnrollmentYear |
|-------------+-------------+------------+----------------------------------------+------------------+------------------|
|           1 | Pamela      | Murray     | pamela.murray@fakeuniversity.edu       | Computer Science |             2022 |
|           2 | Mary        | Mcdaniel   | mary.mcdaniel@fakeuniversity.edu       | Chemistry        |             2018 |
|           3 | Bradley     | Gibson     | bradley.gibson@fakeuniversity.edu      | Mathematics      |             2022 |
|           4 | Lisa        | Robinson   | lisa.robinson@fakeuniversity.edu       | Physics          |             2022 |
|           5 | Christopher | Rogers     | christopher.rogers@fakeuniversity.edu  | Literature       |             2020 |
|      