<a href="https://colab.research.google.com/github/boritaserey/cosmetic-manufacturer/blob/main/cosmetics_manufacturer_db.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas faker openpyxl

Collecting faker
  Downloading Faker-33.0.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-33.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-33.0.0


In [None]:
import pandas as pd
from faker import Faker
import sqlite3
from random import randint, uniform
from datetime import datetime, timedelta

In [None]:
# Faker instance used to generate the synthetic values inserted further down.
fake = Faker()

# Open (or create) the local SQLite database file and get a cursor for executing statements.
conn = sqlite3.connect('cosmetics_manufacturer.db')
cursor = conn.cursor()

# Step 1: Create tables based on the ERD.
# Every statement uses IF NOT EXISTS, so re-running this cell against an existing
# database file keeps the tables (and their column definitions) as they already are.

# Product
cursor.execute('''
CREATE TABLE IF NOT EXISTS Product (
    Product_ID INTEGER PRIMARY KEY,
    Product_Name VARCHAR(30),
    Category VARCHAR(30),
    Size VARCHAR(10),
    Color VARCHAR(30),
    Smell VARCHAR(30),
    Ingredients VARCHAR(255),
    Packaging VARCHAR(30),
    Price DECIMAL(10, 2),
    StockQuantity INTEGER,
    ManufacturingDate DATE,
    ExpirationDate DATE,
    Supplier_ID INTEGER
)
''')

# Suppliers
cursor.execute('''
CREATE TABLE IF NOT EXISTS Suppliers (
    Supplier_ID INTEGER PRIMARY KEY,
    Supplier_Name VARCHAR(50),
    Supplier_ContactNumber VARCHAR(15),
    Supplier_Address VARCHAR(100),
    Supplier_City VARCHAR(30),
    Supplier_Country VARCHAR(30),
    ProductsSupplied VARCHAR(50)
)
''')

# Inventory
cursor.execute('''
CREATE TABLE IF NOT EXISTS Inventory (
    Inventory_ID INTEGER PRIMARY KEY,
    Product_ID INTEGER,
    WarehouseID INTEGER,
    QuantityInStock INTEGER,
    UnitPrice DECIMAL(15, 2),
    ReorderLevel INTEGER,
    DateReceived DATE
)
''')

# Warehouse
cursor.execute('''
CREATE TABLE IF NOT EXISTS Warehouse (
    WarehouseID INTEGER PRIMARY KEY,
    WarehouseLocation VARCHAR(30),
    Capacity_CubicMeters DECIMAL(10, 2),
    Manager_ID INTEGER
)
''')

# Customer
# SQLite has no ENUM type (an unknown type name falls back to NUMERIC affinity),
# so the loyalty tier is declared as a text column instead.
cursor.execute('''
CREATE TABLE IF NOT EXISTS Customer (
    Customer_ID INTEGER PRIMARY KEY,
    Customer_Name VARCHAR(50),
    Customer_Email VARCHAR(50),
    Customer_Address VARCHAR(100),
    Customer_ContactNumber VARCHAR(15),
    LoyaltyStatus VARCHAR(20)
)
''')

# Order -- ORDER is an SQL keyword, so the table name must be quoted.
cursor.execute('''
CREATE TABLE IF NOT EXISTS "Order" (
    Order_ID INTEGER PRIMARY KEY,
    Order_Date DATETIME,
    Customer_ID INTEGER,
    TotalAmount DECIMAL(10, 2),
    ShippingDate DATETIME,
    Order_Status VARCHAR(20)
)
''')

# OrderItem
cursor.execute('''
CREATE TABLE IF NOT EXISTS OrderItem (
    OrderItem_ID INTEGER PRIMARY KEY,
    Order_ID INTEGER,
    Product_ID INTEGER,
    Order_Quantity INTEGER,
    UnitPrice DECIMAL(10, 2)
)
''')

# Batch
cursor.execute('''
CREATE TABLE IF NOT EXISTS Batch (
    BatchID INTEGER PRIMARY KEY,
    Product_ID INTEGER,
    BatchDate DATE,
    ExpirationDate DATE,
    QuantityProduced INTEGER,
    QualityCheckStatus VARCHAR(20)
)
''')

# Manager
cursor.execute('''
CREATE TABLE IF NOT EXISTS Manager (
    Manager_ID INTEGER PRIMARY KEY,
    Manager_Name VARCHAR(50),
    Department VARCHAR(30),
    Manager_ContactNumber VARCHAR(15)
)
''')

#Step 2: Generate Synthetic Data and Populate Tables

# Define a function to generate a random date
def random_date(start, end):
    """Return ``start`` moved forward by a random whole number of days.

    The offset is drawn uniformly from 0 up to and including the number of
    whole days between ``start`` and ``end``, so the result never exceeds ``end``.
    """
    span_in_days = (end - start).days
    offset = timedelta(days=randint(0, span_in_days))
    return start + offset

# Populate all tables with synthetic rows.
#
# Row counts: 100 products, 10 suppliers, 150 inventory records, 10 warehouses,
# 50 customers, 100 orders, 200 order items, 100 batches, 10 managers.
# The foreign-key columns below draw from the matching 1..N ranges, so each table
# must receive exactly N rows on the first run.

def next_id(table, id_column):
    """Return the next unused integer key for ``table`` (1 when the table is empty).

    MAX-based rather than COUNT-based so a gap left by deleted rows cannot produce
    an id that collides with an existing primary key. ``table`` and ``id_column``
    are fixed identifiers written in this cell, never user input.
    """
    cursor.execute(f'SELECT COALESCE(MAX({id_column}), 0) + 1 FROM "{table}"')
    return cursor.fetchone()[0]


# Populate Product
start_id = next_id('Product', 'Product_ID')
products = [
    (
        i,                                  # Product_ID
        fake.word(),                        # Product_Name
        fake.word(),                        # Category
        f"{randint(1, 5)}kg",               # Size
        fake.color_name(),                  # Color
        fake.word(),                        # Smell
        fake.sentence(),                    # Ingredients
        fake.word(),                        # Packaging
        round(uniform(5, 500), 2),          # Price
        randint(0, 1000),                   # StockQuantity
        random_date(datetime(2022, 1, 1), datetime(2023, 1, 1)).strftime('%Y-%m-%d'),  # ManufacturingDate
        random_date(datetime(2023, 1, 1), datetime(2025, 1, 1)).strftime('%Y-%m-%d'),  # ExpirationDate
        randint(1, 10),                     # Supplier_ID (1-10)
    )
    for i in range(start_id, start_id + 100)
]
cursor.executemany('''
INSERT INTO Product (Product_ID, Product_Name, Category, Size, Color, Smell, Ingredients, Packaging, Price, StockQuantity, ManufacturingDate, ExpirationDate, Supplier_ID)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', products)

# Populate Suppliers
start_id = next_id('Suppliers', 'Supplier_ID')
suppliers = [
    (
        i,                    # Supplier_ID
        fake.company(),       # Supplier_Name
        fake.phone_number(),  # Supplier_ContactNumber
        fake.address(),       # Supplier_Address
        fake.city(),          # Supplier_City
        fake.country(),       # Supplier_Country
        fake.word(),          # ProductsSupplied
    )
    for i in range(start_id, start_id + 10)
]
cursor.executemany('''
INSERT INTO Suppliers (Supplier_ID, Supplier_Name, Supplier_ContactNumber, Supplier_Address, Supplier_City, Supplier_Country, ProductsSupplied)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', suppliers)

# Populate Inventory
start_id = next_id('Inventory', 'Inventory_ID')
inventory = [
    (
        i,                          # Inventory_ID
        randint(1, 100),            # Product_ID (1-100)
        randint(1, 10),             # WarehouseID (1-10)
        randint(0, 1000),           # QuantityInStock (0-1000)
        round(uniform(5, 500), 2),  # UnitPrice (5-500)
        randint(10, 50),            # ReorderLevel (10-50)
        random_date(datetime(2023, 1, 1), datetime(2023, 12, 31)).strftime('%Y-%m-%d'),  # DateReceived
    )
    for i in range(start_id, start_id + 150)
]
cursor.executemany('''
INSERT INTO Inventory (Inventory_ID, Product_ID, WarehouseID, QuantityInStock, UnitPrice, ReorderLevel, DateReceived)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', inventory)

# Populate Warehouse
start_id = next_id('Warehouse', 'WarehouseID')
warehouses = [
    (
        i,                             # WarehouseID
        fake.city(),                   # WarehouseLocation
        round(uniform(100, 1000), 2),  # Capacity_CubicMeters (100 to 1000 cubic meters)
        randint(1, 10),                # Manager_ID (1-10)
    )
    for i in range(start_id, start_id + 10)
]
cursor.executemany('''
INSERT INTO Warehouse (WarehouseID, WarehouseLocation, Capacity_CubicMeters, Manager_ID)
VALUES (?, ?, ?, ?)
''', warehouses)

# Populate Customer
start_id = next_id('Customer', 'Customer_ID')
customers = [
    (
        i,                    # Customer_ID
        fake.name(),          # Customer_Name
        fake.email(),         # Customer_Email
        fake.address(),       # Customer_Address
        fake.phone_number(),  # Customer_ContactNumber
        fake.random_element(elements=('Bronze', 'Silver', 'Gold', 'Platinum')),  # LoyaltyStatus
    )
    for i in range(start_id, start_id + 50)
]
cursor.executemany('''
INSERT INTO Customer (Customer_ID, Customer_Name, Customer_Email, Customer_Address, Customer_ContactNumber, LoyaltyStatus)
VALUES (?, ?, ?, ?, ?, ?)
''', customers)

# Populate Order
start_id = next_id('Order', 'Order_ID')
orders = []
for i in range(start_id, start_id + 100):
    order_date = random_date(datetime(2023, 1, 1), datetime(2023, 12, 31))
    # Draw the shipping date from the order date onwards so an order is never
    # shipped before it was placed; the upper bound is unchanged.
    shipping_date = random_date(order_date, datetime(2024, 1, 1))
    orders.append((
        i,                                            # Order_ID
        order_date.strftime('%Y-%m-%d %H:%M:%S'),     # Order_Date
        randint(1, 50),                               # Customer_ID (1-50)
        round(uniform(20, 2000), 2),                  # TotalAmount
        shipping_date.strftime('%Y-%m-%d %H:%M:%S'),  # ShippingDate
        fake.random_element(elements=('Pending', 'Shipped', 'Delivered', 'Cancelled')),  # Order_Status
    ))
cursor.executemany('''
INSERT INTO "Order" (Order_ID, Order_Date, Customer_ID, TotalAmount, ShippingDate, Order_Status)
VALUES (?, ?, ?, ?, ?, ?)
''', orders)

# Populate OrderItem
start_id = next_id('OrderItem', 'OrderItem_ID')
order_items = [
    (
        i,                          # OrderItem_ID
        randint(1, 100),            # Order_ID (1-100)
        randint(1, 100),            # Product_ID (1-100)
        randint(1, 20),             # Order_Quantity (1-20)
        round(uniform(5, 500), 2),  # UnitPrice (5-500)
    )
    for i in range(start_id, start_id + 200)
]
cursor.executemany('''
INSERT INTO OrderItem (OrderItem_ID, Order_ID, Product_ID, Order_Quantity, UnitPrice)
VALUES (?, ?, ?, ?, ?)
''', order_items)

# Populate Batch
start_id = next_id('Batch', 'BatchID')
batches = [
    (
        i,                   # BatchID
        randint(1, 100),     # Product_ID (1-100)
        random_date(datetime(2023, 1, 1), datetime(2023, 6, 30)).strftime('%Y-%m-%d'),   # BatchDate
        random_date(datetime(2024, 1, 1), datetime(2025, 12, 31)).strftime('%Y-%m-%d'),  # ExpirationDate
        randint(500, 5000),  # QuantityProduced (500-5000)
        fake.random_element(elements=('Pass', 'Fail')),  # QualityCheckStatus
    )
    for i in range(start_id, start_id + 100)
]
cursor.executemany('''
INSERT INTO Batch (BatchID, Product_ID, BatchDate, ExpirationDate, QuantityProduced, QualityCheckStatus)
VALUES (?, ?, ?, ?, ?, ?)
''', batches)

# Populate Manager
start_id = next_id('Manager', 'Manager_ID')
managers = [
    (
        i,                    # Manager_ID
        fake.name(),          # Manager_Name
        fake.job(),           # Department
        fake.phone_number(),  # Manager_ContactNumber
    )
    for i in range(start_id, start_id + 10)
]
cursor.executemany('''
INSERT INTO Manager (Manager_ID, Manager_Name, Department, Manager_ContactNumber)
VALUES (?, ?, ?, ?)
''', managers)

# sqlite3 opens a transaction implicitly for INSERT statements; without this commit
# the rows never reach the database file that is downloaded later in the notebook.
conn.commit()

print("Database setup and data population complete!")

Database setup and data population complete!


In [None]:
import os

# Show the directory that relative paths (such as the database file name) resolve against.
current_dir = os.getcwd()
print(current_dir)

/content


In [None]:
from google.colab import drive

# Mount Google Drive so the database can be kept outside the temporary Colab VM.
drive.mount('/content/drive')

# Persist pending inserts on the current connection before `conn` is rebound below.
# Simply connecting to the Drive path would only create a new, empty database there
# and leave the local connection's changes uncommitted.
conn.commit()

# Copy the local database into the Drive file with SQLite's backup API.
# A dedicated source connection is used so the cell can be re-run even after
# `conn` already points at the Drive copy.
local_conn = sqlite3.connect('cosmetics_manufacturer.db')
drive_conn = sqlite3.connect('/content/drive/My Drive/cosmetics_manufacturer.db')
try:
    local_conn.backup(drive_conn)
finally:
    local_conn.close()

# As before, `conn` ends up bound to the database stored on Drive.
conn = drive_conn


Mounted at /content/drive


In [None]:
# Template cell: connects to a SQLite file on Google Drive, then commits and closes
# the connection. No tables or rows are written between connect and commit here.
import sqlite3
from google.colab import drive

drive.mount('/content/drive')  # Mount Google Drive

# Rebinds the notebook-level `conn` and `cursor` names to the Drive database.
# NOTE(review): the file name 'cosmetics_manu.db' differs from the
# 'cosmetics_manufacturer.db' name used by the other cells -- confirm which is intended.
conn = sqlite3.connect('/content/drive/My Drive/cosmetics_manu.db')
cursor = conn.cursor()

# Your table creation and data insertion code here...

conn.commit()  # Commit changes to the database file
conn.close()  # Close the connection; `conn` and `cursor` cannot be used for queries after this

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import files
# Send the local database file to the browser as a download. The path is relative,
# so it refers to the file in the current working directory.
# NOTE(review): the file only contains committed data -- confirm conn.commit() ran first.
files.download('cosmetics_manufacturer.db')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os

# Report where the kernel is running so relative file paths can be located.
print(f"Current working directory: {os.getcwd()}")

Current working directory: /content
