# In [1]:
import pandas as pd
import numpy as np
import random
import zipfile
from datetime import datetime, timedelta

# ----------------------------
# Helper: generate random dates (tanggal)
# ----------------------------
def rand_dates(n, start="2023-01-01", end="2024-12-31"):
    """Return a list of ``n`` random datetimes between ``start`` and ``end``.

    ``start`` and ``end`` are ``YYYY-MM-DD`` strings and both bounds are
    inclusive. Every result is ``start`` (parsed, so at midnight) plus a whole
    number of days chosen with ``random.randint``; the same day may appear
    more than once.
    """
    first_day = datetime.strptime(start, "%Y-%m-%d")
    last_day = datetime.strptime(end, "%Y-%m-%d")
    # Largest day offset that still lands on ``end``.
    span_days = (last_day - first_day).days

    picked = []
    for _ in range(n):
        offset = random.randint(0, span_days)
        picked.append(first_day + timedelta(days=offset))
    return picked

# Number of rows generated for every table in this script; the id columns
# built with range(1, N + 1) therefore run from 1 to N.
N = 500

# ----------------------------
# Tabel Master
# ----------------------------

# Pit master table: one row per pit, with pit_id running from 1 to N.
master_pit = pd.DataFrame(
    data={
        "pit_id": range(1, N + 1),
        "nama_pit": [f"Pit {num}" for num in range(1, N + 1)],
        # Area and status are picked independently at random for each row.
        "area": [
            random.choice(["Utara", "Selatan", "Timur", "Barat"])
            for _row in range(N)
        ],
        "status": [random.choice(["Aktif", "Non-Aktif"]) for _row in range(N)],
    }
)

# Equipment master table: one row per unit, equipment_id running from 1 to N.
master_equipment = pd.DataFrame(
    data={
        "equipment_id": range(1, N + 1),
        "jenis": [
            random.choice(["Excavator", "Dump Truck", "Loader"])
            for _row in range(N)
        ],
        # Integers from 20 up to 199 (numpy's upper bound is exclusive).
        "kapasitas_ton": np.random.randint(low=20, high=200, size=N),
        "status": [
            random.choice(["Operasional", "Maintenance"]) for _row in range(N)
        ],
        # Random dates taken from rand_dates() with its default date range.
        "tanggal_beli": rand_dates(N),
    }
)

# Buyer master table: one row per buyer, buyer_id running from 1 to N.
master_buyer = pd.DataFrame(
    data={
        "buyer_id": range(1, N + 1),
        "nama": [f"Buyer {num}" for num in range(1, N + 1)],
        # Country and contract flag are picked at random for each row.
        "negara": [
            random.choice(["India", "China", "Japan", "Korea"])
            for _row in range(N)
        ],
        "kontrak_aktif": [random.choice(["Ya", "Tidak"]) for _row in range(N)],
    }
)

# Supplier master table: one row per supplier, supplier_id running 1 to N.
master_supplier = pd.DataFrame(
    data={
        "supplier_id": range(1, N + 1),
        "nama": [f"Supplier {num}" for num in range(1, N + 1)],
        # Category is picked at random for each row.
        "kategori": [
            random.choice(["Bahan Bakar", "Sparepart", "Jasa"])
            for _row in range(N)
        ],
    }
)

# ----------------------------
# Tabel biaya_operasional
# ----------------------------

# Operational cost table: N rows of randomly generated cost records.
biaya_operasional = pd.DataFrame(
    data={
        "cost_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        "kategori_biaya": [
            random.choice(["BBM", "Perbaikan", "Gaji", "Overhead"])
            for _row in range(N)
        ],
        "sub_kategori": [random.choice(["A", "B", "C"]) for _row in range(N)],
        # Ids are drawn from 1..N, the same range the master-table ids use.
        "pit_id": np.random.randint(low=1, high=N + 1, size=N),
        "equipment_id": np.random.randint(low=1, high=N + 1, size=N),
        # Upper bound is exclusive, so values stay below 10_000_000.
        "biaya": np.random.randint(low=1_000_000, high=10_000_000, size=N),
        # Constant columns: the same value is repeated on every row.
        "mata_uang": ["IDR" for _row in range(N)],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel produksi
# ----------------------------

# Production table: N rows of randomly generated production records.
produksi = pd.DataFrame(
    data={
        "prod_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        # Ids are drawn from 1..N, the same range the master-table ids use.
        "pit_id": np.random.randint(low=1, high=N + 1, size=N),
        "equipment_id": np.random.randint(low=1, high=N + 1, size=N),
        # Integers from 100 up to 999 (upper bound exclusive).
        "volume_ton": np.random.randint(low=100, high=1000, size=N),
        "shift": [
            random.choice(["Pagi", "Siang", "Malam"]) for _row in range(N)
        ],
        "jenis_produksi": [
            random.choice(["Ore", "Waste"]) for _row in range(N)
        ],
        # Generated independently here; not looked up from another table.
        "biaya_operasional_terkait": np.random.randint(
            low=1_000_000, high=5_000_000, size=N
        ),
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel ore_grade
# ----------------------------

# Ore grade table: N rows of randomly generated sample records.
ore_grade = pd.DataFrame(
    data={
        "grade_id": range(1, N + 1),
        "tanggal_sampel": rand_dates(N),
        # Drawn from 1..N, the same range the pit master ids use.
        "pit_id": np.random.randint(low=1, high=N + 1, size=N),
        "lokasi_sampel": [f"Lokasi {num}" for num in range(1, N + 1)],
        # Upper bounds are exclusive: bench is 1..19 and blok is 1..49.
        "bench": np.random.randint(low=1, high=20, size=N),
        "blok": np.random.randint(low=1, high=50, size=N),
        # Uniform draw between 0.1 and 2.5, rounded to two decimals.
        "grade_pct": np.random.uniform(low=0.1, high=2.5, size=N).round(
            decimals=2
        ),
        "volume_terkait_ton": np.random.randint(low=50, high=500, size=N),
        "metode_sampling": [
            random.choice(["Manual", "Drill"]) for _row in range(N)
        ],
        "analis_lab": [f"Analis {num}" for num in range(1, N + 1)],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel keselamatan_trifr
# ----------------------------

# Safety incident table: N rows of randomly generated incident records.
# NOTE(review): every column is drawn independently, so a row can have
# jumlah_insiden == 0 together with a category and severity - confirm this
# is acceptable for the intended analysis.
keselamatan = pd.DataFrame(
    data={
        "incident_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        # Upper bounds are exclusive: 1000..4999 and 0..4 respectively.
        "jam_kerja_total": np.random.randint(low=1000, high=5000, size=N),
        "jumlah_insiden": np.random.randint(low=0, high=5, size=N),
        "kategori_insiden": [
            random.choice(["Ringan", "Sedang", "Berat"]) for _row in range(N)
        ],
        "tingkat_keparahan": [
            random.choice(["Low", "Medium", "High"]) for _row in range(N)
        ],
        # Row index modulo 50, so the labels cycle through Area 0..Area 49.
        "lokasi": [f"Area {idx % 50}" for idx in range(N)],
        "shift": [
            random.choice(["Pagi", "Siang", "Malam"]) for _row in range(N)
        ],
        # NOTE(review): these values are strings, not integers.
        "karyawan_terlibat": [
            random.choice(["1", "2", "0"]) for _row in range(N)
        ],
        "status_penanganan": [
            random.choice(["Selesai", "Proses"]) for _row in range(N)
        ],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel kepatuhan_lingkungan
# ----------------------------

# Environmental compliance table: N rows of randomly generated audit records.
kepatuhan_lingkungan = pd.DataFrame(
    data={
        "audit_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        "jenis_audit": [
            random.choice(["Air", "Udara", "Tanah"]) for _row in range(N)
        ],
        "parameter": [
            random.choice(["pH", "CO2", "Pb"]) for _row in range(N)
        ],
        # Both are uniform draws rounded to two decimals.
        "nilai_ukur": np.random.uniform(low=0.1, high=10.0, size=N).round(
            decimals=2
        ),
        "batas_regulasi": np.random.uniform(low=1.0, high=8.0, size=N).round(
            decimals=2
        ),
        # NOTE(review): the result is picked at random and is not derived
        # from nilai_ukur versus batas_regulasi - confirm that is intended.
        "status_hasil": [
            random.choice(["Lulus", "Tidak Lulus"]) for _row in range(N)
        ],
        # Row index modulo 40, so the labels cycle through Lokasi 0..39.
        "lokasi": [f"Lokasi {idx % 40}" for idx in range(N)],
        "auditor": [f"Auditor {num}" for num in range(1, N + 1)],
        "tindakan_diperlukan": ["-" for _row in range(N)],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel arus_kas_operasional
# ----------------------------

# Operating cash-flow table: N rows of randomly generated transactions.
arus_kas_operasional = pd.DataFrame(
    data={
        "cash_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        "jenis_transaksi": [
            random.choice(["Masuk", "Keluar"]) for _row in range(N)
        ],
        "kategori": [
            random.choice(["Penjualan", "Operasional", "Lainnya"])
            for _row in range(N)
        ],
        # Upper bound is exclusive, so values stay below 20_000_000.
        "nilai": np.random.randint(low=1_000_000, high=20_000_000, size=N),
        "mata_uang": ["IDR" for _row in range(N)],
        # Drawn from 1..N, the same range the buyer master ids use.
        "buyer_id": np.random.randint(low=1, high=N + 1, size=N),
        # Integers from 1000 up to 1999; repeats are possible.
        "kontrak_id": np.random.randint(low=1000, high=2000, size=N),
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel capex
# ----------------------------

# Capital expenditure table: N rows of randomly generated capex records.
capex = pd.DataFrame(
    data={
        "capex_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        "kategori": [
            random.choice(["Pembelian", "Upgrade"]) for _row in range(N)
        ],
        # Drawn from 1..N, the same range the equipment master ids use.
        "equipment_id": np.random.randint(low=1, high=N + 1, size=N),
        # Upper bounds are exclusive: below 50_000_000, and 3..9.
        "nilai": np.random.randint(low=5_000_000, high=50_000_000, size=N),
        "umur_ekonomis_tahun": np.random.randint(low=3, high=10, size=N),
        "mata_uang": ["IDR" for _row in range(N)],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Tabel working_capital
# ----------------------------

# Working capital table: N rows of randomly generated receivable/payable rows.
working_capital = pd.DataFrame(
    data={
        "wc_id": range(1, N + 1),
        "tanggal": rand_dates(N),
        "jenis": [random.choice(["Piutang", "Hutang"]) for _row in range(N)],
        # Upper bound is exclusive, so values stay below 15_000_000.
        "nilai": np.random.randint(low=1_000_000, high=15_000_000, size=N),
        "mata_uang": ["IDR" for _row in range(N)],
        # Every row gets both a buyer id and a supplier id, each drawn from
        # 1..N regardless of the value in "jenis".
        "buyer_id": np.random.randint(low=1, high=N + 1, size=N),
        "supplier_id": np.random.randint(low=1, high=N + 1, size=N),
        "status": [random.choice(["Aktif", "Non-Aktif"]) for _row in range(N)],
        "keterangan": ["-" for _row in range(N)],
    }
)

# ----------------------------
# Simpan CSV + ZIP
# ----------------------------

# Name of the ZIP archive that bundles every generated CSV.
zip_name = "dataset_proyek_analisis.zip"

# Maps each output CSV file name to the DataFrame saved under that name.
files = {
    "master_pit.csv": master_pit,
    "master_equipment.csv": master_equipment,
    "master_buyer.csv": master_buyer,
    "master_supplier.csv": master_supplier,
    "biaya_operasional.csv": biaya_operasional,
    "produksi.csv": produksi,
    "ore_grade.csv": ore_grade,
    "keselamatan_trifr.csv": keselamatan,
    "kepatuhan_lingkungan.csv": kepatuhan_lingkungan,
    "arus_kas_operasional.csv": arus_kas_operasional,
    "capex.csv": capex,
    "working_capital.csv": working_capital,
}

# Each table is written as a CSV (relative path, without the index column)
# and then added to the archive, so the loose CSV files stay on disk too.
with zipfile.ZipFile(
    zip_name, mode="w", compression=zipfile.ZIP_DEFLATED
) as archive:
    for csv_name, table in files.items():
        table.to_csv(csv_name, index=False)
        archive.write(csv_name)

print("Selesai. File ZIP berhasil dibuat:", zip_name)


# Output: Selesai. File ZIP berhasil dibuat: dataset_proyek_analisis.zip
