In [7]:
import pandas as pd
import os

# =============================================================================
# TAHAP 0: FUNGSI HELPER UNTUK CLEANING
# =============================================================================

def clean_sheet_advanced(df):
    """
    Fungsi untuk cleaning sheet dengan aturan:
    1. Hapus 3 baris pertama
    2. Baris pertama setelah hapus 3 baris = header kosong, isi dengan 'No' dan 'Jenis Kendaraan'
    3. Hapus dari baris 'Vehicle Type' sampai bawah
    """
    # Hapus 3 baris pertama
    df_cleaned = df.iloc[3:].copy()
    df_cleaned = df_cleaned.reset_index(drop=True)
    
    # Cari dan hapus dari baris 'Vehicle Type' sampai bawah
    vehicle_type_row = None
    for idx, row in df_cleaned.iterrows():
        # Cek semua kolom di baris ini
        for col in df_cleaned.columns:
            cell_value = str(row[col]).strip().lower()
            if 'vehicle type' in cell_value:
                vehicle_type_row = idx
                break
        if vehicle_type_row is not None:
            break
    
    # Potong dataframe sampai sebelum baris 'Vehicle Type'
    if vehicle_type_row is not None:
        df_cleaned = df_cleaned.iloc[:vehicle_type_row]
        print(f"     🗑️  Dihapus dari baris {vehicle_type_row} (Vehicle Type) sampai bawah")
    
    # Reset index lagi setelah potong
    df_cleaned = df_cleaned.reset_index(drop=True)
    
    # Set nilai untuk 2 kolom pertama di baris pertama (header)
    if len(df_cleaned) > 0 and len(df_cleaned.columns) >= 2:
        df_cleaned.iloc[0, 0] = 'No'
        df_cleaned.iloc[0, 1] = 'Jenis Kendaraan'
    
    return df_cleaned

def process_daily_data(tanggal_list, minggu_label, base_path):
    """Fungsi untuk memproses data harian dan menggabungkannya"""
    
    nama_checkpoint = [
        "diponegoro", "imam bonjol", "a yani", "gajah mada", "sudirman",
        "brantas", "patimura", "trunojoyo", "arumdalu", "mojorejo"
    ]
    
    # Tahap 1: Cleaning individual files
    print(f"\n=== CLEANING DATA HARIAN {minggu_label} ===")
    
    for tanggal in tanggal_list:
        print(f"\n📅 Processing tanggal {tanggal} Juli...")
        
        file_path = rf"{base_path}\tanggal {tanggal} juli.xlsx"
        
        try:
            # Baca semua sheet tanpa header otomatis
            dfs = pd.read_excel(file_path, sheet_name=None, header=None)
            
            # Debug: tampilkan nama sheet
            print(f"  🔍 Sheet names found: {list(dfs.keys())}")
            
            # Mapping nama sheet ke checkpoint
            mapping = {f"{i+1}. {tanggal} Juli": nama_checkpoint[i] for i in range(10)}
            
            # Dictionary untuk menyimpan hasil cleaning per sheet
            cleaned_sheets = {}
            
            for sheet_name, df in dfs.items():
                if sheet_name in mapping:
                    print(f"  🔧 Cleaning sheet: {sheet_name}")
                    
                    # Cleaning dengan aturan baru
                    df_cleaned = clean_sheet_advanced(df)
                    
                    # Simpan dengan nama sheet asli (1. 4 Juli, 2. 4 Juli, dst)
                    cleaned_sheets[sheet_name] = df_cleaned
                    
                    print(f"     ✅ Baris setelah cleaning: {len(df_cleaned)}")
                    
                else:
                    print(f"  ⚠️  Sheet '{sheet_name}' diabaikan")
            
            # Simpan ke file Excel dengan multiple sheets (10 sheets terpisah)
            if cleaned_sheets:
                output_name = rf"{base_path}\dataset_cleaned_{tanggal}juli_{minggu_label.lower()}.xlsx"
                
                with pd.ExcelWriter(output_name, engine='openpyxl') as writer:
                    for sheet_name, df_cleaned in cleaned_sheets.items():
                        # Simpan dengan nama sheet asli, tanpa index dan tanpa header default
                        df_cleaned.to_excel(writer, sheet_name=sheet_name, index=False, header=False)
                
                print(f"  💾 Saved: {output_name}")
                print(f"     📊 Total sheets: {len(cleaned_sheets)}")
                
            else:
                print(f"  ❌ No valid data for tanggal {tanggal}")
        
        except Exception as e:
            print(f"  ❌ Error processing tanggal {tanggal}: {str(e)}")
    
    # Tahap 2: Gabungkan menjadi dataset mingguan
    print(f"\n🔄 Memulai penggabungan dataset {minggu_label}...")
    
    weekly_df_list = []
    
    for tanggal in tanggal_list:
        print(f"\n📅 Memproses tanggal {tanggal} Juli untuk penggabungan...")
        
        file_path = rf"{base_path}\dataset_cleaned_{tanggal}juli_{minggu_label.lower()}.xlsx"
        
        try:
            # Baca semua sheet dari file hasil cleaning
            dfs_cleaned = pd.read_excel(file_path, sheet_name=None, header=None)
            
            # Mapping sheet ke checkpoint
            mapping = {f"{i+1}. {tanggal} Juli": nama_checkpoint[i] for i in range(10)}
            
            daily_df_list = []
            
            for sheet_name, df in dfs_cleaned.items():
                if sheet_name in mapping:
                    # Buat header yang proper untuk dataset gabungan
                    if len(df) > 0:
                        # Ambil baris pertama sebagai header
                        header_row = df.iloc[0].tolist()
                        
                        # Buat dataframe baru dengan header yang benar
                        data_rows = df.iloc[1:].values  # Data mulai dari baris ke-2
                        
                        # Buat dataframe dengan header dan data
                        df_proper = pd.DataFrame(data_rows, columns=header_row)
                        
                        # Tambahkan kolom Source, Tanggal, dan Minggu
                        df_proper["Source"] = mapping[sheet_name]
                        df_proper["Tanggal"] = f"{tanggal}-07-2025"
                        df_proper["Minggu"] = minggu_label
                        
                        daily_df_list.append(df_proper)
                        print(f"  ✅ Sheet {sheet_name} → {mapping[sheet_name]} ({len(df_proper)} baris)")
            
            # Gabungkan semua sheet untuk tanggal ini
            if daily_df_list:
                daily_combined = pd.concat(daily_df_list, ignore_index=True)
                weekly_df_list.append(daily_combined)
                print(f"  📊 Total baris tanggal {tanggal}: {len(daily_combined)}")
            
        except Exception as e:
            print(f"  ❌ Error memproses tanggal {tanggal}: {str(e)}")
    
    # Gabungkan semua tanggal menjadi dataset mingguan
    if weekly_df_list:
        df_mingguan = pd.concat(weekly_df_list, ignore_index=True)
        return df_mingguan
    else:
        print(f"\n❌ Tidak ada data untuk {minggu_label}")
        return pd.DataFrame()

# =============================================================================
# TAHAP 1: CLEANING DAN PENGGABUNGAN DATA 2 MINGGU
# =============================================================================

# Parameter untuk lokal Windows
minggu1_list = [1, 2, 3, 4, 5, 6, 7]      # Minggu 1: Tanggal 1-7 Juli
minggu3_list = [15, 16, 17, 18, 19, 20, 21] # Minggu 3: Tanggal 15-21 Juli

# Base path untuk Windows lokal
base_path = r"C:\Dokumen\dishub\dashboard\JULI" #sesuaikan path sekarang

# Proses minggu 1
print("=== MEMPROSES MINGGU 1 (1-7 Juli) ===")
df_minggu1 = process_daily_data(minggu1_list, "Minggu1", base_path)

# Proses minggu 3  
print("\n=== MEMPROSES MINGGU 3 (15-21 Juli) ===")
df_minggu3 = process_daily_data(minggu3_list, "Minggu3", base_path)

# Gabungkan 2 minggu
if not df_minggu1.empty and not df_minggu3.empty:
    df_2minggu = pd.concat([df_minggu1, df_minggu3], ignore_index=True)
    
    # Simpan data gabungan 2 minggu
    output_2minggu = rf"{base_path}\dataset_2minggu_gabungan.xlsx"
    df_2minggu.to_excel(output_2minggu, index=False)
    
    print(f"\n✅ Data 2 minggu tergabung: {len(df_2minggu)} baris")
    print(f"💾 Disimpan ke: {output_2minggu}")
    
else:
    print("\n❌ Error: Salah satu minggu tidak memiliki data")
    exit()


=== MEMPROSES MINGGU 1 (1-7 Juli) ===

=== CLEANING DATA HARIAN Minggu1 ===

📅 Processing tanggal 1 Juli...
  🔍 Sheet names found: ['1. 1 Juli', '2. 1 Juli', '3. 1 Juli', '4. 1 Juli', '5. 1 Juli', '6. 1 Juli', '7. 1 Juli', '8. 1 Juli', '9. 1 Juli', '10. 1 Juli']
  🔧 Cleaning sheet: 1. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35
  🔧 Cleaning sheet: 2. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35
  🔧 Cleaning sheet: 3. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35
  🔧 Cleaning sheet: 4. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35
  🔧 Cleaning sheet: 5. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35
  🔧 Cleaning sheet: 6. 1 Juli
     🗑️  Dihapus dari baris 35 (Vehicle Type) sampai bawah
     ✅ Baris setelah cleaning: 35

In [8]:
import pandas as pd

df_2minggu = pd.read_excel(output_2minggu)

print("\n=== CLEANING DAN MAPPING JENIS KENDARAAN ===")

# Mapping jenis kendaraan
jenis_map = {
    "Large-Sized Coach": "Bus",
    "Light Truck": "Truck",
    "Minivan": "Roda 4",
    "Pedestrian": "Pejalan kaki",
    "Pick-up Truck": "Pick-up",
    "SUV/MPV": "Roda 4",
    "Sedan": "Roda 4",
    "Tricycle": "Tossa",
    "Truck": "Truck",
    "Two Wheeler": "Sepeda motor"
}
df_2minggu["Jenis Kendaraan"] = df_2minggu["Jenis Kendaraan"].replace(jenis_map)

# Mapping Source ke keterangan arah
keterangan_map = {
    "diponegoro": "Keluar Batu",
    "imam bonjol": "Batu",
    "a yani": "Batu",
    "gajah mada": "Batu",
    "sudirman": "Keluar Batu",
    "brantas": "Masuk Batu",
    "patimura": "Masuk Batu",
    "trunojoyo": "Masuk Batu",
    "arumdalu": "Masuk Batu",
    "mojorejo": "Masuk Batu"
}
df_2minggu["Keterangan"] = df_2minggu["Source"].map(keterangan_map)

# Konversi tanggal dan buat kolom Hari
df_2minggu["Tanggal"] = pd.to_datetime(df_2minggu["Tanggal"], format="%d-%m-%Y")
df_2minggu["Hari"] = df_2minggu["Tanggal"].dt.day_name()

# Ambil daftar kolom jam
jam_cols = [col for col in df_2minggu.columns if col.endswith(":00:00")]

# Hapus baris yang semua kolom jamnya bernilai 0
df_2minggu = df_2minggu[~(df_2minggu[jam_cols] == 0).all(axis=1)].reset_index(drop=True)

df_2minggu.to_excel(output_2minggu, index=False)

df_2minggu



=== CLEANING DAN MAPPING JENIS KENDARAAN ===


Unnamed: 0,No,Jenis Kendaraan,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,06:00:00,07:00:00,...,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Source,Tanggal,Minggu,Keterangan,Hari
0,3,Truck,21,12,17,16,8,6,7,16,...,26,12,12,30,6,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
1,4,Sepeda motor,199,82,107,129,92,180,342,435,...,654,513,501,427,125,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
2,7,Roda 4,10,9,12,7,10,7,13,14,...,58,50,53,22,14,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
3,15,Bus,2,1,0,3,1,4,1,1,...,3,2,1,1,0,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
4,20,Roda 4,13,11,7,9,7,11,10,12,...,28,29,19,18,11,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1350,20,Roda 4,17,18,11,6,12,18,21,23,...,15,10,8,9,8,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1351,22,Roda 4,59,25,16,25,34,45,105,127,...,64,59,72,51,27,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1352,24,Pick-up,1,2,1,1,0,2,1,3,...,0,2,1,1,0,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1353,26,Tossa,1,0,0,0,2,1,0,0,...,0,0,0,2,0,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday


In [9]:
df_minggu1 = df_2minggu[
    (df_2minggu["Tanggal"] >= pd.Timestamp("2025-07-01")) & 
    (df_2minggu["Tanggal"] <= pd.Timestamp("2025-07-07"))
]

df_minggu3 = df_2minggu[
    (df_2minggu["Tanggal"] >= pd.Timestamp("2025-07-15")) & 
    (df_2minggu["Tanggal"] <= pd.Timestamp("2025-07-21"))
]

print("=== Minggu 1 ===")
display(df_minggu1)

print("\n=== Minggu 3 ===")
display(df_minggu3)


print(df_minggu1["Tanggal"])
print(df_minggu1["Tanggal"].dtype)
print(df_minggu3["Tanggal"])
print(df_minggu3["Tanggal"].dtype)


=== Minggu 1 ===


Unnamed: 0,No,Jenis Kendaraan,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,06:00:00,07:00:00,...,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Source,Tanggal,Minggu,Keterangan,Hari
0,3,Truck,21,12,17,16,8,6,7,16,...,26,12,12,30,6,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
1,4,Sepeda motor,199,82,107,129,92,180,342,435,...,654,513,501,427,125,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
2,7,Roda 4,10,9,12,7,10,7,13,14,...,58,50,53,22,14,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
3,15,Bus,2,1,0,3,1,4,1,1,...,3,2,1,1,0,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
4,20,Roda 4,13,11,7,9,7,11,10,12,...,28,29,19,18,11,diponegoro,2025-07-01,Minggu1,Keluar Batu,Tuesday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673,20,Roda 4,13,15,6,9,9,14,16,37,...,20,25,15,6,14,mojorejo,2025-07-07,Minggu1,Masuk Batu,Monday
674,22,Roda 4,49,27,27,18,30,73,140,197,...,135,157,121,80,53,mojorejo,2025-07-07,Minggu1,Masuk Batu,Monday
675,24,Pick-up,0,0,0,0,0,2,8,4,...,1,1,3,1,1,mojorejo,2025-07-07,Minggu1,Masuk Batu,Monday
676,26,Tossa,0,0,1,0,1,3,0,2,...,0,1,1,0,1,mojorejo,2025-07-07,Minggu1,Masuk Batu,Monday



=== Minggu 3 ===


Unnamed: 0,No,Jenis Kendaraan,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,06:00:00,07:00:00,...,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Source,Tanggal,Minggu,Keterangan,Hari
678,3,Truck,16,14,23,15,16,17,9,26,...,20,12,12,14,9,diponegoro,2025-07-15,Minggu3,Keluar Batu,Tuesday
679,4,Sepeda motor,150,156,196,137,114,279,791,861,...,365,463,510,388,252,diponegoro,2025-07-15,Minggu3,Keluar Batu,Tuesday
680,7,Roda 4,8,2,10,5,8,8,17,30,...,23,32,27,27,18,diponegoro,2025-07-15,Minggu3,Keluar Batu,Tuesday
681,15,Bus,1,1,1,0,4,0,2,10,...,3,0,3,0,0,diponegoro,2025-07-15,Minggu3,Keluar Batu,Tuesday
682,20,Roda 4,5,12,13,8,8,16,11,16,...,9,11,7,12,5,diponegoro,2025-07-15,Minggu3,Keluar Batu,Tuesday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1350,20,Roda 4,17,18,11,6,12,18,21,23,...,15,10,8,9,8,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1351,22,Roda 4,59,25,16,25,34,45,105,127,...,64,59,72,51,27,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1352,24,Pick-up,1,2,1,1,0,2,1,3,...,0,2,1,1,0,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday
1353,26,Tossa,1,0,0,0,2,1,0,0,...,0,0,0,2,0,mojorejo,2025-07-21,Minggu3,Masuk Batu,Monday


0     2025-07-01
1     2025-07-01
2     2025-07-01
3     2025-07-01
4     2025-07-01
         ...    
673   2025-07-07
674   2025-07-07
675   2025-07-07
676   2025-07-07
677   2025-07-07
Name: Tanggal, Length: 678, dtype: datetime64[ns]
datetime64[ns]
678    2025-07-15
679    2025-07-15
680    2025-07-15
681    2025-07-15
682    2025-07-15
          ...    
1350   2025-07-21
1351   2025-07-21
1352   2025-07-21
1353   2025-07-21
1354   2025-07-21
Name: Tanggal, Length: 677, dtype: datetime64[ns]
datetime64[ns]


In [10]:
import pandas as pd

# Gabungkan Minggu 1 & 3
df_both = pd.concat([df_minggu1, df_minggu3], ignore_index=True)

# Ambil daftar jam
jam_cols = [col for col in df_both.columns if col.endswith(":00:00")]

# Hitung rata-rata per Hari + Jenis Kendaraan + Source (Source tetap dipertahankan)
df_avg_hari = (
    df_both.groupby(["Hari", "Source", "Jenis Kendaraan", "Keterangan"], as_index=False)
    [jam_cols].mean()
)

# Tambahkan kolom Total per baris
df_avg_hari["Total"] = df_avg_hari[jam_cols].sum(axis=1)

# Tampilkan
display(df_avg_hari)

# Hitung total per jenis kendaraan per hari (gabungan semua checkpoint)
total_per_jenis_per_hari = (
    df_avg_hari.groupby(["Hari", "Jenis Kendaraan"])["Total"]
    .sum()
    .reset_index()
    .rename(columns={"Total": "TotalJenis"})
)

# Gabungkan total ke df_avg_hari
df_proporsi = df_avg_hari.merge(total_per_jenis_per_hari, on=["Hari", "Jenis Kendaraan"])

# Hitung proporsi per checkpoint/source
df_proporsi["Proporsi"] = df_proporsi["Total"] / df_proporsi["TotalJenis"]

# Simpan hasil proporsi
proporsi_path = rf"{base_path}\proporsi_per_hari_2minggu.xlsx"
df_proporsi.to_excel(proporsi_path, index=False)
print(f"✅ Proporsi per checkpoint disimpan ke: {proporsi_path}")

# Tampilkan
display(df_proporsi)



Unnamed: 0,Hari,Source,Jenis Kendaraan,Keterangan,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,...,15:00:00,16:00:00,17:00:00,18:00:00,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Total
0,Friday,a yani,Bus,Batu,2.500000,0.500000,0.500000,0.500000,0.500000,1.000000,...,6.000000,4.000000,4.000000,6.000000,10.0,7.0,5.500000,3.000000,0.500000,118.000000
1,Friday,a yani,Pejalan kaki,Batu,0.000000,0.500000,0.000000,2.000000,1.000000,5.500000,...,48.500000,45.000000,33.500000,39.000000,33.5,44.5,47.000000,7.000000,0.000000,419.500000
2,Friday,a yani,Pick-up,Batu,0.500000,0.500000,0.000000,0.000000,1.500000,1.500000,...,5.000000,4.000000,9.000000,4.000000,4.5,2.5,2.500000,2.500000,2.000000,97.000000
3,Friday,a yani,Roda 4,Batu,10.333333,9.166667,3.500000,4.000000,6.500000,9.666667,...,106.666667,111.333333,102.333333,99.000000,130.5,112.5,91.166667,56.500000,29.166667,1646.333333
4,Friday,a yani,Sepeda motor,Batu,110.500000,63.500000,36.000000,37.500000,40.000000,141.500000,...,892.500000,1071.000000,797.000000,791.500000,790.5,674.0,583.000000,365.000000,229.000000,13332.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,Wednesday,trunojoyo,Pick-up,Masuk Batu,1.000000,0.500000,1.000000,0.000000,1.500000,2.500000,...,4.000000,6.000000,7.000000,4.000000,3.5,3.5,2.000000,2.000000,1.500000,95.000000
466,Wednesday,trunojoyo,Roda 4,Masuk Batu,19.500000,12.833333,12.666667,12.333333,13.833333,23.166667,...,116.666667,117.833333,126.666667,110.666667,90.0,65.0,53.000000,38.333333,32.666667,1718.666667
467,Wednesday,trunojoyo,Sepeda motor,Masuk Batu,112.500000,81.500000,84.500000,61.000000,98.000000,347.000000,...,865.500000,1069.500000,732.000000,535.000000,448.5,401.5,335.000000,249.000000,178.000000,13985.000000
468,Wednesday,trunojoyo,Tossa,Masuk Batu,4.000000,6.000000,5.500000,1.500000,1.000000,3.000000,...,10.000000,13.500000,12.500000,10.000000,5.5,5.5,4.500000,4.000000,2.000000,174.500000


✅ Proporsi per checkpoint disimpan ke: C:\Dokumen\dishub\dashboard\JULI\proporsi_per_hari_2minggu.xlsx


Unnamed: 0,Hari,Source,Jenis Kendaraan,Keterangan,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,...,17:00:00,18:00:00,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Total,TotalJenis,Proporsi
0,Friday,a yani,Bus,Batu,2.500000,0.500000,0.500000,0.500000,0.500000,1.000000,...,4.000000,6.000000,10.0,7.0,5.500000,3.000000,0.500000,118.000000,1255.000000,0.094024
1,Friday,a yani,Pejalan kaki,Batu,0.000000,0.500000,0.000000,2.000000,1.000000,5.500000,...,33.500000,39.000000,33.5,44.5,47.000000,7.000000,0.000000,419.500000,1191.500000,0.352077
2,Friday,a yani,Pick-up,Batu,0.500000,0.500000,0.000000,0.000000,1.500000,1.500000,...,9.000000,4.000000,4.5,2.5,2.500000,2.500000,2.000000,97.000000,868.500000,0.111687
3,Friday,a yani,Roda 4,Batu,10.333333,9.166667,3.500000,4.000000,6.500000,9.666667,...,102.333333,99.000000,130.5,112.5,91.166667,56.500000,29.166667,1646.333333,14990.500000,0.109825
4,Friday,a yani,Sepeda motor,Batu,110.500000,63.500000,36.000000,37.500000,40.000000,141.500000,...,797.000000,791.500000,790.5,674.0,583.000000,365.000000,229.000000,13332.500000,113447.500000,0.117521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,Wednesday,trunojoyo,Pick-up,Masuk Batu,1.000000,0.500000,1.000000,0.000000,1.500000,2.500000,...,7.000000,4.000000,3.5,3.5,2.000000,2.000000,1.500000,95.000000,1093.000000,0.086917
466,Wednesday,trunojoyo,Roda 4,Masuk Batu,19.500000,12.833333,12.666667,12.333333,13.833333,23.166667,...,126.666667,110.666667,90.0,65.0,53.000000,38.333333,32.666667,1718.666667,17310.666667,0.099284
467,Wednesday,trunojoyo,Sepeda motor,Masuk Batu,112.500000,81.500000,84.500000,61.000000,98.000000,347.000000,...,732.000000,535.000000,448.5,401.5,335.000000,249.000000,178.000000,13985.000000,142815.500000,0.097924
468,Wednesday,trunojoyo,Tossa,Masuk Batu,4.000000,6.000000,5.500000,1.500000,1.000000,3.000000,...,12.500000,10.000000,5.5,5.5,4.500000,4.000000,2.000000,174.500000,869.000000,0.200806


In [12]:
import pandas as pd
import os

# ====== PATH FOLDER DAN FILE ======
base_path = r"C:\Dokumen\dishub\dashboard\JULI"
file_path = os.path.join(base_path, "Data Volume Lalu Lintas Juli.xlsx")  # ganti nama file sesuai aslinya

# Sheet dari 1 sampai 31 (Juli)
sheet_names = [str(i) for i in range(1, 32)]
list_df = []

# ====== FUNGSI RENAME HEADER DUPLIKAT ======
def dedup_columns(cols):
    counts = {}
    new_cols = []
    for col in cols:
        if col not in counts:
            counts[col] = 1
            new_cols.append(col)
        else:
            counts[col] += 1
            new_cols.append(f"{col}.{counts[col]}")
    return new_cols

# ====== LOOP SEMUA SHEET ======
for sheet in sheet_names:
    print(f"Memproses sheet: {sheet}")
    df_raw = pd.read_excel(file_path, sheet_name=sheet, header=None)

    # Cari baris awal data (setelah header "Jenis Kendaraan")
    start_idx = df_raw[df_raw[0].astype(str).str.contains("Jenis Kendaraan", case=False, na=False)].index[0] + 1

    # Ambil header
    header_row = df_raw.iloc[start_idx - 1].fillna("NA").astype(str)
    if header_row.duplicated().any():
        print(f"➜ Duplikat header di sheet {sheet} ➜ auto rename")
        header_row = dedup_columns(header_row)

    # Ambil data
    df_jenis = df_raw.iloc[start_idx:].copy()
    df_jenis.columns = header_row

    # Hapus baris yang berisi arah/keterangan
    mask_arah = df_jenis.apply(
         lambda row: row.astype(str).str.contains(r"Arah|Keterangan|:", case=False, na=False).any(),
         axis=1
    )
    df_jenis = df_jenis[~mask_arah]

    # Hapus baris kosong dan total
    df_jenis = df_jenis[df_jenis["Jenis Kendaraan"].notna()]
    df_jenis = df_jenis[~df_jenis["Jenis Kendaraan"].astype(str).str.lower().str.contains("total")]

    # Tambah kolom tanggal
    df_jenis["Tanggal"] = f"{sheet}-07-2025"  # karena data Juli

    list_df.append(df_jenis)

print("➜ Semua sheet OK ➜ Menggabungkan ...")
df_bulanan = pd.concat(list_df, ignore_index=True)

# Simpan hasil gabungan awal
rekap_awal_path = os.path.join(base_path, "rekap_total_jenis_kendaraan_bulanan_pertanggal.xlsx")
df_bulanan.to_excel(rekap_awal_path, index=False)
print(f"✅ File jadi: {rekap_awal_path}")

# ====== PROSES PEMBERSIHAN ======
df_jenis = pd.read_excel(rekap_awal_path)

# Ubah nama kolom jam
jam_list = [f"{str(i).zfill(2)}:00:00" for i in range(24)]
columns = list(df_jenis.columns)
columns[1:25] = jam_list
df_jenis.columns = columns

# Mapping jenis kendaraan
jenis_map = {
    "Truk": "Truck",
    "Light Truck": "Truck",
    "Bus": "Bus",
    "Pick up Truck": "Pick-up",
    "Sedan": "Roda 4",
    "Minivan": "Roda 4",
    "SUV/MPV": "Roda 4",
    "Roda 3": "Tossa",
    "Roda 2": "Sepeda motor",
    "Pedestrian": "Pejalan kaki",
    "Unknown": "Unknown"
}
df_jenis['Jenis Kendaraan'] = df_jenis['Jenis Kendaraan'].map(jenis_map)

# Pastikan semua kolom jam & Total numerik
for col in jam_list + ['Total']:
    df_jenis[col] = pd.to_numeric(df_jenis[col], errors='coerce').fillna(0)

# Grouping & penjumlahan
df_jenis = df_jenis.groupby(['Tanggal', 'Jenis Kendaraan'], as_index=False)[jam_list + ['Total']].sum()

# Urutkan data
df_jenis = df_jenis.sort_values(by=['Tanggal', 'Jenis Kendaraan']).reset_index(drop=True)

# Simpan hasil akhir
rekap_bersih_path = os.path.join(base_path, "rekap_bersih_total_jenis_kendaraan_bulanan_pertanggal.xlsx")
df_jenis.to_excel(rekap_bersih_path, index=False)

print(f"✅ Rekap jenis kendaraan selesai disimpan ke: {rekap_bersih_path}")
print("\n📊 Data types:")
print(df_jenis.dtypes)

# Preview hasil
print(df_jenis.head())


Memproses sheet: 1
➜ Duplikat header di sheet 1 ➜ auto rename
Memproses sheet: 2
➜ Duplikat header di sheet 2 ➜ auto rename
Memproses sheet: 3
➜ Duplikat header di sheet 3 ➜ auto rename
Memproses sheet: 4
➜ Duplikat header di sheet 4 ➜ auto rename
Memproses sheet: 5
➜ Duplikat header di sheet 5 ➜ auto rename
Memproses sheet: 6
➜ Duplikat header di sheet 6 ➜ auto rename
Memproses sheet: 7
➜ Duplikat header di sheet 7 ➜ auto rename
Memproses sheet: 8
➜ Duplikat header di sheet 8 ➜ auto rename
Memproses sheet: 9
➜ Duplikat header di sheet 9 ➜ auto rename
Memproses sheet: 10
➜ Duplikat header di sheet 10 ➜ auto rename
Memproses sheet: 11
➜ Duplikat header di sheet 11 ➜ auto rename
Memproses sheet: 12
➜ Duplikat header di sheet 12 ➜ auto rename
Memproses sheet: 13
➜ Duplikat header di sheet 13 ➜ auto rename
Memproses sheet: 14
➜ Duplikat header di sheet 14 ➜ auto rename
Memproses sheet: 15
➜ Duplikat header di sheet 15 ➜ auto rename
Memproses sheet: 16
➜ Duplikat header di sheet 16 ➜ auto r

In [8]:
import pandas as pd

# === 1. Load data bulanan total ===
bulanan_path = rf"{base_path}\rekap_bersih_total_jenis_kendaraan_bulanan_pertanggal.xlsx"
df_bulanan = pd.read_excel(bulanan_path)

# Konversi tanggal & buat kolom Hari
df_bulanan["Tanggal"] = pd.to_datetime(df_bulanan["Tanggal"], dayfirst=True)
df_bulanan["Hari"] = df_bulanan["Tanggal"].dt.day_name()

# Ambil kolom jam
jam_cols = [col for col in df_bulanan.columns if ":" in col]

# Pastikan kolom jam numeric
for col in jam_cols:
    df_bulanan[col] = pd.to_numeric(df_bulanan[col], errors='coerce').fillna(0)

# === 2. Ubah ke long format ===
df_long = df_bulanan.melt(
    id_vars=["Tanggal", "Jenis Kendaraan", "Hari"],
    value_vars=jam_cols,
    var_name="Jam",
    value_name="Jumlah"
)

# === 3. Load proporsi 2 minggu per checkpoint ===
proporsi_path = rf"{base_path}\proporsi_per_hari_2minggu.xlsx"
df_proporsi = pd.read_excel(proporsi_path)

# === 4. Merge bulanan dengan proporsi berdasarkan Hari, Jenis Kendaraan ===
df_merge = df_long.merge(
    df_proporsi[["Hari", "Source", "Jenis Kendaraan", "Proporsi"]],
    on=["Hari", "Jenis Kendaraan"],
    how="left"
)

# === 5. Hitung estimasi jumlah per checkpoint per jam ===
df_merge["Jumlah_Estimasi"] = df_merge["Jumlah"] * df_merge["Proporsi"]
df_merge["Jumlah_Estimasi"] = df_merge["Jumlah_Estimasi"].fillna(0)

# === 6. Pivot kembali ke wide format ===
df_final = df_merge.pivot_table(
    index=["Tanggal", "Jenis Kendaraan", "Source"],
    columns="Jam",
    values="Jumlah_Estimasi",
    aggfunc="sum"
).reset_index()

# Bulatkan nilai dan isi NaN
jam_columns = [col for col in df_final.columns if ":" in col]
df_final[jam_columns] = df_final[jam_columns].fillna(0).round().astype(int)

# Hitung total per baris
df_final["Total"] = df_final[jam_columns].sum(axis=1)

# Hapus jenis kendaraan 'unknown' jika ada
df_final = df_final[df_final["Jenis Kendaraan"].str.lower() != "unknown"]

# === 7. Simpan hasil akhir ===
output_final_path = rf"{base_path}\rekap_final_estimasi_bulan_juli.xlsx"
df_final.to_excel(output_final_path, index=False)
print(f"✅ Rekap estimasi bulanan disimpan ke: {output_final_path}")

# Tampilkan sebagian
display(df_final.head())


✅ Rekap estimasi bulanan disimpan ke: C:\Dokumen\dishub\dashboard\JULI\rekap_final_estimasi_bulan_juli.xlsx


Jam,Tanggal,Jenis Kendaraan,Source,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00,05:00:00,06:00:00,...,15:00:00,16:00:00,17:00:00,18:00:00,19:00:00,20:00:00,21:00:00,22:00:00,23:00:00,Total
0,2025-07-01,Bus,a yani,2,2,2,2,3,4,4,...,8,12,9,9,9,6,5,5,3,144
1,2025-07-01,Bus,arumdalu,4,4,4,4,5,8,8,...,15,21,17,16,17,12,9,9,6,269
2,2025-07-01,Bus,brantas,1,1,1,2,2,3,3,...,5,8,6,6,6,4,3,3,2,98
3,2025-07-01,Bus,diponegoro,1,1,1,2,2,3,3,...,5,8,6,6,6,4,3,3,2,96
4,2025-07-01,Bus,gajah mada,3,3,3,3,4,6,6,...,11,16,13,13,13,9,7,7,4,206


In [4]:
# Lihat jenis kendaraan unik per Source
jenis_per_source = df_2minggu.groupby("Source")["Jenis Kendaraan"].unique()

# Biar rapi jadi DataFrame
jenis_per_source_df = jenis_per_source.reset_index()
jenis_per_source_df.columns = ["Source", "Jenis Kendaraan Unik"]

print(jenis_per_source_df)


        Source                               Jenis Kendaraan Unik
0       a yani  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
1     arumdalu  [Truck, Sepeda motor, Roda 4, Unknown, Bus, Pi...
2      brantas  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
3   diponegoro  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
4   gajah mada  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
5  imam bonjol  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
6     mojorejo  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
7     patimura  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
8     sudirman                      [Truck, Roda 4, Bus, Pick-up]
9    trunojoyo  [Truck, Sepeda motor, Roda 4, Bus, Pick-up, To...
