In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from geopy.distance import great_circle

In [4]:
# Load the outlet table (one row per salesman/outlet visit) from the Excel workbook
# and display it as the cell output.
data = pd.read_excel('Kota_tangsel_result.xlsx')
data

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,Group
0,63,Fri,Odd,601637725,Retail,-6.26191,106.674,Tangerang Selatan,9.289766,0
1,64,Tues,Odd,601647632,Retail,-6.26382,106.673,Tangerang Selatan,9.339894,0
2,60,Fri,Even,601288171,Retail,-6.25305,106.681,Tangerang Selatan,9.358375,0
3,64,Tues,Odd,601637669,Retail,-6.26408,106.673,Tangerang Selatan,9.358619,0
4,63,Sat,Odd,602491348,Retail,-6.26297,106.674,Tangerang Selatan,9.364165,0
...,...,...,...,...,...,...,...,...,...,...
9205,84,Sat,Odd,601580025,Retail,-6.36126,106.766,Tangerang Selatan,24.240623,19
9206,73,Fri,Odd,602639786,Retail,-6.30692,106.820,Tangerang Selatan,25.759899,19
9207,18,Thurs,Even,602055437,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19
9208,18,Thurs,Even,602055438,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19


In [6]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9210 entries, 0 to 9209
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Salesman   9210 non-null   int64  
 1   Day        9210 non-null   object 
 2   Pattern    9210 non-null   object 
 3   ID Outlet  9210 non-null   int64  
 4   Channel    9210 non-null   object 
 5   Latitude   9210 non-null   float64
 6   Longitude  9210 non-null   float64
 7   Kota       9210 non-null   object 
 8   Distance   9210 non-null   float64
 9   Group      9210 non-null   int64  
dtypes: float64(3), int64(3), object(4)
memory usage: 719.7+ KB


### `CLUSTERING Hari`

In [8]:
# Helper for computing the distance between two coordinates
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance, in kilometres, between two points.

    Each point is passed to geopy's ``great_circle`` as a ``(lat, lon)`` tuple:
    ``(lat1, lon1)`` is the first point and ``(lat2, lon2)`` the second.
    """
    start_point = (lat1, lon1)
    end_point = (lat2, lon2)
    return great_circle(start_point, end_point).km

# Helper that re-aligns the day ('Hari') of rows sharing the same outlet ID
def adjust_groups(data, group_counts, max_group_size, num_groups):
    """Pull rows of a repeated outlet onto the same 'Hari' value where possible.

    For every 'ID Outlet' that occurs more than once, the rows are walked in
    frame order. Whenever a row's 'Hari' differs from the (already adjusted)
    'Hari' of the previous row of the same outlet, the row is moved:

    * to the previous row's 'Hari' when ``group_counts`` reports that day as
      not exceeding ``max_group_size``;
    * otherwise to ``(current 'Hari' + 1) % num_groups``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'ID Outlet' and 'Hari'. Not modified.
        Index labels are used to write results, so they should be unique.
    group_counts : mapping-like with ``.get(key, default)``
        Number of rows per 'Hari' value (e.g. a ``value_counts()`` Series).
    max_group_size : int
        Size above which a day is treated as full.
    num_groups : int
        Number of distinct 'Hari' values; used for the wrap-around fallback.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with the adjusted 'Hari' column.
    """
    # NOTE(review): group_counts is never updated as rows are moved, so the
    # capacity check always sees the counts from before any adjustment.
    # Kept as-is to preserve results; confirm whether live counts are wanted.
    adjusted_groups = data.copy()

    # Only outlets that appear more than once can need re-alignment.
    repeated_rows = data[data.duplicated(['ID Outlet'], keep=False)]

    # One grouped pass instead of re-filtering the whole frame for every
    # repeated outlet ID; row order inside each outlet is preserved.
    for _, outlet_rows in repeated_rows.groupby('ID Outlet', sort=False):
        prev_group = None
        for index, current_group in outlet_rows['Hari'].items():
            assigned_group = current_group
            if prev_group is not None and prev_group != current_group:
                if group_counts.get(prev_group, 0) > max_group_size:
                    # Previous day is over capacity: bump to the following day.
                    assigned_group = (current_group + 1) % num_groups
                else:
                    assigned_group = prev_group
                adjusted_groups.loc[index, 'Hari'] = assigned_group
            # The next row of this outlet is compared against the adjusted value.
            prev_group = assigned_group

    return adjusted_groups

# Reference point from which every outlet's distance is measured
center_lat = -6.20949
center_lon = 106.60856

# Each salesman group is split into this many day buckets ('Hari' 0..5)
num_subgroups = 6

# Per-group results are collected here and concatenated once after the loop;
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows.
adjusted_frames = []

# Loop over every salesman group
for group_number in range(20):
    # Work on a copy of the rows belonging to the current group
    data_group = data[data['Group'] == group_number].copy()
    if data_group.empty:
        # Nothing to split for a group without outlets
        continue

    # Distance of each outlet from the reference point
    data_group['Distance'] = data_group.apply(
        lambda row: calculate_distance(center_lat, center_lon, row['Latitude'], row['Longitude']),
        axis=1,
    )

    # Order the outlets from nearest to farthest
    data_group_sorted = data_group.sort_values(by='Distance').reset_index(drop=True)

    # Bucket sizes: every day gets `group_size` rows and the first `remainder`
    # days get one extra, so the sizes add up to the number of rows.
    total_rows = len(data_group_sorted)
    group_size, remainder = divmod(total_rows, num_subgroups)
    day_sizes = [group_size + (1 if day < remainder else 0) for day in range(num_subgroups)]

    # Consecutive runs of the distance-sorted rows receive day 0, 1, ..., 5
    data_group_sorted['Hari'] = np.repeat(np.arange(num_subgroups), day_sizes)

    # Inputs for the same-outlet adjustment
    group_counts = data_group_sorted['Hari'].value_counts().sort_index()
    max_group_size = group_size + 1 if remainder > 0 else group_size

    # Align rows that share an outlet ID
    data_group_adjusted = adjust_groups(data_group_sorted, group_counts, max_group_size, num_subgroups)

    adjusted_frames.append(data_group_adjusted)

# Single concatenation of all groups (empty frame when no group had rows)
all_data_adjusted = (
    pd.concat(adjusted_frames, ignore_index=True) if adjusted_frames else pd.DataFrame()
)
all_data_adjusted = all_data_adjusted.rename(columns={'Group': 'SalesmanNew'})

all_data_adjusted

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari
0,63,Fri,Odd,601637725,Retail,-6.26191,106.674,Tangerang Selatan,9.289766,0,0
1,64,Tues,Odd,601647632,Retail,-6.26382,106.673,Tangerang Selatan,9.339894,0,0
2,60,Fri,Even,601288171,Retail,-6.25305,106.681,Tangerang Selatan,9.358375,0,0
3,64,Tues,Odd,601637669,Retail,-6.26408,106.673,Tangerang Selatan,9.358619,0,0
4,63,Sat,Odd,602491348,Retail,-6.26297,106.674,Tangerang Selatan,9.364165,0,0
...,...,...,...,...,...,...,...,...,...,...,...
9205,84,Sat,Odd,601580025,Retail,-6.36126,106.766,Tangerang Selatan,24.240623,19,5
9206,73,Fri,Odd,602639786,Retail,-6.30692,106.820,Tangerang Selatan,25.759899,19,5
9207,18,Thurs,Even,602055438,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,5
9208,18,Thurs,Even,602055437,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,5


In [9]:
# Show only the rows whose 'Channel' is 'Grosir' to inspect their day assignment.
all_data_adjusted.loc[all_data_adjusted['Channel'].eq('Grosir')]

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari
6059,68,Wed,Odd,600007843,Grosir,-6.28348,106.747,Tangerang Selatan,17.373944,13,0
6060,68,Wed,Even,600007843,Grosir,-6.28348,106.747,Tangerang Selatan,17.373944,13,0
6062,68,Wed,Even,600544726,Grosir,-6.28349,106.747,Tangerang Selatan,17.374471,13,0
6063,68,Wed,Odd,600544726,Grosir,-6.28349,106.747,Tangerang Selatan,17.374471,13,0
6105,68,Tues,Odd,600007842,Grosir,-6.28296,106.748,Tangerang Selatan,17.444232,13,1
...,...,...,...,...,...,...,...,...,...,...,...
8319,74,Wed,Even,601287809,Grosir,-6.31070,106.768,Tangerang Selatan,20.910090,18,0
8331,74,Wed,Odd,601287818,Grosir,-6.31114,106.768,Tangerang Selatan,20.936457,18,0
8332,74,Wed,Even,601287818,Grosir,-6.31114,106.768,Tangerang Selatan,20.936457,18,0
8379,74,Wed,Odd,602752042,Grosir,-6.31089,106.769,Tangerang Selatan,21.014657,18,1


In [11]:
# Helper for computing the distance between two coordinates
# NOTE(review): same function as the one defined in the earlier clustering
# cell; consider keeping a single definition.
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance, in kilometres, between two points.

    Each point is passed to geopy's ``great_circle`` as a ``(lat, lon)`` tuple:
    ``(lat1, lon1)`` is the first point and ``(lat2, lon2)`` the second.
    """
    start_point = (lat1, lon1)
    end_point = (lat2, lon2)
    return great_circle(start_point, end_point).km

# Helper that splits rows into groups 'A' and 'B' while checking outlet IDs
def assign_groups(data_sorted):
    """Label every row 'A' or 'B' in a 'Group' column and return the frame.

    Rows are visited in frame order:

    * the first row of each distinct 'ID Outlet' gets 'A' when an even number
      of distinct outlets has been seen before it, otherwise 'B';
    * every later row of an outlet already seen gets the opposite of the label
      given to that outlet's first row.

    Parameters
    ----------
    data_sorted : pandas.DataFrame
        Must contain the column 'ID Outlet'. The 'Group' column is written
        in place (created if missing, overwritten otherwise).

    Returns
    -------
    pandas.DataFrame
        The same ``data_sorted`` object, now carrying the 'Group' column.
    """
    # Outlet ID -> label of its first row. A dictionary lookup replaces the
    # full-frame boolean scan that was run for every repeated outlet.
    first_label_by_outlet = {}
    labels = []

    for outlet_id in data_sorted['ID Outlet']:
        if outlet_id not in first_label_by_outlet:
            # New outlets alternate A, B, A, B ... by count of outlets seen so far.
            label = 'A' if len(first_label_by_outlet) % 2 == 0 else 'B'
            first_label_by_outlet[outlet_id] = label
        else:
            # Repeat visits go to the group opposite to the outlet's first row.
            label = 'B' if first_label_by_outlet[outlet_id] == 'A' else 'A'
        labels.append(label)

    # One positional column assignment instead of a .loc write per row.
    data_sorted['Group'] = labels
    return data_sorted

# Process every (SalesmanNew, Hari) combination present in the data.
# groupby yields the combinations in ascending key order and only those that
# actually have rows, so no salesman/day is dropped by hard-coded loop bounds.
all_results = []

for (salesman_new, hari), data_pattern in all_data_adjusted.groupby(['SalesmanNew', 'Hari']):
    # Work on a private copy of this salesman/day slice
    data_pattern = data_pattern.copy()

    # Distance of each outlet from the reference point
    data_pattern.loc[:, 'Distance'] = data_pattern.apply(lambda row: calculate_distance(center_lat, center_lon, row['Latitude'], row['Longitude']), axis=1)

    # Order the outlets from nearest to farthest
    data_pattern_sorted = data_pattern.sort_values(by='Distance').reset_index(drop=True)

    # Start with an empty 'Group' column to be filled by assign_groups
    data_pattern_sorted['Group'] = pd.Series(dtype='str')

    # Allocate the rows to group 'A' or 'B'
    result = assign_groups(data_pattern_sorted)

    all_results.append(result)

# Combine all salesman/day results into one frame
final_result = pd.concat(all_results, ignore_index=True)
# Display the final result
final_result

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
0,63,Fri,Odd,601637725,Retail,-6.26191,106.674,Tangerang Selatan,9.289766,0,0,A
1,64,Tues,Odd,601647632,Retail,-6.26382,106.673,Tangerang Selatan,9.339894,0,0,B
2,60,Fri,Even,601288171,Retail,-6.25305,106.681,Tangerang Selatan,9.358375,0,0,A
3,64,Tues,Odd,601637669,Retail,-6.26408,106.673,Tangerang Selatan,9.358619,0,0,B
4,63,Sat,Odd,602491348,Retail,-6.26297,106.674,Tangerang Selatan,9.364165,0,0,A
...,...,...,...,...,...,...,...,...,...,...,...,...
9205,84,Sat,Odd,601580025,Retail,-6.36126,106.766,Tangerang Selatan,24.240623,19,5,B
9206,73,Fri,Odd,602639786,Retail,-6.30692,106.820,Tangerang Selatan,25.759899,19,5,A
9207,18,Thurs,Even,602055438,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,5,B
9208,18,Thurs,Even,602055437,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,5,A


In [12]:
# Show only the rows whose 'Channel' is 'Grosir' to inspect their A/B assignment.
final_result.loc[final_result['Channel'].eq('Grosir')]

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
6059,68,Wed,Odd,600007843,Grosir,-6.28348,106.747,Tangerang Selatan,17.373944,13,0,B
6060,68,Wed,Even,600007843,Grosir,-6.28348,106.747,Tangerang Selatan,17.373944,13,0,A
6062,68,Wed,Even,600544726,Grosir,-6.28349,106.747,Tangerang Selatan,17.374471,13,0,B
6063,68,Wed,Odd,600544726,Grosir,-6.28349,106.747,Tangerang Selatan,17.374471,13,0,A
6105,68,Tues,Odd,600007842,Grosir,-6.28296,106.748,Tangerang Selatan,17.444232,13,1,A
...,...,...,...,...,...,...,...,...,...,...,...,...
8319,74,Wed,Even,601287809,Grosir,-6.31070,106.768,Tangerang Selatan,20.910090,18,0,A
8331,74,Wed,Odd,601287818,Grosir,-6.31114,106.768,Tangerang Selatan,20.936457,18,0,B
8332,74,Wed,Even,601287818,Grosir,-6.31114,106.768,Tangerang Selatan,20.936457,18,0,A
8379,74,Wed,Odd,602752042,Grosir,-6.31089,106.769,Tangerang Selatan,21.014657,18,1,A


In [13]:
# Relabel 'Group': 'A' becomes 'Odd' and 'B' becomes 'Even'
final_result['Group'] = final_result['Group'].replace({'A': 'Odd', 'B': 'Even'})

# Relabel 'Hari': numeric day index -> day abbreviation
day_mapping = {
    0: 'Mon',
    1: 'Tues',
    2: 'Wed',
    3: 'Thurs',
    4: 'Fri',
    5: 'Sat'
}
# Values that are not keys of day_mapping are left unchanged. This keeps the
# cell safe to re-run: a plain .map(day_mapping) on the already-converted
# labels would turn the whole column into NaN.
final_result['Hari'] = final_result['Hari'].map(lambda day: day_mapping.get(day, day))

# Display the DataFrame after the changes
print("\nSetelah perubahan:")
final_result


Setelah perubahan:


Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
0,63,Fri,Odd,601637725,Retail,-6.26191,106.674,Tangerang Selatan,9.289766,0,Mon,Odd
1,64,Tues,Odd,601647632,Retail,-6.26382,106.673,Tangerang Selatan,9.339894,0,Mon,Even
2,60,Fri,Even,601288171,Retail,-6.25305,106.681,Tangerang Selatan,9.358375,0,Mon,Odd
3,64,Tues,Odd,601637669,Retail,-6.26408,106.673,Tangerang Selatan,9.358619,0,Mon,Even
4,63,Sat,Odd,602491348,Retail,-6.26297,106.674,Tangerang Selatan,9.364165,0,Mon,Odd
...,...,...,...,...,...,...,...,...,...,...,...,...
9205,84,Sat,Odd,601580025,Retail,-6.36126,106.766,Tangerang Selatan,24.240623,19,Sat,Even
9206,73,Fri,Odd,602639786,Retail,-6.30692,106.820,Tangerang Selatan,25.759899,19,Sat,Odd
9207,18,Thurs,Even,602055438,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,Sat,Even
9208,18,Thurs,Even,602055437,Retail,-6.15335,106.844,Tangerang Selatan,26.765696,19,Sat,Odd


In [15]:
# Write the final assignment table to an Excel file, without the index column.
final_result.to_excel('tangsel_final_.xlsx', index=False)