In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from geopy.distance import great_circle

In [2]:
# Read the source workbook into a DataFrame and display it.
source_workbook = 'Kab_tangerang_result.xlsx'
data = pd.read_excel(source_workbook)
data

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,Group
0,43,Wed,Odd,601642135,Retail,-6.21070,106.609,Kabupaten Tangerang,0.143068,0
1,43,Wed,Odd,601641710,Retail,-6.21084,106.609,Kabupaten Tangerang,0.157797,0
2,43,Wed,Odd,601642288,Retail,-6.21083,106.608,Kabupaten Tangerang,0.161349,0
3,43,Wed,Odd,601642139,Retail,-6.21111,106.608,Kabupaten Tangerang,0.190476,0
4,43,Wed,Odd,601641586,Retail,-6.21051,106.610,Kabupaten Tangerang,0.195455,0
...,...,...,...,...,...,...,...,...,...,...
10243,80,Thurs,Even,601692055,Retail,-6.33538,106.412,Kabupaten Tangerang,25.844885,21
10244,80,Thurs,Even,601292626,Retail,-6.33548,106.412,Kabupaten Tangerang,25.850908,21
10245,44,Mon,Odd,602057323,Retail,-6.21280,106.374,Kabupaten Tangerang,25.931428,21
10246,80,Thurs,Even,601292625,Retail,-6.33567,106.411,Kabupaten Tangerang,25.955279,21


In [3]:
# Print the column names, non-null counts and dtypes of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10248 entries, 0 to 10247
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Salesman   10248 non-null  int64  
 1   Day        10248 non-null  object 
 2   Pattern    10248 non-null  object 
 3   ID Outlet  10248 non-null  int64  
 4   Channel    10248 non-null  object 
 5   Latitude   10248 non-null  float64
 6   Longitude  10248 non-null  float64
 7   Kota       10248 non-null  object 
 8   Distance   10248 non-null  float64
 9   Group      10248 non-null  int64  
dtypes: float64(3), int64(3), object(4)
memory usage: 800.8+ KB


### `CLUSTERING Hari`

In [4]:
# Distance helper
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance, in kilometres, between two points.

    Each point is given as a latitude/longitude pair; the pairs are passed
    to ``great_circle`` as ``(lat, lon)`` tuples.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    return great_circle(origin, destination).km

# Group adjustment helper
def adjust_groups(data, group_counts, max_group_size, num_groups):
    """Pull repeated outlets into the same 'Hari' slot as their first row.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns 'ID Outlet' and 'Hari'. It is not modified.
    group_counts : mapping-like
        Anything with ``.get(key, default)`` giving the row count per 'Hari'
        value (a dict or a ``value_counts()`` Series both work).
    max_group_size : int
        A slot whose count in ``group_counts`` exceeds this is treated as full.
    num_groups : int
        Modulus used when a row is pushed to the next slot.

    Returns
    -------
    DataFrame
        A copy of ``data`` whose 'Hari' column has been adjusted.

    NOTE(review): ``group_counts`` is only read, never refreshed while rows
    are moved, so the "full" check always uses the counts that were passed in.
    """
    result = data.copy()
    is_repeated = data.duplicated(['ID Outlet'], keep=False)

    # Visit every outlet id that occurs more than once, one id at a time.
    for _, outlet_rows in data[is_repeated].groupby('ID Outlet', sort=False):
        anchor = None  # slot of the previously visited row of this outlet
        for idx, record in outlet_rows.iterrows():
            own_slot = record['Hari']
            if anchor is not None and anchor != own_slot:
                anchor_is_full = group_counts.get(anchor, 0) > max_group_size
                if anchor_is_full:
                    # Preferred slot is over capacity: shift to the slot after
                    # the row's own one, wrapping around at num_groups.
                    result.loc[idx, 'Hari'] = (own_slot + 1) % num_groups
                else:
                    result.loc[idx, 'Hari'] = anchor
            anchor = result.loc[idx, 'Hari']

    return result

# Day clustering: inside every group, outlets are ranked by their distance
# from a fixed reference point and the ranking is cut into `num_subgroups`
# consecutive, near-equal slices stored in the 'Hari' column.

# Reference point (latitude, longitude) all distances are measured from.
center_lat = -6.20949
center_lon = 106.60856

# Number of slices each group is cut into.
num_subgroups = 6

# Per-group results are collected here and concatenated once after the loop;
# calling pd.concat inside the loop re-copies every earlier row each time.
adjusted_frames = []

# Iterate over the group ids actually present in the data (ascending order)
# instead of a hard-coded range, so no group is silently skipped when the
# number of groups changes.
for group_number, data_group in data.groupby('Group', sort=True):
    data_group = data_group.copy()  # work on a copy so `data` stays untouched

    # Overwrite 'Distance' with the distance to the reference point.
    data_group['Distance'] = data_group.apply(
        lambda row: calculate_distance(center_lat, center_lon, row['Latitude'], row['Longitude']),
        axis=1,
    )

    # Nearest outlets first.
    data_group_sorted = data_group.sort_values(by='Distance').reset_index(drop=True)

    # Slice sizes: the first `remainder` slices take one extra row, so the
    # sizes differ by at most one.
    total_rows = len(data_group_sorted)
    group_size, remainder = divmod(total_rows, num_subgroups)
    slice_sizes = [group_size + (1 if s < remainder else 0) for s in range(num_subgroups)]

    # Label the sorted rows 0, 0, ..., 1, 1, ..., in one vectorised step.
    data_group_sorted['Hari'] = np.repeat(np.arange(num_subgroups), slice_sizes)

    # Inputs for the repeated-outlet adjustment.
    group_counts = data_group_sorted['Hari'].value_counts().sort_index()
    max_group_size = group_size + 1 if remainder > 0 else group_size

    # Keep rows that share an 'ID Outlet' in the same slice where possible.
    data_group_adjusted = adjust_groups(data_group_sorted, group_counts, max_group_size, num_subgroups)

    adjusted_frames.append(data_group_adjusted)

all_data_adjusted = pd.concat(adjusted_frames, ignore_index=True)
all_data_adjusted = all_data_adjusted.rename(columns={'Group': 'SalesmanNew'})

all_data_adjusted

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari
0,43,Wed,Odd,601642135,Retail,-6.21070,106.609,Kabupaten Tangerang,0.143068,0,0
1,43,Wed,Odd,601641710,Retail,-6.21084,106.609,Kabupaten Tangerang,0.157797,0,0
2,43,Wed,Odd,601642288,Retail,-6.21083,106.608,Kabupaten Tangerang,0.161349,0,0
3,43,Wed,Odd,601642139,Retail,-6.21111,106.608,Kabupaten Tangerang,0.190476,0,0
4,43,Wed,Odd,601641586,Retail,-6.21051,106.610,Kabupaten Tangerang,0.195455,0,0
...,...,...,...,...,...,...,...,...,...,...,...
10243,80,Thurs,Even,601692055,Retail,-6.33538,106.412,Kabupaten Tangerang,25.844885,21,5
10244,80,Thurs,Even,601292626,Retail,-6.33548,106.412,Kabupaten Tangerang,25.850908,21,5
10245,44,Mon,Odd,602057323,Retail,-6.21280,106.374,Kabupaten Tangerang,25.931428,21,5
10246,80,Thurs,Even,601292625,Retail,-6.33567,106.411,Kabupaten Tangerang,25.955279,21,5


In [5]:
# Show only the rows whose 'Channel' is 'Grosir'.
is_grosir = all_data_adjusted['Channel'] == 'Grosir'
all_data_adjusted[is_grosir]

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari
3779,49,Wed,Odd,602048287,Grosir,-6.22446,106.678,Kabupaten Tangerang,7.854392,8,0
3780,49,Wed,Even,602048287,Grosir,-6.22446,106.678,Kabupaten Tangerang,7.854392,8,0


In [6]:
# Count rows per 'Hari' value inside each 'SalesmanNew' group.
hari_by_salesman = all_data_adjusted.groupby('SalesmanNew')['Hari']
hari_by_salesman.value_counts()

SalesmanNew  Hari
0            0       78
             1       78
             2       78
             3       78
             4       77
                     ..
21           1       78
             2       78
             3       77
             4       77
             5       77
Name: count, Length: 132, dtype: int64

### `CLUSTERING PATTERN`

In [7]:
# Distance helper.
# NOTE(review): this re-declares the calculate_distance already defined in the
# day-clustering cell above with the same body; it is kept so this cell still
# runs when executed on its own.
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance, in kilometres, between two points.

    Each point is given as a latitude/longitude pair; the pairs are passed
    to ``great_circle`` as ``(lat, lon)`` tuples.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    return great_circle(origin, destination).km

# Split rows into groups 'A' / 'B', keeping track of repeated outlet ids
def assign_groups(data_sorted):
    """Label every row of ``data_sorted`` with 'A' or 'B' in a 'Group' column.

    Rows are visited in their current order:

    * the first row of each outlet id gets 'A' when an even number of distinct
      ids has been seen so far, otherwise 'B';
    * every later row with the same id gets the label opposite to the one its
      first row received.

    Parameters
    ----------
    data_sorted : DataFrame
        Must contain an 'ID Outlet' column. The frame is modified in place:
        its 'Group' column is created or overwritten.

    Returns
    -------
    DataFrame
        The same ``data_sorted`` object, for convenience.
    """
    # outlet id -> label given to its first row. A dict lookup replaces a
    # full-frame boolean filter for every repeated id.
    first_label = {}
    labels = []

    for outlet_id in data_sorted['ID Outlet']:
        if outlet_id in first_label:
            # Repeated outlet: take the other label than its first occurrence.
            label = 'B' if first_label[outlet_id] == 'A' else 'A'
        else:
            # New outlet: alternate on the number of distinct ids seen so far.
            label = 'A' if len(first_label) % 2 == 0 else 'B'
            first_label[outlet_id] = label
        labels.append(label)

    # One column assignment instead of one .loc write per row.
    data_sorted['Group'] = labels
    return data_sorted

# Pattern clustering: within every (SalesmanNew, Hari) bucket, outlets are
# ranked by distance from the reference point and labelled by assign_groups.
all_results = []

# groupby yields only the non-empty buckets, in ascending (SalesmanNew, Hari)
# order. Using the keys present in the data instead of hard-coded
# range(22) / range(6) means no rows are silently dropped when the number of
# salesmen or days changes, and the frame is not re-scanned per combination.
for (salesman_new, hari), data_pattern in all_data_adjusted.groupby(['SalesmanNew', 'Hari'], sort=True):
    data_pattern = data_pattern.copy()  # do not write into all_data_adjusted

    # Distance from the reference point.
    data_pattern.loc[:, 'Distance'] = data_pattern.apply(
        lambda row: calculate_distance(center_lat, center_lon, row['Latitude'], row['Longitude']),
        axis=1,
    )

    # Nearest outlets first.
    data_pattern_sorted = data_pattern.sort_values(by='Distance').reset_index(drop=True)

    # Create the 'Group' column up front (all missing) before labels are written.
    data_pattern_sorted['Group'] = pd.Series(dtype='str')

    # Label the rows of this bucket.
    result = assign_groups(data_pattern_sorted)

    all_results.append(result)

# Combine all buckets into one frame.
final_result = pd.concat(all_results, ignore_index=True)

# Display the combined result.
final_result

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
0,43,Wed,Odd,601642135,Retail,-6.21070,106.609,Kabupaten Tangerang,0.143068,0,0,A
1,43,Wed,Odd,601641710,Retail,-6.21084,106.609,Kabupaten Tangerang,0.157797,0,0,B
2,43,Wed,Odd,601642288,Retail,-6.21083,106.608,Kabupaten Tangerang,0.161349,0,0,A
3,43,Wed,Odd,601642139,Retail,-6.21111,106.608,Kabupaten Tangerang,0.190476,0,0,B
4,43,Wed,Odd,601641586,Retail,-6.21051,106.610,Kabupaten Tangerang,0.195455,0,0,A
...,...,...,...,...,...,...,...,...,...,...,...,...
10243,80,Thurs,Even,601692055,Retail,-6.33538,106.412,Kabupaten Tangerang,25.844885,21,5,A
10244,80,Thurs,Even,601292626,Retail,-6.33548,106.412,Kabupaten Tangerang,25.850908,21,5,B
10245,44,Mon,Odd,602057323,Retail,-6.21280,106.374,Kabupaten Tangerang,25.931428,21,5,A
10246,80,Thurs,Even,601292625,Retail,-6.33567,106.411,Kabupaten Tangerang,25.955279,21,5,B


In [8]:
# Show only the rows whose 'Channel' is 'Grosir'.
is_grosir = final_result['Channel'] == 'Grosir'
final_result[is_grosir]

Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
3779,49,Wed,Odd,602048287,Grosir,-6.22446,106.678,Kabupaten Tangerang,7.854392,8,0,B
3780,49,Wed,Even,602048287,Grosir,-6.22446,106.678,Kabupaten Tangerang,7.854392,8,0,A


In [9]:
# Replace the 'Group' labels
final_result['Group'] = final_result['Group'].replace({'A': 'Odd', 'B': 'Even'})

# Replace the 'Hari' codes with day names
day_mapping = {
    0: 'Mon',
    1: 'Tues',
    2: 'Wed',
    3: 'Thurs',
    4: 'Fri',
    5: 'Sat'
}
# Values with no entry in day_mapping are kept unchanged. A plain
# .map(day_mapping) would turn them into NaN, so running this cell a second
# time (when 'Hari' already holds day names) would wipe the whole column.
final_result['Hari'] = final_result['Hari'].map(lambda day: day_mapping.get(day, day))

# Display the DataFrame after the changes
print("\nSetelah perubahan:")
final_result


Setelah perubahan:


Unnamed: 0,Salesman,Day,Pattern,ID Outlet,Channel,Latitude,Longitude,Kota,Distance,SalesmanNew,Hari,Group
0,43,Wed,Odd,601642135,Retail,-6.21070,106.609,Kabupaten Tangerang,0.143068,0,Mon,Odd
1,43,Wed,Odd,601641710,Retail,-6.21084,106.609,Kabupaten Tangerang,0.157797,0,Mon,Even
2,43,Wed,Odd,601642288,Retail,-6.21083,106.608,Kabupaten Tangerang,0.161349,0,Mon,Odd
3,43,Wed,Odd,601642139,Retail,-6.21111,106.608,Kabupaten Tangerang,0.190476,0,Mon,Even
4,43,Wed,Odd,601641586,Retail,-6.21051,106.610,Kabupaten Tangerang,0.195455,0,Mon,Odd
...,...,...,...,...,...,...,...,...,...,...,...,...
10243,80,Thurs,Even,601692055,Retail,-6.33538,106.412,Kabupaten Tangerang,25.844885,21,Sat,Odd
10244,80,Thurs,Even,601292626,Retail,-6.33548,106.412,Kabupaten Tangerang,25.850908,21,Sat,Even
10245,44,Mon,Odd,602057323,Retail,-6.21280,106.374,Kabupaten Tangerang,25.931428,21,Sat,Odd
10246,80,Thurs,Even,601292625,Retail,-6.33567,106.411,Kabupaten Tangerang,25.955279,21,Sat,Even


In [10]:
# Write the final frame to an Excel workbook, leaving out the row index.
output_workbook = 'kab_tang_final_.xlsx'
final_result.to_excel(output_workbook, index=False)

In [13]:
# List the column labels of the final frame.
final_result.columns

Index(['Salesman', 'Day', 'Pattern', 'ID Outlet', 'Channel', 'Latitude',
       'Longitude', 'Kota', 'Distance', 'SalesmanNew', 'Hari', 'Group'],
      dtype='object')

In [14]:
# Count 'Group' labels within every (SalesmanNew, Hari) combination.
group_by_salesman_day = final_result.groupby(['SalesmanNew', 'Hari'])['Group']
group_by_salesman_day.value_counts()

SalesmanNew  Hari   Group
0            Fri    Odd      39
                    Even     38
             Mon    Even     39
                    Odd      39
             Sat    Odd      39
                             ..
21           Thurs  Even     38
             Tues   Even     39
                    Odd      39
             Wed    Even     39
                    Odd      39
Name: count, Length: 264, dtype: int64