In [2]:
import pandas as pd
import numpy as np

# Read the SLAP location table into `df` and preview its first five rows.
# The file's leading column has no header, so it loads as 'Unnamed: 0'.
df = pd.read_csv(filepath_or_buffer='./SLAP_LOC.csv')
df.head(5)

Unnamed: 0,ORD_NO,SKU_CD,NUM_PCS,LOC,CART_NO,SEQ
0,ORD_0002,SKU_0156,1,WP_0009,1,1
1,ORD_0002,SKU_0123,1,WP_0009,1,2
2,ORD_0004,SKU_0180,1,WP_0023,1,3
3,ORD_0001,SKU_0246,1,WP_0024,1,4
4,ORD_0004,SKU_0037,1,WP_0068,1,5


In [11]:
# Count the non-null NUM_PCS entries for each (ORD_NO, SKU_CD, LOC) combination.
# NOTE(review): 'count' tallies rows; it does not add up NUM_PCS — confirm that a
# sum of pieces is not what is wanted here.
ord_group = df.groupby(by=['ORD_NO', 'SKU_CD', 'LOC'])[['NUM_PCS']].agg('count')
ord_group

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NUM_PCS
ORD_NO,SKU_CD,LOC,Unnamed: 3_level_1
ORD_0001,SKU_0246,WP_0024,1
ORD_0001,SKU_0267,WP_0129,1
ORD_0002,SKU_0005,WP_0152,1
ORD_0002,SKU_0047,WP_0138,1
ORD_0002,SKU_0063,WP_0138,1
...,...,...,...
ORD_0479,SKU_0099,WP_0107,1
ORD_0479,SKU_0176,WP_0078,1
ORD_0479,SKU_0274,WP_0107,1
ORD_0479,SKU_0329,WP_0109,1


In [18]:
import numpy as np

def estimate_row_count_from_od_matrix(od_matrix, jump_threshold=5):
    """
    Estimate how many racks belong to one row (zone) from an OD distance matrix.

    For every rack (every row of ``od_matrix``) the distances are sorted in
    ascending order and the position of the first gap between consecutive
    distances that exceeds ``jump_threshold`` is recorded. The most frequent
    position over all racks is the estimate.

    Note: despite the function name, the returned value is the number of racks
    per row, not the number of rows.

    Parameters:
    - od_matrix: DataFrame of rack-to-rack distances, one row per rack
    - jump_threshold: smallest increase between consecutive sorted distances
      that counts as a jump (default 5, the value that used to be hard-coded)

    Returns:
    - the estimated number of racks per row, as a plain int

    Raises:
    - statistics.StatisticsError (a ValueError subclass) when no rack shows a
      jump larger than ``jump_threshold``, e.g. for an empty matrix
    """
    from statistics import StatisticsError, mode

    row_len_candidates = []
    for _, distances in od_matrix.iterrows():  # every rack is used, not a sample
        ordered = np.sort(distances.to_numpy(dtype=float))
        # gaps[k] is the increase from the k-th to the (k+1)-th smallest distance.
        gaps = np.diff(ordered)
        jump_positions = np.flatnonzero(gaps > jump_threshold)

        if jump_positions.size:
            # k + 1 entries lie before the first jump.
            row_len_candidates.append(int(jump_positions[0]) + 1)

    if not row_len_candidates:
        raise StatisticsError(
            "no distance jump larger than jump_threshold=%r was found in od_matrix"
            % (jump_threshold,)
        )

    # Most common first-jump position = estimated racks per row.
    return mode(row_len_candidates)

# Read the sample OD matrix; the file's first column becomes the row index.
od_matrix = pd.read_csv(filepath_or_buffer='./data/Sample_OD_Matrix.csv', index_col=0)
od_matrix.head(5)  # NOTE: not rendered, a notebook cell only displays its last expression

# Estimate the racks-per-row value from the matrix and print it.
res = estimate_row_count_from_od_matrix(od_matrix=od_matrix)
print(res)


14


In [20]:
import re

def extract_loc_number(loc_name):
    """
    Return the first run of digits in a LOC name (e.g. 'WP_0001' -> 1) as an int.

    Returns None when ``loc_name`` contains no digits, or when it is not a
    string at all (for example a missing value such as None or NaN), so that
    mapping this function over a column with gaps does not raise.
    """
    if not isinstance(loc_name, str):
        return None
    digits = re.search(r'\d+', loc_name)
    return int(digits.group()) if digits else None

def assign_zone_by_locnum(slap_df, row_len):
    """
    Add LOC_NUM and ZONE columns to a copy of ``slap_df``.

    LOC_NUM is the number extracted from the LOC column; ZONE is the label
    "ZONE_<k>" with k = (LOC_NUM - 1) // row_len + 1. Rows whose LOC yields no
    number get a null LOC_NUM and a ZONE of None.

    Parameters:
    - slap_df: SLAP_LOC DataFrame; must contain a 'LOC' column (not modified)
    - row_len: estimated number of racks in one row; must be positive

    Returns:
    - a new DataFrame with the LOC_NUM and ZONE columns added

    Raises:
    - ValueError: if ``row_len`` is not positive
    """
    if row_len <= 0:
        raise ValueError(f"row_len must be positive, got {row_len!r}")

    zoned_df = slap_df.copy()

    # Pull the numeric part out of every LOC name.
    zoned_df['LOC_NUM'] = zoned_df['LOC'].map(extract_loc_number)

    def _zone_label(loc_num):
        """Build the ZONE label for one LOC number, or None when it is missing."""
        if pd.isnull(loc_num):
            return None
        # A single missing LOC_NUM turns the whole column into floats; force an
        # integer so the label is "ZONE_1" rather than "ZONE_1.0".
        return f"ZONE_{int((loc_num - 1) // row_len + 1)}"

    zoned_df['ZONE'] = zoned_df['LOC_NUM'].apply(_zone_label)

    return zoned_df
  
  

In [21]:
# Tag every SLAP row with LOC_NUM and ZONE, using the estimated racks-per-row
# value as the width of a zone.
zone_assign = assign_zone_by_locnum(slap_df=df, row_len=res)
zone_assign

Unnamed: 0,ORD_NO,SKU_CD,NUM_PCS,LOC,CART_NO,SEQ,LOC_NUM,ZONE
0,ORD_0002,SKU_0156,1,WP_0009,1,1,9,ZONE_1
1,ORD_0002,SKU_0123,1,WP_0009,1,2,9,ZONE_1
2,ORD_0004,SKU_0180,1,WP_0023,1,3,23,ZONE_2
3,ORD_0001,SKU_0246,1,WP_0024,1,4,24,ZONE_2
4,ORD_0004,SKU_0037,1,WP_0068,1,5,68,ZONE_5
...,...,...,...,...,...,...,...,...
1421,ORD_0479,SKU_0031,1,WP_0096,120,9,96,ZONE_7
1422,ORD_0479,SKU_0099,1,WP_0107,120,10,107,ZONE_8
1423,ORD_0479,SKU_0274,1,WP_0107,120,11,107,ZONE_8
1424,ORD_0479,SKU_0329,1,WP_0109,120,12,109,ZONE_8


In [28]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from math import ceil

# -----------------------------
# 2. Order x SKU matrix and order x ZONE matrix
# -----------------------------
# Both pivots differ only in the column key; aggfunc='size' counts the rows
# falling into each (order, column) cell and fill_value=0 fills empty cells.
_pivot_kwargs = dict(index='ORD_NO', aggfunc='size', fill_value=0)
order_sku_matrix = zone_assign.pivot_table(columns='SKU_CD', **_pivot_kwargs)
order_zone_matrix = zone_assign.pivot_table(columns='ZONE', **_pivot_kwargs)

# -----------------------------
# 3. Similarity computation
# -----------------------------
# Pairwise cosine similarity between orders, once per matrix.
sku_sim, zone_sim = (
    cosine_similarity(count_matrix)
    for count_matrix in (order_sku_matrix, order_zone_matrix)
)

order_sku_matrix  # orders x SKUs (not rendered: only the last expression is shown)
order_zone_matrix  # orders x zones

ZONE,ZONE_1,ZONE_10,ZONE_11,ZONE_12,ZONE_2,ZONE_3,ZONE_4,ZONE_5,ZONE_6,ZONE_7,ZONE_8,ZONE_9
ORD_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ORD_0001,0,1,0,0,1,0,0,0,0,0,0,0
ORD_0002,2,2,1,0,0,0,0,0,0,0,0,0
ORD_0003,0,0,0,0,0,0,0,0,1,0,0,1
ORD_0004,0,0,0,0,1,0,0,1,0,0,0,2
ORD_0005,2,0,0,0,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
ORD_0476,0,0,0,0,1,0,3,0,0,0,0,1
ORD_0477,1,0,0,0,0,2,0,0,0,0,0,1
ORD_0478,1,0,0,0,0,0,2,0,0,0,0,0
ORD_0479,0,0,0,0,0,0,0,0,1,1,3,0


In [None]:
# Combined similarity: plain average of the SKU-based and zone-based similarities,
# then flipped into a dissimilarity (1 - similarity).
combined_sim = (sku_sim + zone_sim) * 0.5
combined_dist = 1.0 - combined_sim
# NOTE(review): KMeans receives each row of this dissimilarity matrix as a feature
# vector; it does not interpret the matrix as precomputed distances — confirm this
# is the intended use.

# -----------------------------
# 4. KMeans clustering (aiming at 4 orders per batch)
# -----------------------------
cart_capa = 4
n_orders = len(order_sku_matrix)
# Ceiling division: enough clusters for every order at cart_capa orders each.
n_clusters = (n_orders + cart_capa - 1) // cart_capa
# NOTE(review): KMeans does not limit cluster sizes, so a batch may end up with
# more than cart_capa orders — verify before using BATCH_ID as a cart assignment.

kmeans = KMeans(random_state=42, n_clusters=n_clusters)
labels = kmeans.fit_predict(combined_dist)

# -----------------------------
# 5. Store the result
# -----------------------------
# One row per order, in the row order of order_sku_matrix, with its cluster label.
clustered_orders = pd.DataFrame(
    dict(ORD_NO=order_sku_matrix.index, BATCH_ID=labels)
)

# Inspect the result
clustered_orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 480 entries, 0 to 479
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   ORD_NO    480 non-null    object
 1   BATCH_ID  480 non-null    int32 
dtypes: int32(1), object(1)
memory usage: 5.8+ KB


