In [2]:
import pandas as pd

In [3]:
# Load the bank export and the line-level settlement order export.
bank_df = pd.read_excel("bank_df.xlsx")
dalrat_df = pd.read_csv("settlement_order_list_20240621.csv")

# Clean a copy so the frame as loaded from the CSV is not mutated by the steps below.
df = dalrat_df.copy()

# Columns holding currency text; '₩' and ',' are removed and the result is cast
# to float so the amounts can be summed per order.
MONEY_COLUMNS = ['Order Money', '세금', 'Refund Money']
for money_column in MONEY_COLUMNS:
    df[money_column] = df[money_column].replace({'₩': '', ',': ''}, regex=True).astype(float)


def _join_as_text(values):
    """Join the values of one group into a single ', '-separated string.

    Every value is passed through ``str`` first, so numeric or missing entries
    do not make ``str.join`` raise ``TypeError``.
    """
    return ', '.join(map(str, values))


# Collapse the order lines to one row per 'Order No': descriptive fields keep the
# first value of the group, text fields are concatenated, amounts and quantities
# are summed.
grouped_df = df.groupby('Order No').agg({
    'Delivery Start Date': 'first',
    '고객 이름': 'first',
    'Order Create Date': 'first',
    '상품명': _join_as_text,
    'SKU': _join_as_text,
    'Order Money': 'sum',
    '수량': 'sum',
    '결제방법': 'first',
    '세금': 'sum',
    'Refund Date': 'first',
    'Refund Money': 'sum'
}).reset_index()

# Render the summed amounts back as currency text ('₩' + thousands separators).
for money_column in MONEY_COLUMNS:
    grouped_df[money_column] = grouped_df[money_column].apply(lambda amount: '₩' + format(amount, ','))

grouped_df.to_excel("주문번호별.xlsx",index=False)

In [25]:
# Rebind dalrat_df to the contents of a workbook that no visible cell writes
# (the per-order export above goes to "주문번호별.xlsx").
# NOTE(review): presumably that export with extra columns added by hand — confirm.
dalrat_df = pd.read_excel("주문번호별+배송료.xlsx")

In [26]:
def adjust_order_number(order_number):
    """Normalize an order number to its zero-padded base form.

    The value is converted to text, everything from the first "-" onwards is
    dropped, surrounding whitespace is removed, and the remainder is left-padded
    with zeros to 9 characters. Values that are already 9 characters or longer
    (for example 10-digit numbers) come back unchanged.

    Args:
        order_number: The raw order number (str, int or float).

    Returns:
        The normalized order number as a string, or the input itself when it is
        missing (``None`` or NaN).
    """
    # Missing values stay missing rather than turning into '00000None' / '000000nan'.
    if order_number is None:
        return order_number
    if isinstance(order_number, float):
        if order_number != order_number:  # NaN is the only float unequal to itself
            return order_number
        # An integer-valued float such as 4083.0 would otherwise be rendered
        # as '4083.0' and padded to '0004083.0'.
        if order_number.is_integer():
            order_number = int(order_number)

    base_part = str(order_number).split('-')[0].strip()
    # zfill only pads; strings of 9 or more characters are returned as they are.
    return base_part.zfill(9)
    
# Normalize the bank frame's '주문번호' column with adjust_order_number.
bank_df['주문번호'] = bank_df['주문번호'].map(adjust_order_number)

# Normalize the order frame's 'Order No' column the same way.
dalrat_df['Order No'] = dalrat_df['Order No'].map(adjust_order_number)

In [27]:
# Write both frames to Excel workbooks, without the index column.
bank_df.to_excel("토스-주문번호_2.xlsx",index=False)
dalrat_df.to_excel("달랏-주문번호_2.xlsx",index=False)

In [28]:
# Order rows whose 'Order No' also appears in the bank frame's '주문번호' column.
matching_rows = dalrat_df.loc[lambda frame: frame['Order No'].isin(bank_df['주문번호'])]

In [29]:
# Order rows whose 'Order No' does not appear in the bank frame's '주문번호' column.
non_matching_rows = dalrat_df.loc[lambda frame: ~frame['Order No'].isin(bank_df['주문번호'])]

In [30]:
# Label every row by whether its order number exists in the other source.
# isin() runs one vectorized membership test per frame instead of rebuilding
# `.values` and scanning it once for every row.
status_labels = {True: '일치', False: '불일치'}
dalrat_df['Matching Status'] = dalrat_df['Order No'].isin(bank_df['주문번호']).map(status_labels)
bank_df['Matching Status'] = bank_df['주문번호'].isin(dalrat_df['Order No']).map(status_labels)

# Tag each row with the frame it came from.
dalrat_df['Source'] = 'dalrat'
bank_df['Source'] = 'bank'

# Stack the two frames row-wise; the bank key column is renamed so both sources
# share a single 'Order No' column.
combined_df = pd.concat([dalrat_df, bank_df.rename(columns={'주문번호': 'Order No'})], ignore_index=True)

combined_df

Unnamed: 0,Order No,Delivery Start Date,고객 이름,Order Create Date,상품명,SKU,Order Money,수량,결제방법,세금,...,결제상태,현금영수증 발급 상태,구매자명,입금자명,입금·취소액,과세제외액,은행,구매상품,안내메세지,취소자
0,000004083,2024. 5. 30. 오후 2:22:20,WIKANDA _F,2024. 5. 28. 오후 5:13:44,ทุเรียน สด 10kg,95,"₩140,000.0",1.0,Bank Deposit - Tosspayments,₩0.0,...,,,,,,,,,,
1,000004101,2024. 5. 30. 오후 4:38:36,Sutthita _F,2024. 5. 29. 오전 11:26:08,เครื่องดื่ม เอ็ม 150,872,"₩47,500.0",1.0,Bank Deposit - Tosspayments,"₩4,318.18",...,,,,,,,,,,
2,000004137,2024. 5. 30. 오전 10:33:29,밍 _R,2024. 5. 29. 오후 11:05:22,ปลาทูนึ่ง2P(thai)(STEAM MACKEREL),4319,"₩58,500.0",15.0,Bank Deposit - Tosspayments,"₩5,318.25",...,,,,,,,,,,
3,000004139,2024. 5. 30. 오전 10:33:29,WIKANDA _F,2024. 5. 30. 오전 9:16:33,หมอนทอง มินิ 10กิโล_BOX,1617691893,"₩113,000.0",1.0,Bank Deposit - Tosspayments,₩0.0,...,,,,,,,,,,
4,000004142,2024. 5. 30. 오전 10:33:30,1_ 소미 무카타 _F,2024. 5. 30. 오전 9:52:49,"ปลาสวาย ตัว, ปลาร้าไมค์, น้ำตาลปิ๊ปสีแดง","1669271714, 2394, 1663230216","₩188,800.0",4.0,Bank Deposit - Tosspayments,"₩7,527.27",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,3000001094,,,,,,,,,,...,완료,,1**********장,,66400.0,,농협,Dalrat 3000001094,,
82,3000001093,,,,,,,,,,...,완료,,1**********장,,54400.0,,농협,Dalrat 3000001093,,
83,3000001092,,,,,,,,,,...,완료,,CU********_H,,562850.0,,삼성,Dalrat 3000001092,,
84,3000001089,,,,,,,,,,...,완료,,S2****************17,,153400.0,,현대,Dalrat 3000001089,,


In [31]:
# Write the stacked frame to an Excel workbook, without the index column.
# NOTE(review): a later cell writes the comparison table to this same file name,
# which replaces this export — confirm that is intended.
combined_df.to_excel("차이분석.xlsx",index=False)

In [32]:
# List the bank frame's column names.
# NOTE(review): the original comment spoke of a 26th column, but the recorded
# output below lists 18 columns.
bank_column_names = bank_df.columns
bank_column_names

Index(['상점아이디(MID)', '가상계좌 발급일시', '가상계좌 만료일시', '결제·취소일시', '취소완료일시', '주문번호',
       '결제상태', '현금영수증 발급 상태', '구매자명', '입금자명', '입금·취소액', '과세제외액', '은행', '구매상품',
       '안내메세지', '취소자', 'Matching Status', 'Source'],
      dtype='object')

In [33]:
# Show the column name at position 17 (zero-based), i.e. the 18th column.
# NOTE(review): despite the variable name this is not a 26th column; in the
# recorded output it is the last column, 'Source'.
bank_26th_column_name = bank_column_names[17]
bank_26th_column_name

'Source'

In [34]:
# For matched orders, put the order-side 'SUM' next to the bank-side '입금·취소액'.
# combined_df stacks the two sources row-wise: rows with Source == 'dalrat' hold
# 'SUM' and rows with Source == 'bank' hold '입금·취소액', so selecting both columns
# straight from the stacked frame never places the two amounts on the same row.
# Split by source and join on the order number instead.
matching_rows = combined_df[combined_df['Matching Status'] == '일치']
order_amounts = matching_rows.loc[matching_rows['Source'] == 'dalrat', ['Order No', 'SUM']]
bank_amounts = matching_rows.loc[matching_rows['Source'] == 'bank', ['Order No', '입금·취소액']]
# An outer join keeps every matched row from either side; an order with several
# bank rows yields one output row per bank row.
comparison_df = order_amounts.merge(bank_amounts, on='Order No', how='outer')

# NOTE(review): this file name is also used for the earlier export of
# combined_df, so that export is overwritten here — confirm that is intended.
comparison_df.to_excel("차이분석.xlsx",index=False)