# Data Load

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve in this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the raw training CSV from the mounted Drive and preview the first rows.
train_csv_path = '/content/drive/MyDrive/LG Aimers 7기/data/raw data/train/train.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head()

Unnamed: 0,영업일자,영업장명_메뉴명,매출수량
0,2023-01-01,느티나무 셀프BBQ_1인 수저세트,0
1,2023-01-02,느티나무 셀프BBQ_1인 수저세트,0
2,2023-01-03,느티나무 셀프BBQ_1인 수저세트,0
3,2023-01-04,느티나무 셀프BBQ_1인 수저세트,0
4,2023-01-05,느티나무 셀프BBQ_1인 수저세트,0


# Preprocess

In [None]:
# Parse the business-date column (영업일자) into pandas datetimes so the .dt accessor can be used on it later.
train_df['영업일자'] = pd.to_datetime(train_df['영업일자'])

In [None]:
# Remove every space character from the combined "store_menu" key (영업장명_메뉴명).
# NOTE(review): this rewrites train_df in place, so any later cell that reuses train_df
# sees the space-free names.
train_df['영업장명_메뉴명'] = train_df['영업장명_메뉴명'].str.replace(" ", "", regex=False)

In [None]:
train_df.head()

Unnamed: 0,영업일자,영업장명_메뉴명,매출수량
0,2023-01-01,느티나무셀프BBQ_1인수저세트,0
1,2023-01-02,느티나무셀프BBQ_1인수저세트,0
2,2023-01-03,느티나무셀프BBQ_1인수저세트,0
3,2023-01-04,느티나무셀프BBQ_1인수저세트,0
4,2023-01-05,느티나무셀프BBQ_1인수저세트,0


In [None]:
# Floor the sales quantity (매출수량) at zero: negative values become 0, all others are unchanged.
train_df['매출수량'] = train_df['매출수량'].clip(lower=0)

In [None]:
# Split the combined key into store name (영업장명) and menu name (메뉴명).
# n=1 splits only at the first underscore, so everything after it stays in the
# menu name; an unbounded split would return extra columns whenever a menu name
# itself contains '_' and the two-column assignment would fail.
train_df[['영업장명', '메뉴명']] = train_df['영업장명_메뉴명'].str.split('_', n=1, expand=True)

In [None]:
train_df.head()

Unnamed: 0,영업일자,영업장명_메뉴명,매출수량,영업장명,메뉴명
0,2023-01-01,느티나무셀프BBQ_1인수저세트,0,느티나무셀프BBQ,1인수저세트
1,2023-01-02,느티나무셀프BBQ_1인수저세트,0,느티나무셀프BBQ,1인수저세트
2,2023-01-03,느티나무셀프BBQ_1인수저세트,0,느티나무셀프BBQ,1인수저세트
3,2023-01-04,느티나무셀프BBQ_1인수저세트,0,느티나무셀프BBQ,1인수저세트
4,2023-01-05,느티나무셀프BBQ_1인수저세트,0,느티나무셀프BBQ,1인수저세트


In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102676 entries, 0 to 102675
Data columns (total 5 columns):
 #   Column    Non-Null Count   Dtype         
---  ------    --------------   -----         
 0   영업일자      102676 non-null  datetime64[ns]
 1   영업장명_메뉴명  102676 non-null  object        
 2   매출수량      102676 non-null  int64         
 3   영업장명      102676 non-null  object        
 4   메뉴명       102676 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 3.9+ MB


193개의 '영업장명_메뉴명'  
532개의 날짜

In [None]:
# Day of week of each business date, as an integer feature.
train_df['dayofweek'] = train_df['영업일자'].dt.dayofweek  # Monday=0, Sunday=6

In [None]:
# Build per-item history features (lags and a 7-row rolling mean) for every
# store_menu key, then stack the per-item frames into one table.
#
# The frames are collected in a list and concatenated once: growing a DataFrame
# with pd.concat inside the loop copies all accumulated rows on every iteration.
item_frames = []

# sort=False yields the groups in order of first appearance, i.e. the same item
# order as iterating over .unique(); each group keeps its original row index.
for item, item_df in train_df.groupby('영업장명_메뉴명', sort=False):
    # shift()/rolling() are positional, so make sure each item's rows are in
    # chronological order. The stable sort leaves already-ordered data as is
    # and returns a new frame, so the columns added below never touch train_df.
    item_df = item_df.sort_values('영업일자', kind='stable')

    # Lag features: sales quantity 1 row and 7 rows earlier for the same item.
    item_df['lag_1'] = item_df['매출수량'].shift(1)
    item_df['lag_7'] = item_df['매출수량'].shift(7)

    # Rolling mean over a 7-row window.
    # NOTE(review): the window ends on the current row, so this average includes
    # the same day's 매출수량. If 매출수량 is the prediction target, consider
    # shifting by one row before rolling — confirm the intended definition.
    item_df['rolling_mean_7'] = item_df['매출수량'].rolling(window=7).mean()

    item_frames.append(item_df)

preprocessed_df = pd.concat(item_frames)

# The combined key is redundant once 영업장명 / 메뉴명 exist as separate columns.
preprocessed_df = preprocessed_df.drop(columns=['영업장명_메뉴명'])
# The leading rows of each item have no lag / full window yet (NaN); store 0 there.
preprocessed_df = preprocessed_df.fillna(0)

In [None]:
preprocessed_df.head()

Unnamed: 0,영업일자,매출수량,영업장명,메뉴명,dayofweek,lag_1,lag_7,rolling_mean_7
0,2023-01-01,0,느티나무셀프BBQ,1인수저세트,6,0.0,0.0,0.0
1,2023-01-02,0,느티나무셀프BBQ,1인수저세트,0,0.0,0.0,0.0
2,2023-01-03,0,느티나무셀프BBQ,1인수저세트,1,0.0,0.0,0.0
3,2023-01-04,0,느티나무셀프BBQ,1인수저세트,2,0.0,0.0,0.0
4,2023-01-05,0,느티나무셀프BBQ,1인수저세트,3,0.0,0.0,0.0


In [None]:
# Arrange the columns as: date, store, menu, sales quantity, then the derived features.
new_order = [
    '영업일자', '영업장명', '메뉴명',
    '매출수량',
    'dayofweek', 'lag_1', 'lag_7', 'rolling_mean_7',
]

preprocessed_df = preprocessed_df.loc[:, new_order]
preprocessed_df.head()

Unnamed: 0,영업일자,영업장명,메뉴명,매출수량,dayofweek,lag_1,lag_7,rolling_mean_7
0,2023-01-01,느티나무셀프BBQ,1인수저세트,0,6,0.0,0.0,0.0
1,2023-01-02,느티나무셀프BBQ,1인수저세트,0,0,0.0,0.0,0.0
2,2023-01-03,느티나무셀프BBQ,1인수저세트,0,1,0.0,0.0,0.0
3,2023-01-04,느티나무셀프BBQ,1인수저세트,0,2,0.0,0.0,0.0
4,2023-01-05,느티나무셀프BBQ,1인수저세트,0,3,0.0,0.0,0.0


In [None]:
preprocessed_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 102676 entries, 0 to 102675
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   영업일자            102676 non-null  datetime64[ns]
 1   영업장명            102676 non-null  object        
 2   메뉴명             102676 non-null  object        
 3   매출수량            102676 non-null  int64         
 4   dayofweek       102676 non-null  int32         
 5   lag_1           102676 non-null  float64       
 6   lag_7           102676 non-null  float64       
 7   rolling_mean_7  102676 non-null  float64       
dtypes: datetime64[ns](1), float64(3), int32(1), int64(1), object(2)
memory usage: 6.7+ MB


In [None]:
# Distinct store names (영업장명) present in the table.
bpn = preprocessed_df['영업장명'].unique() # business place name
print(bpn)

['느티나무셀프BBQ' '담하' '라그로타' '미라시아' '연회장' '카페테리아' '포레스트릿' '화담숲주막' '화담숲카페']


In [None]:
# Distinct menu names (메뉴명) present in the table, across all stores.
mn = preprocessed_df['메뉴명'].unique()
print(mn)

['1인수저세트' 'BBQ55(단체)' '대여료30,000원' '대여료60,000원' '대여료90,000원' '본삼겹(단품,실내)'
 '스프라이트(단체)' '신라면' '쌈야채세트' '쌈장' '육개장사발면' '일회용소주컵' '일회용종이컵'
 '잔디그늘집대여료(12인석)' '잔디그늘집대여료(6인석)' '잔디그늘집의자추가' '참이슬(단체)' '친환경접시14cm'
 '친환경접시23cm' '카스병(단체)' '콜라(단체)' '햇반' '허브솔트' '(단체)공깃밥' '(단체)생목살김치전골2.0'
 '(단체)은이버섯갈비탕' '(단체)한우우거지국밥' '(단체)황태해장국3/27까지' '(정식)된장찌개' '(정식)물냉면'
 '(정식)비빔냉면' '(후식)된장찌개' '(후식)물냉면' '(후식)비빔냉면' '갑오징어비빔밥' '갱시기' '공깃밥' '꼬막비빔밥'
 '느린마을막걸리' '담하한우불고기' '담하한우불고기정식' '더덕한우지짐' '들깨양지탕' '라면사리' '룸이용료' '메밀면사리'
 '명인안동소주' '명태회비빔냉면' '문막복분자칵테일' '봉평메밀물냉면' '생목살김치찌개' '스프라이트' '은이버섯갈비탕'
 '제로콜라' '참이슬' '처음처럼' '카스' '콜라' '테라' '하동매실칵테일' '한우떡갈비정식' '한우미역국정식'
 '한우우거지국밥' '한우차돌박이된장찌개' '황태해장국' 'AUS(200g)' 'G-Charge(3)' 'Gls.Sileni'
 'Gls.미션서드' 'OpenFood' '그릴드비프샐러드' '까르보나라' '모둠해산물플래터' '미션서드카베르네쉬라'
 '버섯크림리조또' '빵추가(1인)' '시저샐러드' '아메리카노' '알리오에올리오' '양갈비(4ps)' '자몽리치에이드'
 '하이네켄(생)' '한우(200g)' '해산물토마토리조또' '해산물토마토스튜파스타' '해산물토마토스파게티'
 '(단체)브런치주중36,000' '(오븐)하와이안쉬림프피자' '(화덕)불고기페퍼로니반반피자' 'BBQPlatter'
 'BBQ고기추가' '글라스와인(레드)' '레인보우칵테일(알코올)' '미라시아브런치(

In [None]:
# Turn the store names into a categorical column and print the
# integer-code -> store-name lookup that the categories imply.
preprocessed_df['영업장명'] = preprocessed_df['영업장명'].astype('category')
store_categories = preprocessed_df['영업장명'].cat.categories
print(store_categories)

mapping = {code: name for code, name in enumerate(store_categories)}
print(mapping)

Index(['느티나무셀프BBQ', '담하', '라그로타', '미라시아', '연회장', '카페테리아', '포레스트릿', '화담숲주막',
       '화담숲카페'],
      dtype='object')
{0: '느티나무셀프BBQ', 1: '담하', 2: '라그로타', 3: '미라시아', 4: '연회장', 5: '카페테리아', 6: '포레스트릿', 7: '화담숲주막', 8: '화담숲카페'}


In [None]:
# Korean label -> category -> integer code
# df['name'] = df['name'].astype('category').cat.codes
# Alternative: number the labels in order of first appearance instead
# df['name'] = pd.Categorical(df['name'], categories=df['name'].unique()).codes

# Replace each store name with its integer category code.
preprocessed_df['영업장명'] = preprocessed_df['영업장명'].astype('category').cat.codes
preprocessed_df.head()

Unnamed: 0,영업일자,영업장명,메뉴명,매출수량,dayofweek,lag_1,lag_7,rolling_mean_7
0,2023-01-01,0,1인수저세트,0,6,0.0,0.0,0.0
1,2023-01-02,0,1인수저세트,0,0,0.0,0.0,0.0
2,2023-01-03,0,1인수저세트,0,1,0.0,0.0,0.0
3,2023-01-04,0,1인수저세트,0,2,0.0,0.0,0.0
4,2023-01-05,0,1인수저세트,0,3,0.0,0.0,0.0


In [None]:
# Store the integer store codes with category dtype, then check the resulting dtypes.
preprocessed_df['영업장명'] = preprocessed_df['영업장명'].astype('category')
preprocessed_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 102676 entries, 0 to 102675
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   영업일자            102676 non-null  datetime64[ns]
 1   영업장명            102676 non-null  category      
 2   메뉴명             102676 non-null  object        
 3   매출수량            102676 non-null  int64         
 4   dayofweek       102676 non-null  int32         
 5   lag_1           102676 non-null  float64       
 6   lag_7           102676 non-null  float64       
 7   rolling_mean_7  102676 non-null  float64       
dtypes: category(1), datetime64[ns](1), float64(3), int32(1), int64(1), object(1)
memory usage: 6.0+ MB


In [None]:
# Turn the menu names into a categorical column and print the
# integer-code -> menu-name lookup that the categories imply.
preprocessed_df['메뉴명'] = preprocessed_df['메뉴명'].astype('category')
menu_categories = preprocessed_df['메뉴명'].cat.categories
print(list(menu_categories))

mapping = {code: name for code, name in enumerate(menu_categories)}
print('\n')
print('mapping list:')
print(mapping)

['(단체)공깃밥', '(단체)브런치주중36,000', '(단체)생목살김치전골2.0', '(단체)은이버섯갈비탕', '(단체)한우우거지국밥', '(단체)황태해장국3/27까지', '(오븐)하와이안쉬림프피자', '(정식)된장찌개', '(정식)물냉면', '(정식)비빔냉면', '(화덕)불고기페퍼로니반반피자', '(후식)된장찌개', '(후식)물냉면', '(후식)비빔냉면', '1인수저세트', 'AUS(200g)', 'BBQ55(단체)', 'BBQPlatter', 'BBQ고기추가', 'CassBeer', 'ConferenceL1', 'ConferenceL2', 'ConferenceL3', 'ConferenceM1', 'ConferenceM8', 'ConferenceM9', 'ConventionHall', 'CookiePlatter', 'G-Charge(3)', 'Gls.Sileni', 'Gls.미션서드', 'GrandBallroom', 'OPUS2', 'OpenFood', 'RegularCoffee', '갑오징어비빔밥', '갱시기', '골뱅이무침', '공깃밥', '공깃밥(추가)', '구슬아이스크림', '그릴드비프샐러드', '글라스와인(레드)', '까르보나라', '꼬막비빔밥', '꼬치어묵', '느린마을막걸리', '단체식13000(신)', '단체식18000(신)', '단호박식혜', '담하한우불고기', '담하한우불고기정식', '대여료30,000원', '대여료60,000원', '대여료90,000원', '더덕한우지짐', '돈목살김치찌개(밥포함)', '돼지고기김치찌개', '들깨양지탕', '떡볶이', '라면사리', '레인보우칵테일(알코올)', '로제치즈떡볶이', '룸이용료', '마라샹궈', '매콤무뼈닭발&계란찜', '메밀면사리', '메밀미숫가루', '명인안동소주', '명태회비빔냉면', '모둠돈육구이(3인)', '모둠해산물플래터', '문막복분자칵테일', '미라시아브런치(패키지)', '미션서드카베르네쉬라', '버드와이저(무제한)', '버섯크림리조또', '병천순대', '보일링랍스타플래터', 

In [None]:
# Replace each menu name with its integer category code and keep the column
# categorical, as one chained expression.
preprocessed_df['메뉴명'] = (
    preprocessed_df['메뉴명']
    .astype('category')
    .cat.codes
    .astype('category')
)
preprocessed_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 102676 entries, 0 to 102675
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   영업일자            102676 non-null  datetime64[ns]
 1   영업장명            102676 non-null  category      
 2   메뉴명             102676 non-null  category      
 3   매출수량            102676 non-null  int64         
 4   dayofweek       102676 non-null  int32         
 5   lag_1           102676 non-null  float64       
 6   lag_7           102676 non-null  float64       
 7   rolling_mean_7  102676 non-null  float64       
dtypes: category(2), datetime64[ns](1), float64(3), int32(1), int64(1)
memory usage: 5.4 MB


In [None]:
preprocessed_df.head()

Unnamed: 0,영업일자,영업장명,메뉴명,매출수량,dayofweek,lag_1,lag_7,rolling_mean_7
0,2023-01-01,0,14,0,6,0.0,0.0,0.0
1,2023-01-02,0,14,0,0,0.0,0.0,0.0
2,2023-01-03,0,14,0,1,0.0,0.0,0.0
3,2023-01-04,0,14,0,2,0.0,0.0,0.0
4,2023-01-05,0,14,0,3,0.0,0.0,0.0


In [None]:
# Write the engineered training table to Drive and print a completion message.
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# extra unnamed first column — confirm that downstream readers expect it.
preprocessed_df.to_csv('/content/drive/MyDrive/LG Aimers 7기/data/preprocessed_train.csv')
print('완료')

완료


In [None]:
# 다시 mapping 할 경우
# bpn_dict = {0: '느티나무셀프BBQ', 1: '담하', 2: '라그로타', 3: '미라시아', 4: '연회장', 5: '카페테리아', 6: '포레스트릿', 7: '화담숲주막', 8: '화담숲카페'}
# menu_dict = {0: '(단체)공깃밥', 1: '(단체)브런치주중36,000', 2: '(단체)생목살김치전골2.0', 3: '(단체)은이버섯갈비탕', 4: '(단체)한우우거지국밥', 5: '(단체)황태해장국3/27까지', 6: '(오븐)하와이안쉬림프피자', 7: '(정식)된장찌개', 8: '(정식)물냉면', 9: '(정식)비빔냉면', 10: '(화덕)불고기페퍼로니반반피자', 11: '(후식)된장찌개', 12: '(후식)물냉면', 13: '(후식)비빔냉면', 14: '1인수저세트', 15: 'AUS(200g)', 16: 'BBQ55(단체)', 17: 'BBQPlatter', 18: 'BBQ고기추가', 19: 'CassBeer', 20: 'ConferenceL1', 21: 'ConferenceL2', 22: 'ConferenceL3', 23: 'ConferenceM1', 24: 'ConferenceM8', 25: 'ConferenceM9', 26: 'ConventionHall', 27: 'CookiePlatter', 28: 'G-Charge(3)', 29: 'Gls.Sileni', 30: 'Gls.미션서드', 31: 'GrandBallroom', 32: 'OPUS2', 33: 'OpenFood', 34: 'RegularCoffee', 35: '갑오징어비빔밥', 36: '갱시기', 37: '골뱅이무침', 38: '공깃밥', 39: '공깃밥(추가)', 40: '구슬아이스크림', 41: '그릴드비프샐러드', 42: '글라스와인(레드)', 43: '까르보나라', 44: '꼬막비빔밥', 45: '꼬치어묵', 46: '느린마을막걸리', 47: '단체식13000(신)', 48: '단체식18000(신)', 49: '단호박식혜', 50: '담하한우불고기', 51: '담하한우불고기정식', 52: '대여료30,000원', 53: '대여료60,000원', 54: '대여료90,000원', 55: '더덕한우지짐', 56: '돈목살김치찌개(밥포함)', 57: '돼지고기김치찌개', 58: '들깨양지탕', 59: '떡볶이', 60: '라면사리', 61: '레인보우칵테일(알코올)', 62: '로제치즈떡볶이', 63: '룸이용료', 64: '마라샹궈', 65: '매콤무뼈닭발&계란찜', 66: '메밀면사리', 67: '메밀미숫가루', 68: '명인안동소주', 69: '명태회비빔냉면', 70: '모둠돈육구이(3인)', 71: '모둠해산물플래터', 72: '문막복분자칵테일', 73: '미라시아브런치(패키지)', 74: '미션서드카베르네쉬라', 75: '버드와이저(무제한)', 76: '버섯크림리조또', 77: '병천순대', 78: '보일링랍스타플래터', 79: '보일링랍스타플래터(덜매운맛)', 80: '복숭아아이스티', 81: '본삼겹(단품,실내)', 82: '봉평메밀물냉면', 83: '브런치(대인)주말', 84: '브런치(대인)주중', 85: '브런치(어린이)', 86: '브런치2인패키지', 87: '브런치4인패키지', 88: '빵추가(1인)', 89: '삼겹살추가(200g)', 90: '새우볶음밥', 91: '새우튀김우동', 92: '생목살김치찌개', 93: '생수', 94: '샷추가', 95: '수제등심돈까스', 96: '쉬림프투움바파스타', 97: '스텔라(무제한)', 98: '스프라이트', 99: '스프라이트(단체)', 100: '시저샐러드', 101: '신라면', 102: '쌈야채세트', 103: '쌈장', 104: '아메리카노', 105: '아메리카노(HOT)', 106: '아메리카노(ICE)', 107: '아메리카노HOT', 108: '아메리카노ICE', 109: '알리오에올리오', 110: '애플망고에이드', 111: '야채추가', 112: '약고추장돌솥비빔밥', 113: '양갈비(4ps)', 114: '어린이돈까스', 115: '얼그레이하이볼', 116: '오븐구이윙과킬바사소세지', 117: '오픈푸드', 118: '왕갈비치킨', 119: '유자하이볼', 120: '육개장사발면', 121: '은이버섯갈비탕', 122: 
'일회용소주컵', 123: '일회용종이컵', 124: '자몽리치에이드', 125: '잔디그늘집대여료(12인석)', 126: '잔디그늘집대여료(6인석)', 127: '잔디그늘집의자추가', 128: '잭애플토닉', 129: '제로콜라', 130: '주먹밥(2ea)', 131: '진사골설렁탕', 132: '짜장면', 133: '짜장밥', 134: '짬뽕', 135: '짬뽕밥', 136: '참살이막걸리', 137: '참이슬', 138: '참이슬(단체)', 139: '찹쌀식혜', 140: '처음처럼', 141: '치즈돈까스', 142: '치즈핫도그', 143: '친환경접시14cm', 144: '친환경접시23cm', 145: '칠리치즈프라이', 146: '카스', 147: '카스병(단체)', 148: '카페라떼(HOT)', 149: '카페라떼(ICE)', 150: '카페라떼ICE', 151: '코카콜라', 152: '코카콜라(제로)', 153: '콜라', 154: '콜라(단체)', 155: '콥샐러드', 156: '테라', 157: '파스타면추가(150g)', 158: '페스츄리소시지', 159: '핑크레몬에이드', 160: '하동매실칵테일', 161: '하이네켄(생)', 162: '한상삼겹구이정식(2인)소요시간약15~20분', 163: '한우(200g)', 164: '한우떡갈비정식', 165: '한우미역국정식', 166: '한우우거지국밥', 167: '한우차돌박이된장찌개', 168: '해물파전', 169: '해산물토마토리조또', 170: '해산물토마토스튜파스타', 171: '해산물토마토스파게티', 172: '햇반', 173: '허브솔트', 174: '현미뻥스크림', 175: '황태해장국'}
# df['영업장명_복원'] = df['영업장명'].map(bpn_dict)
# df['메뉴명'] = df['메뉴명'].map(menu_dict)

# Pivot

In [None]:
# Trim leading/trailing whitespace from the combined store_menu key before pivoting.
# NOTE(review): the Preprocess section already removed every space from this column of
# train_df, yet the pivot outputs shown below still contain names with spaces, so they
# appear to come from a freshly loaded train_df. Confirm which form is intended and
# reload the CSV before this section if the original spacing is required.
train_df['영업장명_메뉴명'] = train_df['영업장명_메뉴명'].str.strip()

In [None]:
# Look for (date, store_menu) pairs that occur more than once; keep=False flags
# every member of a duplicated group, not just the repeats.
key_columns = ['영업일자', '영업장명_메뉴명']
duplicate_rows = train_df[train_df.duplicated(subset=key_columns, keep=False)]

if not duplicate_rows.empty:
    print(f"총 {len(duplicate_rows)}개")
    print(duplicate_rows.sort_values(by=key_columns))
else:
    print("중복 데이터 없음")

중복 데이터 없음


In [None]:
# Number of rows whose sales quantity is negative, before clamping.
negative_values_before = train_df['매출수량'].lt(0).sum()
print(f"음수 개수: {negative_values_before}")

음수 개수: 14


In [None]:
# Floor the sales quantity at zero (negative values become 0).
train_df['매출수량'] = train_df['매출수량'].clip(lower=0)

In [None]:
# Re-count the negative rows to confirm the clamp left none.
negative_values_after = train_df['매출수량'].lt(0).sum()
print(f"음수 개수: {negative_values_after}")

음수 개수: 0


In [None]:
# Make sure the business-date column is datetime before it becomes the pivot index.
# (The Preprocess section performs the same conversion, so this repeats it on a top-to-bottom run.)
train_df['영업일자'] = pd.to_datetime(train_df['영업일자'])

In [None]:
# Reshape to wide format: one row per business date, one column per store_menu
# key, cells holding the sales quantity. No aggfunc is passed, so pandas'
# default aggregation is used for each (date, key) cell.
pivot_df = pd.pivot_table(
    train_df,
    index='영업일자',
    columns='영업장명_메뉴명',
    values='매출수량',
)

In [None]:
pivot_df.head()

영업장명_메뉴명,느티나무 셀프BBQ_1인 수저세트,느티나무 셀프BBQ_BBQ55(단체),"느티나무 셀프BBQ_대여료 30,000원","느티나무 셀프BBQ_대여료 60,000원","느티나무 셀프BBQ_대여료 90,000원","느티나무 셀프BBQ_본삼겹 (단품,실내)",느티나무 셀프BBQ_스프라이트 (단체),느티나무 셀프BBQ_신라면,느티나무 셀프BBQ_쌈야채세트,느티나무 셀프BBQ_쌈장,...,화담숲주막_스프라이트,화담숲주막_참살이 막걸리,화담숲주막_찹쌀식혜,화담숲주막_콜라,화담숲주막_해물파전,화담숲카페_메밀미숫가루,화담숲카페_아메리카노 HOT,화담숲카페_아메리카노 ICE,화담숲카페_카페라떼 ICE,화담숲카페_현미뻥스크림
영업일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-01-02,0.0,0.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-01-03,0.0,0.0,2.0,3.0,0.0,2.0,10.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-01-04,0.0,0.0,3.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2023-01-05,0.0,19.0,6.0,2.0,0.0,0.0,8.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Drop the name carried by the column axis and move the date index back into a
# regular column.
pivot_df = pivot_df.rename_axis(None, axis=1).reset_index()

In [None]:
pivot_df.head()

Unnamed: 0,영업일자,느티나무 셀프BBQ_1인 수저세트,느티나무 셀프BBQ_BBQ55(단체),"느티나무 셀프BBQ_대여료 30,000원","느티나무 셀프BBQ_대여료 60,000원","느티나무 셀프BBQ_대여료 90,000원","느티나무 셀프BBQ_본삼겹 (단품,실내)",느티나무 셀프BBQ_스프라이트 (단체),느티나무 셀프BBQ_신라면,느티나무 셀프BBQ_쌈야채세트,...,화담숲주막_스프라이트,화담숲주막_참살이 막걸리,화담숲주막_찹쌀식혜,화담숲주막_콜라,화담숲주막_해물파전,화담숲카페_메밀미숫가루,화담숲카페_아메리카노 HOT,화담숲카페_아메리카노 ICE,화담숲카페_카페라떼 ICE,화담숲카페_현미뻥스크림
0,2023-01-01,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2023-01-02,0.0,0.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2023-01-03,0.0,0.0,2.0,3.0,0.0,2.0,10.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2023-01-04,0.0,0.0,3.0,6.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2023-01-05,0.0,19.0,6.0,2.0,0.0,0.0,8.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Total number of missing cells anywhere in the pivoted table.
nan_count_before = pivot_df.isna().to_numpy().sum()
print(f"NaN 개수: {nan_count_before}")

NaN 개수: 0


In [None]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532 entries, 0 to 531
Columns: 194 entries, 영업일자 to 화담숲카페_현미뻥스크림
dtypes: datetime64[ns](1), float64(193)
memory usage: 806.4 KB


In [None]:
# Cast every float64 column to integer; columns of other dtypes (the date) are untouched.
float_cols = pivot_df.select_dtypes(include=['float64']).columns
pivot_df = pivot_df.astype({col: int for col in float_cols})
pivot_df.head()

Unnamed: 0,영업일자,느티나무 셀프BBQ_1인 수저세트,느티나무 셀프BBQ_BBQ55(단체),"느티나무 셀프BBQ_대여료 30,000원","느티나무 셀프BBQ_대여료 60,000원","느티나무 셀프BBQ_대여료 90,000원","느티나무 셀프BBQ_본삼겹 (단품,실내)",느티나무 셀프BBQ_스프라이트 (단체),느티나무 셀프BBQ_신라면,느티나무 셀프BBQ_쌈야채세트,...,화담숲주막_스프라이트,화담숲주막_참살이 막걸리,화담숲주막_찹쌀식혜,화담숲주막_콜라,화담숲주막_해물파전,화담숲카페_메밀미숫가루,화담숲카페_아메리카노 HOT,화담숲카페_아메리카노 ICE,화담숲카페_카페라떼 ICE,화담숲카페_현미뻥스크림
0,2023-01-01,0,0,9,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2023-01-02,0,0,2,4,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2023-01-03,0,0,2,3,0,2,10,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2023-01-04,0,0,3,6,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2023-01-05,0,19,6,2,0,0,8,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 532 entries, 0 to 531
Columns: 194 entries, 영업일자 to 화담숲카페_현미뻥스크림
dtypes: datetime64[ns](1), int64(193)
memory usage: 806.4 KB


In [None]:
# Write the wide (pivoted) training table to Drive and print a completion message.
# NOTE(review): to_csv is called without index=False, so the RangeIndex created by
# reset_index is written as an extra unnamed first column — confirm that downstream
# readers expect it.
pivot_df.to_csv('/content/drive/MyDrive/LG Aimers 7기/data/pivot_train.csv')
print('완료')

완료
