# Data

In [None]:
# Mount Google Drive into the Colab runtime; the data path used below lives
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import seaborn as sns
# Plot styling: dark grid background.
sns.set_style('darkgrid')
# NOTE(review): 'IPAexGothic' is presumably chosen so Japanese labels render
# in plots — confirm the font is installed in the runtime.
sns.set(font='IPAexGothic')

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Directory on the mounted Drive that holds the competition CSV files.
path = '/content/drive/My Drive/Signate/MUFG/Data/'

# Load the training table first, then the test table.
train, test = (
    pd.read_csv(f'{path}{csv_name}') for csv_name in ('train.csv', 'test.csv')
)

In [None]:
# Let pandas show up to 1000 rows and 1000 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 1000)

# Preprocessing

## Drop Data

はじめの11個を落とす

In [None]:
# Discard the first 11 rows of the training data (positional slice).
train = train.iloc[11:]

カレー以外で100個売れる（異常値）として削除

In [None]:
# Rows that sold 100 or more without being a curry dish are treated as
# outliers and removed in place.
sold_100_plus = train['y'] >= 100
not_curry = train['name'].apply(lambda dish: 'カレー' not in dish)
train.drop(index=train[sold_100_plus & not_curry].index, inplace=True)

In [None]:
# Stack train on top of test with a fresh 0..n-1 index so features can be
# engineered once for both. DataFrame.append was removed in pandas 2.0;
# pd.concat is the equivalent supported call.
combined = pd.concat([train, test], ignore_index=True)

## Missing Value

In [None]:
# Fill missing values: text columns get the marker 'なし', payday gets 0.
for column_name, filler in (('remarks', 'なし'), ('event', 'なし'), ('payday', 0)):
    combined[column_name] = combined[column_name].fillna(filler)

# Name Feature Engineering

## 条件別

In [None]:
# Whether the kcal value is missing.
combined['kcal_nan'] = combined['kcal'].isnull()

# Curry without a kcal value (the popular curry).
name_has_curry = combined['name'].apply(lambda dish: 'カレー' in dish)
combined['popular_curry'] = combined['kcal'].isnull() & name_has_curry

# Curry with a kcal value (the unpopular curry) — currently disabled.
# combined['unpopular_curry'] = (combined['kcal'].notnull()) & (combined['name'].apply(lambda x : True if x.find('カレー') >=0 else False))

# Keema curry that has a kcal value.
name_has_keema = combined['name'].apply(lambda dish: 'キーマカレー' in dish)
combined['キーマ'] = combined['kcal'].notnull() & name_has_keema

## 料理名別

In [None]:
# Dish-name flags: one boolean column per dish, True when the menu name
# matches the rule. Columns are created in the order listed.
def _any_of(*keywords):
    """Return a predicate that is True when a name contains any keyword."""
    return lambda dish: any(word in dish for word in keywords)


dish_rules = [
    # Chicken dishes, excluding chicken curry.
    ('チキン', lambda dish: 'チキン' in dish and 'カレー' not in dish),
    ('唐揚げ', _any_of('唐揚')),
    ('ハンバーグ', _any_of('ハンバーグ')),
    ('メンチカツ', _any_of('メンチカツ')),
    ('エビフライ', _any_of('エビフライ', 'えびフライ')),
    ('麻婆', _any_of('麻婆', 'マーボ')),
    ('牛丼風', _any_of('牛丼風')),
    ('シチュー', _any_of('シチュー')),
    ('酢豚', _any_of('酢豚')),
    ('チンジャオロース', _any_of('チンジャオロース', '青椒肉絲')),
    ('親子', _any_of('親子')),
    ('生姜', _any_of('生姜')),
    # Not present in the test data.
    ('キムチ', _any_of('キムチ')),
    # Needs both keywords.
    ('ぶり照り', lambda dish: 'ぶり' in dish and '照り' in dish),
    ('トンカツ', _any_of('トンカツ', 'とんかつ')),
    ('白身魚', _any_of('白身魚')),
    ('油淋鶏', _any_of('油淋鶏')),
    ('炊き込みご飯', _any_of('炊き込みご飯')),
    ('マスタード', _any_of('マスタード')),
    ('肉じゃが', _any_of('肉じゃが')),
    ('スタミナ', _any_of('スタミナ')),
    ('柳川', _any_of('柳川')),
    ('コロッケ', _any_of('コロッケ')),
    ('エビカツ', _any_of('エビカツ')),
    ('天ぷら', _any_of('天ぷら')),
    # Not present in the test data.
    ('ビーフストロガノフ', _any_of('ビーフストロガノフ')),
    ('肉野菜オイスター', _any_of('肉野菜オイスター')),
    # Needs both keywords.
    ('鶏照り', lambda dish: '鶏' in dish and '照り' in dish),
    ('レモンペッパー焼き', _any_of('レモンペッパー焼き')),
]

for column_name, matches in dish_rules:
    combined[column_name] = combined['name'].apply(matches)

## カテゴリ別

In [None]:
# Ingredient-category flags: True when the menu name contains any keyword of
# the category (chicken, beef, pork, fish/seafood, vegetables).
ingredient_keywords = {
    '鶏': ('鶏', 'チキン'),
    '牛': ('牛', 'ビーフ'),
    '豚': ('豚', 'ポーク', 'ロース'),
    '魚': ('魚', '鮭', 'サーモン', 'ぶり', 'アジ', 'ホキ', '海鮮',
           'サバ', '鯖', 'エビ', 'えび', '海老', 'イカ', 'いか', 'かじき'),
    '野菜': ('菜', '大根', 'キムチ', 'ゴーヤ'),
}

for column_name, keywords in ingredient_keywords.items():
    combined[column_name] = combined['name'].apply(
        lambda dish, keywords=keywords: any(word in dish for word in keywords)
    )

## 調理法別

In [None]:
# Cooking-method flags: True when the menu name contains any listed keyword.
cooking_keywords = {
    '焼き': ('焼', '焼き', 'やき', 'ヤキ'),
    '煮': ('煮',),
    'フライ': ('フライ',),
    '炒め': ('炒め',),
    'ソテー': ('ソテー',),
    '漬け': ('漬け',),
    # Only white fish and salmon dishes; may not be needed.
    'ムニエル': ('ムニエル',),
}

for column_name, keywords in cooking_keywords.items():
    combined[column_name] = combined['name'].apply(
        lambda dish, keywords=keywords: any(word in dish for word in keywords)
    )

## スタイル別

ユニークな値をここで拾う

In [None]:
# Cuisine-style flags — all three are currently disabled. Every line of a
# disabled statement, including continuation lines, must stay commented out;
# a stray uncommented continuation raises IndentationError.

# Japanese style
# Keywords: 和, おでん, おろし, みりん, 五目, 西京
# combined['和風'] =  combined['name'].apply(lambda x : True if ('和' in x or 'おでん' in x or 'おろし' in x or 'みりん' in x or '味醂' in x or '五目' in x or '西京' in x or '筑前' in x) else False)

# Western style
# Keywords: 洋, ビーフストロガノフ, BBQ, マヨ, ガーリック, オリーブ, デミ (not in test), ビュッフェ (not in test), ミックスグリル (not in test)
# combined['洋風'] =  combined['name'].apply(lambda x : True if ('洋' in x or 'ビーフストロガノフ' in x or 'BBQ' in x or 'マヨ' in x or 'ガーリック' in x
#                                                              or 'オリーブ' in x or 'デミ' in x or 'ビュッフェ' in x) else False)

# Chinese, Korean and other
# Keywords: 中華, プルコギ, 酢豚, チリソース, サーザイ, ガパオ, 韓国
# combined['中華'] =  combined['name'].apply(lambda x : True if ('中華' in x or 'プルコギ' in x or '酢豚' in x or 'チリソース' in x or 'サーザイ' in x or 'ガパオ' in x or '韓国' in x ) else False)

## テストデータ用

In [None]:
# Extra keyword flags aimed at the test data: True when the menu name
# contains any listed keyword.
test_keyword_flags = {
    'サバ': ('サバ', '鯖'),
    'エビ': ('エビ', 'えび', '海老'),
    'イカ': ('イカ', 'いか'),
    '豆': ('豆',),
    'ビビンバ': ('ビビンバ',),
    'グリル': ('グリル',),
    'すき焼き': ('すき焼き',),
    '回鍋肉': ('回鍋肉',),
    '挽肉': ('挽肉',),
    '胡椒': ('胡椒',),
    'トマト': ('トマト',),
    '味噌': ('味噌', 'みそ'),
    '胡麻': ('胡麻',),
    'タルタル': ('タルタル',),
    'おろし': ('おろし',),
}

for column_name, keywords in test_keyword_flags.items():
    combined[column_name] = combined['name'].apply(
        lambda dish, keywords=keywords: any(word in dish for word in keywords)
    )

# Not present in the training data, so left disabled.
# combined['みりん'] =  combined['name'].apply(lambda x : True if ('みりん' in x or '味醂' in x) else False)


In [None]:
# Display the column names to check which features exist so far.
combined.columns.values

array(['dateid', 'y', 'week', 'soldout', 'name', 'kcal', 'remarks',
       'event', 'payday', 'weather', 'precipitation', 'temperature',
       'kcal_nan', 'popular_curry', 'unpopular_curry', 'チキン', '唐揚げ',
       'ハンバーグ', 'メンチカツ', 'エビフライ', '麻婆', '牛丼風', 'シチュー', '酢豚', 'チンジャオロース',
       '親子', '生姜', 'キムチ', 'ぶり照り', 'トンカツ', '白身魚', '油淋鶏', '炊き込みご飯', 'マスタード',
       '肉じゃが', 'スタミナ', '柳川', 'コロッケ', 'エビカツ', '天ぷら', 'ビーフストロガノフ',
       '肉野菜オイスター', '鶏照り', 'レモンペッパー焼き', '鶏', '牛', '豚', '魚', '野菜', '焼き',
       '煮', 'フライ', '炒め', 'ソテー', '漬け', 'ムニエル', '和風', '洋風', '中華', 'サバ',
       'エビ', 'イカ', '豆', 'ビビンバ', 'グリル', 'すき焼き', '回鍋肉', '挽肉', '胡椒', 'トマト',
       '味噌', '胡麻', 'タルタル', 'おろし'], dtype=object)

# Feature Engineering

## month

In [None]:
# combined['dateid'] = pd.to_datetime(combined['dateid'])
# combined['month'] = combined['dateid'].dt.month

## Replace values not shared by train and test

In [None]:
# Undo the concatenation: the first len(train) rows of `combined` are the
# training rows, the remainder is the test data (without the target 'y').
delimit_num = len(train)
train = combined.loc[:delimit_num - 1]
test = combined.loc[delimit_num:].drop(columns=['y'])
# Keep a handle on this test frame for the pre-submission check.
test_confirmation = test

In [None]:
# Columns whose values are checked for being shared between train and test.
delete_unique_col = ['name', 'remarks', 'event', 'weather']

In [None]:
def delete_unique_values(train, test, delete_unique_col):
    """Collapse categories that train and test do not share into 'あり'.

    For every column in ``delete_unique_col``, values that occur in only one
    of the two frames are replaced by the placeholder 'あり', so both frames
    end up with the same set of categories in that column.

    The replacement is restricted to the column being processed. Running
    ``DataFrame.replace`` over the whole frame would also rewrite equal
    values that happen to appear in unrelated columns.

    Args:
        train: Training DataFrame; it is not modified.
        test: Test DataFrame; it is not modified.
        delete_unique_col: Iterable of column names to process.

    Returns:
        Tuple ``(train, test)`` of new DataFrames with the unshared values
        replaced.
    """
    placeholder = 'あり'
    # Work on copies so the callers' frames are left untouched.
    train = train.copy()
    test = test.copy()

    for col in delete_unique_col:
        train_values = set(train[col].unique())
        test_values = set(test[col].unique())
        # Symmetric difference: values present in exactly one of the frames.
        unshared = list(train_values ^ test_values)
        if not unshared:
            continue

        for frame in (train, test):
            frame[col] = frame[col].mask(frame[col].isin(unshared), placeholder)

    return train, test

In [None]:
# Replace values that appear in only one of train/test (for the columns
# listed in delete_unique_col) with the placeholder 'あり'.
train, test = delete_unique_values(train, test, delete_unique_col)

## kcal

特定の値で補間

In [None]:
# train.loc[train['kcal'] < 400, 'kcal'] = 400
# test.loc[test['kcal'] > 500, 'kcal'] = 500 

# 最終確認

In [None]:
# Re-stack the cleaned train and test frames with a fresh 0..n-1 index.
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# supported call.
combined = pd.concat([train, test], ignore_index=True)

In [None]:
# The date column is not used as a feature; remove it in place.
combined.drop(columns=['dateid'], inplace=True)

In [None]:
# Turn the boolean flags into floats (True -> 1.0, False -> 0.0).
# NOTE(review): replace() runs over the whole frame, not only the flag
# columns — confirm no other column holds values that match True/False.
combined.replace(True, 1.0, inplace=True)
combined.replace(False, 0.0, inplace=True)

In [None]:
# Display the final column names before modelling.
combined.columns.values

array(['y', 'week', 'soldout', 'name', 'kcal', 'remarks', 'event',
       'payday', 'weather', 'precipitation', 'temperature', 'kcal_nan',
       'popular_curry', 'unpopular_curry', 'チキン', '唐揚げ', 'ハンバーグ', 'メンチカツ',
       'エビフライ', '麻婆', '牛丼風', 'シチュー', '酢豚', 'チンジャオロース', '親子', '生姜', 'キムチ',
       'ぶり照り', 'トンカツ', '白身魚', '油淋鶏', '炊き込みご飯', 'マスタード', '肉じゃが', 'スタミナ',
       '柳川', 'コロッケ', 'エビカツ', '天ぷら', 'ビーフストロガノフ', '肉野菜オイスター', '鶏照り',
       'レモンペッパー焼き', '鶏', '牛', '豚', '魚', '野菜', '焼き', '煮', 'フライ', '炒め',
       'ソテー', '漬け', 'ムニエル', '和風', '洋風', '中華', 'サバ', 'エビ', 'イカ', '豆',
       'ビビンバ', 'グリル', 'すき焼き', '回鍋肉', '挽肉', '胡椒', 'トマト', '味噌', '胡麻',
       'タルタル', 'おろし'], dtype=object)

# PyCaret

In [None]:
! pip install pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Split the processed frame again: the first len(train) rows become the
# modelling data X (target 'y' included), the rest is the test set without 'y'.
delimit_num = len(train)
X = combined.loc[:delimit_num - 1]
test = combined.loc[delimit_num:].drop(columns=['y'])

In [None]:
# Sanity check: row and column counts of the modelling and test frames.
print(f"X   : {X.shape}")
print(f"test: {test.shape}")

X   : (321, 73)
test: (144, 72)


In [None]:
# Display the columns that will be handed to PyCaret.
X.columns.values

array(['y', 'week', 'soldout', 'name', 'kcal', 'remarks', 'event',
       'payday', 'weather', 'precipitation', 'temperature', 'kcal_nan',
       'popular_curry', 'unpopular_curry', 'チキン', '唐揚げ', 'ハンバーグ', 'メンチカツ',
       'エビフライ', '麻婆', '牛丼風', 'シチュー', '酢豚', 'チンジャオロース', '親子', '生姜', 'キムチ',
       'ぶり照り', 'トンカツ', '白身魚', '油淋鶏', '炊き込みご飯', 'マスタード', '肉じゃが', 'スタミナ',
       '柳川', 'コロッケ', 'エビカツ', '天ぷら', 'ビーフストロガノフ', '肉野菜オイスター', '鶏照り',
       'レモンペッパー焼き', '鶏', '牛', '豚', '魚', '野菜', '焼き', '煮', 'フライ', '炒め',
       'ソテー', '漬け', 'ムニエル', '和風', '洋風', '中華', 'サバ', 'エビ', 'イカ', '豆',
       'ビビンバ', 'グリル', 'すき焼き', '回鍋肉', '挽肉', '胡椒', 'トマト', '味噌', '胡麻',
       'タルタル', 'おろし'], dtype=object)

## feature

In [None]:
# exp1
# Feature lists passed to PyCaret's setup(). The categorical list is built
# from themed groups; the concatenation order is the feature order.

context_features = [
    'week', 'soldout', 'name', 'remarks', 'event', 'payday', 'weather',
]

conditional_features = ['kcal_nan', 'popular_curry', 'キーマ']

dish_features = [
    'チキン', '唐揚げ', 'ハンバーグ', 'メンチカツ',
    'エビフライ', '麻婆', '牛丼風', 'シチュー', '酢豚', 'チンジャオロース',
    '親子', '生姜', 'ぶり照り',
    'トンカツ', '白身魚', '油淋鶏', '炊き込みご飯', 'マスタード', '肉じゃが',
    'スタミナ', '柳川',
    'コロッケ', 'エビカツ', '天ぷら', '肉野菜オイスター', '鶏照り',
    'レモンペッパー焼き',
]

ingredient_features = ['鶏', '牛', '豚', '魚', '野菜']

cooking_features = ['焼き', '煮', 'フライ', '炒め', 'ソテー', '漬け', 'ムニエル']

# Cuisine-style flags ('和風', '洋風', '中華') and 'month' are disabled.

test_keyword_features = [
    'サバ', 'エビ', 'イカ', '豆', 'ビビンバ', 'グリル', 'すき焼き', '回鍋肉',
    '挽肉', '胡椒', 'トマト', '味噌', '胡麻', 'タルタル', 'おろし',
]

categorical_features = (
    context_features
    + conditional_features
    + dish_features
    + ingredient_features
    + cooking_features
    + test_keyword_features
)

numeric_features = ['kcal', 'precipitation', 'temperature']

## setup

In [None]:
# The star import brings setup, create_model, tune_model, evaluate_model,
# finalize_model and predict_model (used in the cells below) into scope.
from pycaret.regression import *

# Initialise the PyCaret regression experiment on X with 'y' as the target,
# a fixed session_id, normalisation enabled, and explicit feature typing.
exp = setup(
        data=X,
        target='y',
        session_id=42,
        normalize = True,
        train_size = 0.999, # keep (almost) all rows in the training split, i.e. effectively no hold-out
        categorical_features = categorical_features, 
        numeric_features = numeric_features,
        )

Unnamed: 0,Description,Value
0,session_id,42
1,Target,y
2,Original Data,"(321, 73)"
3,Missing Values,True
4,Numeric Features,3
5,Categorical Features,69
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(320, 143)"


INFO:logs:create_model_container: 0
INFO:logs:master_model_container: 0
INFO:logs:display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=['week', 'soldout',
                                                            'name', 'remarks',
                                                            'event', 'payday',
                                                            'weather',
                                                            'kcal_nan',
                                                            'popular_curry',
                                                            'unpopular_curry',
                                                            'チキン', '唐揚げ',
                                                            'ハンバーグ', 'メンチカツ',
                                                            'エビフライ', '麻婆',
                                                            '牛丼風', 'シチュー', 

## create_model

In [None]:
# Fit a random forest regressor ('rf') and score it with 5-fold cross-validation.
model = create_model('rf', fold = 5)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,8.9542,124.7189,11.1678,0.7413,0.1821,0.1512
1,9.0011,124.1566,11.1426,0.6435,0.198,0.1627
2,7.768,99.6614,9.9831,0.72,0.1733,0.1433
3,7.9227,121.8013,11.0364,0.5933,0.174,0.1283
4,10.4328,161.2633,12.6989,0.4951,0.2157,0.1868
Mean,8.8158,126.3203,11.2057,0.6386,0.1886,0.1545
Std,0.9554,19.7958,0.867,0.0893,0.0162,0.0196


INFO:logs:create_model_container: 1
INFO:logs:master_model_container: 1
INFO:logs:display_container: 2
INFO:logs:RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=-1, oob_score=False,
                      random_state=42, verbose=0, warm_start=False)
INFO:logs:create_model() succesfully completed......................................


## tune_model

In [None]:
# Tune the random forest with 5 folds.
# NOTE(review): n_iter=100 is presumably the number of search iterations — confirm against the PyCaret docs.
tuned_model = tune_model(model, fold=5, n_iter=100)

## evaluate_model

In [None]:
# Inspect the tuned model with PyCaret's evaluation helper.
evaluate_model(tuned_model)

## finalize_model

In [None]:
# NOTE(review): presumably refits the tuned model on all available rows before prediction — confirm against the PyCaret docs.
final_model = finalize_model(tuned_model)

## predict_model

### final_model

In [None]:
# Predict on the test rows; the predictions are read from the 'Label' column below.
pred_esb = predict_model(final_model, test)

In [None]:
# Display the predicted values.
pred_esb['Label']

# 提出前確認

In [None]:
# Plot the training target in row order, for visual comparison with the predictions.
X['y'].plot(figsize=(15,4))

In [None]:
# Plot the predictions for the test rows at the same figure size.
pred_esb['Label'].plot(figsize=(15,4))

In [None]:
# Attach the predictions to the test frame kept before category replacement,
# so they can be read next to the original dish names.
# NOTE(review): Series assignment aligns on index — assumes pred_esb keeps the test rows' index; confirm.
test_confirmation['y'] = pred_esb['Label']

In [None]:
# Let pandas show up to 1000 rows and 1000 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 1000)

In [None]:
# Display prediction, weekday and dish name side by side for a manual check.
test_confirmation.loc[:,['y','week','name']]

# Submit

In [None]:
# NOTE(review): bare undefined name — evaluating it raises NameError.
# Presumably placed here on purpose to halt "Run all" before the submission
# cells below; confirm before removing.
とまる

In [None]:
# Read the sample submission (no header row) and overwrite its second column
# (position 1) with the predicted values.
submit = pd.read_csv(path + 'submit_sample.csv', header=None)
submit.iloc[:,[1]] = pred_esb['Label'].values

In [None]:
# When working on Google Drive / Google Colaboratory: write the submission
# without header or index, then download it through the browser.
submission_filename = 'AutoML_final_exp2_9.csv'
submit.to_csv(submission_filename, header=None, index=None)

from google.colab import files
files.download(submission_filename)