<a href="https://colab.research.google.com/github/daisuke08253649/DeepLearning/blob/main/Lunch_box_demand_forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the training data (path is relative to the working directory) and print
# a first overview: schema/dtypes, summary statistics, and missing values per column.
train_df = pd.read_csv('./drive/MyDrive/DeepLearning/SIGNATE/Lunch_box_demand_forecast/train_3.csv')
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
train_df.info()
print(train_df.describe())
print(train_df.isnull().sum())

In [None]:
# Distribution of the target column `y`.
# In a histogram the x-axis carries the values of `y` (sales) and the y-axis the
# number of rows per bin, so the axes are labeled accordingly.
ax = train_df['y'].plot.hist(title='sales of box lunch')
ax.set_xlabel('sales')
ax.set_ylabel('frequency')

# Red vertical line marks the mean of `y`.
ax.axvline(x=train_df['y'].mean(), color='red')
plt.show()

In [None]:
# Scatter of `y` against temperature, drawn through the returned Axes object.
ax = train_df.plot.scatter(
    x='temperature',
    y='y',
    c='blue',
    title='scatter plot of temperature and sales',
)
ax.set_xlabel('temperature')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Scatter of `y` against kcal, drawn through the returned Axes object.
ax = train_df.plot.scatter(
    x='kcal',
    y='y',
    c='blue',
    title='scatter plot of kcal and sales',
)
ax.set_xlabel('kcal')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Pairwise correlation matrix of the target and the two numeric columns.
corr_columns = ['y', 'temperature', 'kcal']
corr_matrix = train_df[corr_columns].corr()
print(corr_matrix)

In [None]:
# Box plot of `y` per `week` category, with the categories in a fixed order.
weekday_order = ['月', '火', '水', '木', '金']
ax = sns.boxplot(x='week', y='y', data=train_df, order=weekday_order)
ax.set_title('sales of each week')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Line plot of `y` over the row index, one line per `week` category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    x=train_df.index,
    y=train_df['y'],
    hue=train_df['week'],
    data=train_df,
    ax=ax,
)
ax.set_xlabel('time step')
ax.set_ylabel('sales')
ax.set_title('sales of box lunch')
plt.show()

In [None]:
# Replace missing values with explicit placeholder categories.
# The result is assigned back to the column instead of calling
# `train_df[col].fillna(..., inplace=True)`: that form mutates a temporary
# selection, emits a FutureWarning in pandas 2.x, and leaves train_df unchanged
# once Copy-on-Write is enabled.
train_df['remarks'] = train_df['remarks'].fillna('特記なし')
train_df['event'] = train_df['event'].fillna('イベントなし')
# NOTE(review): the filler is the string '0'; if the existing payday values are
# numeric this yields a mixed-type column — confirm that is intended.
train_df['payday'] = train_df['payday'].fillna('0')

In [None]:
# Box plot of `y` per `remarks` category.
# The x-axis shows the remarks categories and the y-axis the sales values, so
# the labels say so (previously the x-axis was labeled 'sales').
ax = sns.boxplot(x='remarks', y='y', data=train_df)
ax.set_title('sales of each remarks content')
ax.set_xlabel('remarks')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Box plot of `y` per `event` category.
# The x-axis shows the event categories and the y-axis the sales values, so
# the labels say so (previously the x-axis was labeled 'sales').
ax = sns.boxplot(x='event', y='y', data=train_df)
ax.set_title('sales of each event')
ax.set_xlabel('event')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Box plot of `y` split by the `payday` column.
ax = sns.boxplot(data=train_df, x='payday', y='y')
ax.set_title('sales of payday or not')
ax.set_xlabel('payday')
plt.show()

In [None]:
# Box plot of `y` per `weather` category.
# Fixes the 'wather' typo in the title and labels the axes by what they show:
# weather categories on x, sales on y (previously the x-axis was labeled 'sales').
ax = sns.boxplot(x='weather', y='y', data=train_df)
ax.set_title('sales of each weather category')
ax.set_xlabel('weather')
ax.set_ylabel('sales')
plt.show()

In [None]:
# Disabled draft: would tag train/test rows with a `flag` column (1 = train,
# 0 = test) and stack both frames into a single `data` frame.
# NOTE(review): test_df is only loaded in a later cell, so enabling these lines
# at this position as-is would raise NameError.
#train_df['flag'] = 1
#test_df['flag'] = 0

#data = pd.concat([train_df, test_df])

#data = data.reset_index(drop=True)
#print(data.head())

In [None]:
# Collapse rare weather categories into broader ones.
# '雪' and '雷電' are folded into '雨'. '薄曇' is folded into '曇' as well, because
# the test-set preprocessing later in this notebook applies that same merge;
# without it the model is trained on a different category scheme than the one
# it predicts on.
weather_merge = {'雪': '雨', '雷電': '雨', '薄曇': '曇'}
train_df['weather'] = train_df['weather'].map(lambda w: weather_merge.get(w, w))

# Binary flag: 1 when remarks is exactly 'お楽しみメニュー' AND the menu name
# contains 'カレー', else 0. Computed column-wise instead of a row-wise apply;
# na=False makes a missing name count as "no match" rather than raising.
is_fun_menu = train_df['remarks'] == 'お楽しみメニュー'
has_curry = train_df['name'].str.contains('カレー', regex=False, na=False)
train_df['remarks'] = (is_fun_menu & has_curry).astype(int)

In [None]:
# Split each datetime string on '-' once; the first piece becomes the integer
# `year` column and the second piece the integer `month` column.
date_parts = train_df['datetime'].map(lambda stamp: stamp.split('-'))
train_df['year'] = date_parts.map(lambda parts: parts[0]).astype(int)
train_df['month'] = date_parts.map(lambda parts: parts[1]).astype(int)

In [None]:
# Build the modelling frame by removing the columns that are not used further.
unused_columns = [
    'datetime', 'week', 'soldout', 'name', 'kcal',
    'event', 'payday', 'precipitation', 'year',
]
data = train_df.drop(unused_columns, axis=1)
print(data.head())

In [None]:
# One-hot encode the remaining non-numeric columns and preview the first 10 rows.
data = pd.get_dummies(data=data)
preview = data.head(n=10)
print(preview)

In [None]:
# Separate the predictors (everything except `y`) from the target column `y`.
target_name = 'y'
x_data = data.drop(target_name, axis=1)
y_data = data[target_name]

print(x_data.head())
print(y_data.head())

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


# Hold out part of the data for validation.
# train_test_split shuffles the rows, so the seed is pinned: without
# random_state the split, the fitted coefficients, and the reported RMSE all
# change on every run.
train_X, test_X, train_y, test_y = train_test_split(x_data, y_data, random_state=42)
print(train_X.shape)
print(test_X.shape)

# Linear regression model; it is fitted in a later cell.
lm = LinearRegression()

def RMSE(var1, var2):
    """Return the root mean squared error between `var1` and `var2`.

    Both arguments are forwarded unchanged to sklearn's `mean_squared_error`;
    the square root of its result is returned.
    """
    return np.sqrt(mean_squared_error(var1, var2))

In [None]:
# Columns the model is trained on.
features = [
    'remarks',
    'temperature',
    'weather_快晴',
    'weather_晴れ',
    'weather_曇',
    'weather_雨',
]

#train = data[data['flag'] == 1]
#test = data[data['flag'] == 0]

# Restrict both splits to the selected feature columns.
train_X = train_X.loc[:, features]
test_X = test_X.loc[:, features]

# Print the shapes in the order: train_X, test_X, train_y, test_y.
for split_part in (train_X, test_X, train_y, test_y):
    print(split_part.shape)

In [None]:
# Fit the model and list one coefficient per feature.
lm.fit(train_X, train_y)
coef_table = pd.DataFrame({'回帰係数': lm.coef_}, index=features)
print(coef_table)

# Predict the held-out split, then print its RMSE followed by the raw predictions.
pred1 = lm.predict(test_X)
var = RMSE(test_y, pred1)
print(var)
print(pred1)

In [None]:
# Overlay the held-out target values and the model's predictions.
fig, ax = plt.subplots()
ax.plot(test_y.values, label='actual')
ax.plot(pred1, label='forecast')

ax.set_title('sales of box lunch')
ax.set_xlabel('time step')
ax.set_ylabel('sales')
ax.legend()
plt.show()

In [None]:
# Load the test data (path is relative to the working directory) and print its
# row count, schema/dtypes, summary statistics, and missing values per column.
test_df = pd.read_csv('./drive/MyDrive/DeepLearning/SIGNATE/Lunch_box_demand_forecast/test_2.csv')
print(len(test_df))
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
test_df.info()
print(test_df.describe())
print(test_df.isnull().sum())

In [None]:
# Collapse weather categories: '雪' and '雷電' become '雨', '薄曇' becomes '曇';
# every other value is kept as it is.
weather_merge = {'雪': '雨', '雷電': '雨', '薄曇': '曇'}
test_df['weather'] = test_df['weather'].map(lambda w: weather_merge.get(w, w))

# Binary flag: 1 when remarks is exactly 'お楽しみメニュー' and the menu name
# contains 'カレー', otherwise 0.
test_df['remarks'] = test_df.apply(
    lambda row: int(row['remarks'] == 'お楽しみメニュー' and 'カレー' in row['name']),
    axis=1,
)

# Split each datetime string on '-' once; first piece -> integer `year`,
# second piece -> integer `month`.
date_parts = test_df['datetime'].map(lambda stamp: stamp.split('-'))
test_df['year'] = date_parts.map(lambda parts: parts[0]).astype(int)
test_df['month'] = date_parts.map(lambda parts: parts[1]).astype(int)

In [None]:
# Build the test modelling frame by removing the columns the model does not use.
unused_test_columns = [
    'datetime', 'week', 'soldout', 'name', 'kcal',
    'event', 'payday', 'precipitation', 'year', 'month',
]
test_data = test_df.drop(unused_test_columns, axis=1)
print(test_data)

In [None]:
# One-hot encode the remaining non-numeric test columns and preview the first 10 rows.
test_data = pd.get_dummies(data=test_data)
test_preview = test_data.head(n=10)
print(test_preview)

In [None]:
# Load the submission template and report how many rows it has.
# NOTE(review): read_csv treats the first line as a header by default; if
# sample.csv has no header row, one data row is consumed as column names —
# confirm against the file.
sample_path = './drive/MyDrive/DeepLearning/SIGNATE/Lunch_box_demand_forecast/sample.csv'
sample = pd.read_csv(sample_path)
print(len(sample))

In [None]:
# Make the number of test rows match the submission template by dropping the
# last test row when the test set is longer than the template.
# The same row is removed from test_data as well: test_data is the frame that
# is passed to lm.predict, so trimming only test_df would leave the predictions
# one row longer than the template.
# NOTE(review): this keeps the existing "drop the last row" rule; whether the
# surplus row is really the last one depends on how sample.csv was read — confirm.
if len(test_df) > len(sample):
    surplus_label = test_df.index[-1]
    test_df = test_df.drop(index=surplus_label)
    # errors='ignore' keeps the cell re-runnable if the row is already gone.
    test_data = test_data.drop(index=surplus_label, errors='ignore')

print(len(test_df))

In [None]:
# Predict on the test set.
# test_data was one-hot encoded independently of the training frame, so its
# columns can differ in order or be missing a category that did not occur in
# the test file. Reindexing to `features` (the columns the model was fitted on)
# puts them in the training order, fills an absent dummy column with 0, and
# drops any column the model never saw.
test_pred = lm.predict(test_data.reindex(columns=features, fill_value=0))
print(len(test_pred))

In [None]:
# Write the predictions into the submission template before saving it.
# Previously the template was saved back unchanged, so test_pred never reached
# the output file.
# NOTE(review): assumes the last column of sample.csv is the prediction column
# and that row i of the template corresponds to row i of the test data — confirm.
if len(test_pred) < len(sample):
    raise ValueError(
        f'got {len(test_pred)} predictions for {len(sample)} submission rows'
    )
# Any surplus predictions at the tail are ignored, mirroring the notebook's
# existing rule of trimming the end of the test set to the template length.
sample[sample.columns[-1]] = test_pred[:len(sample)]
sample.to_csv('./drive/MyDrive/DeepLearning/SIGNATE/Lunch_box_demand_forecast/sample.csv', index=False)