In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))
from sklearn.metrics import mean_squared_error
import lightgbm

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/google-apple-facebook-stock-price/AAPL.csv
/kaggle/input/google-apple-facebook-stock-price/GOOG.csv
/kaggle/input/google-apple-facebook-stock-price/META.csv


In [2]:
def _load_adj_close_2021(csv_path, label):
    """Read one price CSV and keep the 2021 'Adj Close' rows under the column `label`."""
    raw = pd.read_csv(csv_path)
    # 'Date' is compared against string literals: lower bound exclusive, upper bound inclusive.
    in_window = (raw['Date'] > '2021-01-01') & (raw['Date'] <= '2021-12-31')
    prices = raw.loc[in_window, ['Date', 'Adj Close']]
    prices.columns = ['Date', label]
    return prices


# One two-column frame (Date, <company>) per CSV.
apple = _load_adj_close_2021('../input/google-apple-facebook-stock-price/AAPL.csv', 'Apple')
google = _load_adj_close_2021('../input/google-apple-facebook-stock-price/GOOG.csv', 'Google')
meta = _load_adj_close_2021('../input/google-apple-facebook-stock-price/META.csv', 'Meta')

# Wide table: one row per date, one column per company (outer join keeps every date).
merge = apple.merge(google, how='outer', on='Date').merge(meta, how='outer', on='Date')

# Long format: stack the company columns under each date.
tmp = merge.set_index('Date').stack()

# Name the columns, then index the long table by (Date, Name) with a single 'Price' column.
tmp_df = pd.DataFrame(tmp).reset_index()
tmp_df.columns = ['Date', 'Name', 'Price']
tmp_df = tmp_df.set_index(['Date', 'Name'])

In [3]:
# Pivot to one column per name and lag every price series by 22 rows.
lagged_wide = tmp_df.unstack().shift(22)
tmp_df['Shift22'] = lagged_wide.stack(dropna=False)['Price']

# The 22-row rolling mean is taken over the *lagged* prices, not the raw ones,
# so MA22 only uses values that are at least 22 rows old.
df = lagged_wide.rolling(22).mean().stack(dropna=False)
tmp_df['MA22'] = df['Price']

# Drop the warm-up rows that have no lag / moving average yet and flatten the index
# back into 'Date' and 'Name' columns.
tmp_df = tmp_df.dropna().reset_index()
tmp_df

Unnamed: 0,Date,Name,Price,Shift22,MA22
0,2021-03-08,Apple,115.342575,132.570770,131.854972
1,2021-03-08,Google,101.208504,103.503502,91.495955
2,2021-03-08,Meta,255.309998,266.649994,264.696363
3,2021-03-09,Apple,120.031227,135.985489,132.213991
4,2021-03-09,Google,102.635002,103.118500,92.255341
...,...,...,...,...,...
622,2021-12-30,Google,146.002502,146.113998,147.637750
623,2021-12-30,Meta,344.359985,338.029999,335.598182
624,2021-12-31,Apple,176.838257,164.618805,153.368775
625,2021-12-31,Google,144.679504,142.451996,147.470614


### グループごとにモデルを評価

In [4]:
def model_evaluation(data, name_list, model, horizon=None):
    """Fit and score `model` separately for every name, holding out the last rows.

    For each entry of `name_list` the rows of `data` with that 'Name' are split
    positionally: the last `horizon` rows are the test set, everything before is
    the training set. The columns 'Date', 'Name' and 'Price' are dropped to form
    the features; 'Price' is the target. The same `model` object is re-fitted for
    each name.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Date', 'Name', 'Price' plus at least one feature column.
    name_list : iterable
        Values of the 'Name' column to evaluate, in order.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    horizon : int, optional
        Number of trailing rows per name used as the test set. When omitted, the
        notebook-level ``pred_len`` variable is used (the original behaviour).

    Returns
    -------
    (pred_ls, mape_ls, rmse_ls) : tuple of lists
        Per name, in `name_list` order: a Series of predictions indexed like the
        test rows and named 'Price', the MAPE in percent, and the RMSE.

    Raises
    ------
    ValueError
        If `horizon` is not positive or a name has no rows left for training.
    """
    if horizon is None:
        # Backward compatible: fall back to the global set in the driver cell.
        horizon = pred_len
    if horizon <= 0:
        raise ValueError('horizon must be a positive number of rows')

    non_feature_cols = ['Date', 'Name', 'Price']
    mape_ls, rmse_ls, pred_ls = [], [], []

    for name in name_list:
        # select the rows of one ticker
        df = data[data['Name'] == name]
        if len(df) <= horizon:
            raise ValueError(
                'not enough rows for %r: %d rows, horizon %d' % (name, len(df), horizon)
            )
        # test/train split (positional: the last `horizon` rows are held out)
        train_df, test_df = df.iloc[:-horizon], df.iloc[-horizon:]
        # X/y split
        X = train_df.drop(columns=non_feature_cols)
        y = train_df['Price']
        X_test = test_df.drop(columns=non_feature_cols)
        y_test = test_df['Price']
        # model fit
        model.fit(X, y)
        y_pred = np.asarray(model.predict(X_test))
        # Predictions aligned on the test rows. (Assigning `['Price']` on a copy of
        # the y_test Series would only add one extra element labelled 'Price'.)
        pred_df = pd.Series(y_pred, index=y_test.index, name='Price')
        pred_ls.append(pred_df)
        # validation
        actual = y_test.to_numpy()
        error = y_pred - actual
        mape = np.mean(np.abs(error / actual)) * 100
        rmse = np.sqrt(np.mean(error ** 2))
        mape_ls.append(mape)
        rmse_ls.append(rmse)
        # print every training result
        print('MAPE:', mape, 'RMSE:', rmse)
    # print average of all results
    print('Ave MAPE:', np.mean(mape_ls), 'Ave RMSE:', np.mean(rmse_ls))
    return pred_ls, mape_ls, rmse_ls

In [5]:
# Hold-out length (rows per name) read by model_evaluation as a global.
pred_len = 22

data = tmp_df
name_list = data['Name'].unique()
model = lightgbm.LGBMRegressor()

# Note: the result is the full (predictions, MAPEs, RMSEs) tuple returned by model_evaluation.
pred_list = model_evaluation(data, name_list, model)

MAPE: 16.09745686399475 RMSE: 29.600330450238786
MAPE: 3.902565774323355 RMSE: 6.122525114965094
MAPE: 6.394826434010242 RMSE: 23.424275256614237
Ave MAPE: 8.79828302410945 Ave RMSE: 19.715710273939372


### 全体的にモデルを評価

In [6]:
def model_evaluation_once(data, model, horizon=None):
    """Fit and score `model` once on all names together, holding out the last rows.

    The split is positional: the last ``horizon * <number of distinct names>``
    rows of `data` form the test set and everything before is the training set.
    The columns 'Date', 'Name' and 'Price' are dropped to form the features;
    'Price' is the target.

    NOTE(review): a positional split only lines up with the last `horizon` dates
    when `data` is sorted by date with one row per name and date -- confirm for
    the frame passed in.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Date', 'Name', 'Price' plus at least one feature column.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    horizon : int, optional
        Number of trailing rows per name used as the test set. When omitted, the
        notebook-level ``pred_len`` variable is used (the original behaviour).

    Returns
    -------
    pandas.Series
        Predictions indexed like the test rows and named 'Price'.

    Raises
    ------
    ValueError
        If `horizon` is not positive or no rows would be left for training.
    """
    if horizon is None:
        # Backward compatible: fall back to the global set in the driver cell.
        horizon = pred_len
    if horizon <= 0:
        raise ValueError('horizon must be a positive number of rows')

    # Size the hold-out from the names actually present instead of assuming three.
    n_test = horizon * data['Name'].nunique()
    if len(data) <= n_test:
        raise ValueError(
            'not enough rows: %d rows, %d needed for the test set' % (len(data), n_test)
        )

    non_feature_cols = ['Date', 'Name', 'Price']
    # test/train split
    train_df, test_df = data.iloc[:-n_test], data.iloc[-n_test:]
    X = train_df.drop(columns=non_feature_cols)
    y = train_df['Price']
    X_test = test_df.drop(columns=non_feature_cols)
    y_test = test_df['Price']
    # model fit
    model.fit(X, y)
    y_pred = np.asarray(model.predict(X_test))
    # Predictions aligned on the test rows. (Assigning `['Price']` on a copy of
    # the y_test Series would only add one extra element labelled 'Price'.)
    pred_df = pd.Series(y_pred, index=y_test.index, name='Price')
    # validation
    actual = y_test.to_numpy()
    error = y_pred - actual
    mape = np.mean(np.abs(error / actual)) * 100
    rmse = np.sqrt(np.mean(error ** 2))
    # print the training result
    print('MAPE:', mape, 'RMSE:', rmse)

    return pred_df

In [7]:
# Hold-out length (rows per name) read by model_evaluation_once as a global.
pred_len = 22

data = tmp_df
model = lightgbm.LGBMRegressor()

# Single pooled fit over all names; the function prints its own MAPE / RMSE.
pred_df = model_evaluation_once(data, model)

MAPE: 9.871512687477578 RMSE: 26.153705562087776
