In [1]:
import datetime
import math
import os
import pickle
from multiprocessing import Pool

import numpy as np
import pandas as pd

In [2]:
def feature_generator(face_data):
    """Collapse each row's label columns into a fixed set of summary features.

    Parameters
    ----------
    face_data : pandas.DataFrame
        Must contain 'Name' and 'Date' columns. Every column from position 2
        onward is read as one numeric label series per row.

    Returns
    -------
    pandas.DataFrame
        A new frame holding 'Name', 'Date' and, in this order, 'weekdiff',
        'weekmean', 'weekstd', 'weekmaxmin', 'weeknegative', 'weekneutral',
        'weekpositive' and 'posminusneg'. The input frame is not modified.
    """
    labels = face_data.iloc[:, 2:]
    features = face_data[['Name', 'Date']].copy()

    # Sum of absolute changes between neighbouring columns; missing labels
    # are replaced by 3 before differencing.
    step_changes = labels.fillna(3).diff(axis=1).fillna(0).abs()
    features['weekdiff'] = step_changes.sum(axis=1)

    # Row-wise statistics that ignore missing labels.
    features['weekmean'] = labels.mean(axis=1, skipna=True)
    # A constant 0.01 is added to the standard deviation
    # (presumably to keep it strictly positive -- confirm with the model owner).
    features['weekstd'] = labels.std(axis=1, skipna=True) + 0.01
    row_max = labels.max(axis=1, skipna=True)
    row_min = labels.min(axis=1, skipna=True)
    features['weekmaxmin'] = row_max - row_min

    # Per-row counts of specific label values.
    features['weeknegative'] = labels.isin([1, 2]).sum(axis=1)
    features['weekneutral'] = labels.isin([3]).sum(axis=1)
    features['weekpositive'] = labels.isin([4]).sum(axis=1)
    features['posminusneg'] = features['weekpositive'] - features['weeknegative']
    return features

In [3]:
def data_processor(face_data, range_index):
    """Select rows with enough labels in the leading window and featurise them.

    Parameters
    ----------
    face_data : pandas.DataFrame
        Must contain 'ElderID', 'ElderName' and 'Surgery_date'. Once 'ElderID'
        is dropped, the columns from position 2 onward are read as the label
        series (assumes the layout is ElderName, Surgery_date, then labels --
        TODO confirm against the CSV schema).
    range_index : int
        Number of leading label columns that form the window.

    Returns
    -------
    pandas.DataFrame
        Output of ``feature_generator`` for the retained rows, sorted by
        'Name' then 'Date', with 'Date' converted to ``str`` and 'Name'
        UTF-8 encoded. The input frame is not modified.
    """
    labelled = face_data.drop(['ElderID'], axis=1)
    window_end = range_index + 2

    # Count missing labels inside the window in one vectorised pass. This
    # replaces a per-column math.isnan loop that relied on np.int (removed in
    # NumPy 1.24) and required every column '0'..N to exist.
    missing_in_window = labelled.iloc[:, 2:window_end].isna().sum(axis=1)

    # A row is kept when at most half of the window (rounded down) is missing.
    keep_row = missing_in_window <= range_index // 2

    # The selection mask is held separately instead of being appended as a
    # column, so it can never be picked up by the positional slice below.
    face_data_first = (
        labelled.loc[keep_row]
        .iloc[:, 0:window_end]
        .reset_index(drop=True)
        .rename(columns={'ElderName': 'Name', 'Surgery_date': 'Date'})
    )

    # generate feature
    face_data_feature = feature_generator(face_data_first)
    face_data_feature = face_data_feature.sort_values(['Name', 'Date'], ascending=[True, True])

    # Normalise the identifier columns after sorting, as before.
    face_data_feature['Date'] = face_data_feature['Date'].map(str)
    # NOTE(review): on Python 3 this turns every name into a bytes object;
    # kept for backward compatibility -- confirm whether downstream consumers
    # actually want bytes rather than str.
    face_data_feature['Name'] = face_data_feature['Name'].str.encode('utf-8')
    return face_data_feature

In [4]:
def model_forecastor(path, test_data):
    """Score ``test_data`` with two pickled models and average the results.

    Parameters
    ----------
    path : str or os.PathLike
        Directory containing 'lm_model.pkl' and 'rf_model.pkl'. A trailing
        path separator is optional.
    test_data : pandas.DataFrame
        Must contain 'Name' and 'Date'; every other column is passed to each
        model's ``predict`` method as the feature matrix.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'Name', 'Date' and 'forecast'. Each model's
        prediction is clamped at zero and floored before the two are averaged
        with equal weight, so 'forecast' may be a half-integer. ``test_data``
        is not modified.

    Raises
    ------
    OSError
        If either model file cannot be opened.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point `path` at directories whose contents are trusted.
    loaded_models = []
    for file_name in ('lm_model.pkl', 'rf_model.pkl'):
        # os.path.join inserts the separator only when it is missing, so both
        # '/models' and '/models/' resolve to the same files.
        with open(os.path.join(path, file_name), 'rb') as model_file:
            loaded_models.append(pickle.load(model_file))
    lm_model, rf_model = loaded_models

    # Generate test data: the models only see the feature columns.
    feature_matrix = test_data.drop(['Name', 'Date'], axis=1)

    def _floored_nonnegative(model):
        """Predict, replace negative values with 0, then round down."""
        raw = model.predict(feature_matrix)
        return np.floor(np.where(raw < 0, 0, raw))

    lm_forecast = _floored_nonnegative(lm_model)
    rf_forecast = _floored_nonnegative(rf_model)
    ensemble_forecast = 0.5 * lm_forecast + 0.5 * rf_forecast

    # Select first, then copy: the result is an independent frame, so adding
    # a column cannot trigger chained-assignment warnings.
    test_data_forecast = test_data[['Name', 'Date']].copy()
    test_data_forecast['forecast'] = ensemble_forecast
    return test_data_forecast

In [8]:
# NPI forecast main process: load labels -> build features -> ensemble forecast
#-- Inputs (absolute paths on the shared volume; edit here to retarget the run)
FACE_LABEL_CSV = '/KR7B_shared/zack.li/NPI_predict/New_sample_data/Face_label_result.csv'
MODEL_DIR = '/KR7B_shared/zack.li/NPI_predict/Exhibition_sample_data/'
WINDOW_LENGTH = 7  # number of leading label columns handed to data_processor

#-- Load data frame
face_data = pd.read_csv(FACE_LABEL_CSV, encoding='utf-8')
#-- Preprocess data frame
face_data_feature = data_processor(face_data, WINDOW_LENGTH)
#-- Forecast data frame
test_final_forecast = model_forecastor(MODEL_DIR, face_data_feature)
# Bare last expression so the notebook renders the forecast table.
test_final_forecast

Unnamed: 0,Name,Date,forecast
0,劉黃招,2019-04-28,4.0
1,劉黃招,2019-07-28,0.0
2,劉黃招,2019-10-28,5.0
3,吳江票,2019-08-27,3.0
4,吳江票,2019-11-27,1.0
5,周春枝,2019-01-15,5.0
6,周春枝,2019-07-16,0.0
7,姚忠順,2019-09-12,0.0
8,張岳軍,2019-03-17,0.0
9,曹陳金鳳,2019-07-01,7.5
