In [1]:
import pandas as pd 
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np 
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns 
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import xgboost as xgb
from datetime import date
from statsmodels.stats.weightstats import ttest_ind
from scipy.stats import skew
from sklearn.model_selection import cross_validate
from sklearn.model_selection import ShuffleSplit
from scipy import stats

In [2]:
def model_perform(X_train,y_train, X_test, y_test, model, name, verbose = 0 ):
    # train 
    model.fit(X_train, y_train)

    # test 
    y_pred_test = model.predict(X_test)
    y_pred_train = model.predict(X_train)


    if verbose == 1 :
        print(' MAE {} '.format(mean_absolute_error(y_pred_test,y_test)))
        print(' MSE {} '.format(mean_squared_error(y_pred_test,y_test)))
        print(' R2 {} '.format(r2_score(y_train,y_pred_train)))
    else : 
        pass 
    
    result = {}
    result['mae'] = np.round(mean_absolute_error(y_pred_test,y_test),5)
    result['mse'] = np.round(mean_squared_error(y_pred_test,y_test),5)
    result['R2'] = np.round(r2_score(y_train,y_pred_train),5)
    result['name'] = name
    result['feature_size'] = X_train.shape[1]
    result['train_size'] = X_train.shape[0]
    result['test_size'] = X_test.shape[0]
    result['pearson'] = np.round(stats.pearsonr(y_pred_test, y_test)[0],5)

    return model, y_pred_test, y_pred_train, result 

def show_error_pattern(y_pred, y_test):
    result_test = pd.DataFrame()
    result_test['score'] = y_pred
    result_test['type'] = 'predict'
    result_test['idx'] = np.arange(result_test.shape[0])

    result_test2 = pd.DataFrame()
    result_test2['score'] = np.squeeze(y_test)
    result_test2['type'] = 'test'
    result_test2['idx'] = np.arange(result_test2.shape[0])

    df_result = pd.concat([result_test2,result_test])

    sns.lineplot(data=df_result, x="idx", y='score', hue="type")


def bulk_train(df_input, drop_column, target_column, dataset_name,  rf_param, xgb_param, svr_param, verbose = 0,):

    error_report = []
    model_dict = {}


    # remove 0 rating
    df_input = df_input[df_input['rating']>0]

    # scaled the values 
    features_columns = df_input.drop(drop_column,axis=1).columns
    features_columns = list(features_columns) + [target_column]
    scaler = MinMaxScaler()
    df_input_scale = pd.DataFrame(scaler.fit_transform(df_input[features_columns]), columns = features_columns)
    


    # split train and test 
    X_train, X_test, y_train, y_test = train_test_split(df_input_scale.drop(target_column, axis=1), df_input_scale[target_column], test_size=0.12, random_state=42)

    if rf_param : 
        regr = RandomForestRegressor(**rf_param)
    else :
        regr = RandomForestRegressor(random_state=0)
    model_regr, y_pred_test_regr, y_pred_train_regr, result_rf   = model_perform(X_train, y_train, X_test, y_test, regr, name='rf', verbose = verbose )
    error_report.append(result_rf)

    if xgb_param:
        xgbr = XGBRegressor(**xgb_param)
    else :
        xgbr = XGBRegressor(random_state=0)

    model_xgbr, y_pred_test_xgbr, y_pred_train_xgbr, result_xgb  = model_perform(X_train, y_train, X_test, y_test, xgbr, name='xgb', verbose = verbose)
    error_report.append(result_xgb)

    # SVR 
    if svr_param:
        svr = SVR(**svr_param)
    else :
        svr = SVR(kernel='poly')
        
    model_svr, y_pred_test_svr, y_pred_train_svr, result_svr  = model_perform(X_train, y_train, X_test, y_test, svr, name='svr',verbose = verbose)
    error_report.append(result_svr)

    model_dict['rf'] = model_regr
    model_dict['xgbr'] = result_xgb
    model_dict['svr'] = result_svr
    
    df_report = pd.DataFrame(error_report)
    df_report['dataset'] = dataset_name
    return  model_dict, df_report


def bulk_train_k_fold(df_input, drop_column, target_column, dataset_name, rf_param, xgb_param, svr_param, verbose = 0):

        
    # remove 0 rating
    df_input = df_input[df_input['rating'] > 0]

    # scaled the values 
    features_columns = df_input.drop(drop_column,axis=1).columns
    features_columns = list(features_columns) + [target_column]
    scaler = MinMaxScaler()
    df_input_scale = pd.DataFrame(scaler.fit_transform(df_input[features_columns]), columns = features_columns)

    # split train and test 
    X_train, X_test, y_train, y_test = train_test_split(df_input_scale.drop(target_column, axis=1), df_input_scale[target_column], test_size=0.12, random_state=42)

    # cross validation 
    scoring = ['neg_mean_absolute_error','neg_mean_squared_error','r2']

    if rf_param:
        regr = RandomForestRegressor(**rf_param)
    else :
        regr = RandomForestRegressor(random_state=0)
        
    result_rf = cross_validation(regr, X_train, y_train, scoring = scoring, cv =5 )
    df_rf = pd.DataFrame(result_rf)
    df_rf['name'] = 'rf'

    if xgb_param:
        xgbr = XGBRegressor(**xgb_param)
    else :
        xgbr = XGBRegressor(random_state=0)

    result_xgb = cross_validation(xgbr, X_train, y_train, scoring = scoring, cv =5 )
    df_xgb = pd.DataFrame(result_xgb)
    df_xgb['name'] = 'xgb'


    if svr_param:
        svr = SVR(**svr_param)
    else :
        svr = SVR(kernel='poly')

    result_svr = cross_validation(svr, X_train, y_train, scoring = scoring, cv =5 )
    df_svr = pd.DataFrame(result_svr)
    df_svr['name'] = 'svr'

    df_result = pd.concat([df_rf,df_xgb,df_svr])
    df_result['dataset'] = dataset_name

    # Singe train and test 
    single_result = []

    # random forest
    tmp_result = {} 
    regr.fit(X_train,y_train)
    y_pred = regr.predict(X_test)
    tmp_result['mae'] = mean_absolute_error(y_pred, y_test)
    tmp_result['mse'] = mean_squared_error(y_pred, y_test)
    tmp_result['pearson'] = stats.pearsonr(y_pred, y_test)[0]
    tmp_result['name'] = 'rf'
    tmp_result['dataset'] = dataset_name
    tmp_result['feature_size'] = len(X_train.columns)
    tmp_result['train_size'] = X_train.shape[0]
    tmp_result['test_size'] = X_test.shape[0]
    single_result.append(tmp_result)

    # random forest
    tmp_result = {} 
    xgbr.fit(X_train,y_train)
    y_pred = xgbr.predict(X_test)
    tmp_result['mae'] = mean_absolute_error(y_pred, y_test)
    tmp_result['mse'] = mean_squared_error(y_pred, y_test)
    tmp_result['pearson'] = stats.pearsonr(y_pred, y_test)[0]
    tmp_result['name'] = 'xgb'
    tmp_result['dataset'] = dataset_name
    tmp_result['feature_size'] = len(X_train.columns)
    tmp_result['train_size'] = X_train.shape[0]
    tmp_result['test_size'] = X_test.shape[0]
    single_result.append(tmp_result)

    #svr 
    tmp_result = {} 
    svr.fit(X_train,y_train)
    y_pred = svr.predict(X_test)
    tmp_result['mae'] = mean_absolute_error(y_pred, y_test)
    tmp_result['mse'] = mean_squared_error(y_pred, y_test)
    tmp_result['pearson'] = stats.pearsonr(y_pred, y_test)[0]
    tmp_result['name'] = 'svr'
    tmp_result['dataset'] = dataset_name
    tmp_result['feature_size'] = len(X_train.columns)
    tmp_result['train_size'] = X_train.shape[0]
    tmp_result['test_size'] = X_test.shape[0]
    single_result.append(tmp_result)

    df_result_singel = pd.DataFrame(single_result)


    return df_result, df_result_singel


def cross_validation(model, X, y, scoring, cv=5):

    results = cross_validate(estimator=model,
                               X=X,
                               y=y,
                               cv=cv,
                               scoring=scoring,
                               return_train_score=True)

    return results


# Zomato

In [3]:
# zomato only
df_zom_train = pd.read_csv('data/3_clean_zomato_feat_ori_train.csv')
df_zom_train['rest_price_idr'] = df_zom_train['rest_price_idr'].astype(int)
df_zom_train = df_zom_train[df_zom_train['rating']>0]


df_zom_test = pd.read_csv('data/3_clean_zomato_feat_ori_test.csv')
df_zom_test['rest_price_idr'] = df_zom_test['rest_price_idr'].astype(int)
df_zom_test = df_zom_test[df_zom_test['rating']>0]

# df_zom = pd.read_csv('data/3_jbdk_clean_zomato_feat.csv')

# ori 
drop_column = ['url','index','rating','lat','long','review']
target_column = 'rating'

# cross validation 
rf_param = {'n_estimators':500,'max_features':3, 'max_depth':30,'bootstrap':True}
xgb_param = {'n_estimators':100, 'max_depth':5, 'min_child_weight':3, 'learning_rate':0.15, 'booster':'gbtree'}
svr_param  = {'kernel':'poly','gamma':0.1,'C':0.1}


# scaled the values 
features_columns = df_zom_train.drop(drop_column,axis=1).columns
features_columns = list(features_columns) + [target_column]

scaler_train = MinMaxScaler()
scaler_test = MinMaxScaler()

df_input_train_scale = pd.DataFrame(scaler_train.fit_transform(df_zom_train[features_columns]), columns = features_columns)
df_input_test_scale = pd.DataFrame(scaler_test.fit_transform(df_zom_test[features_columns]), columns = features_columns)

# Prediction 

result_zomato = []

# Random Forest 
regr = RandomForestRegressor(**rf_param)
model_regr, y_pred_test_regr, y_pred_train_regr, result_regr = model_perform(df_input_train_scale.drop(target_column,axis=1),  df_input_train_scale[target_column],  df_input_test_scale.drop(target_column,axis=1),  df_input_test_scale[target_column],  regr, name = 'rf', verbose = 0 )
result_zomato.append(result_regr)

# xgb 
xgbr = XGBRegressor(**xgb_param)
model_xgb, y_pred_test_xgb, y_pred_train_xgb, result_xgb = model_perform(df_input_train_scale.drop(target_column,axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], xgbr, name = 'xgb', verbose = 0 )
result_zomato.append(result_xgb)

# svr 
svr = SVR(**svr_param)
model_svr, y_pred_test_svr, y_pred_train_svr, result_svr = model_perform(df_input_train_scale.drop(target_column, axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], svr, name = 'svr', verbose = 0 )
result_zomato.append(result_svr)

df_result_zomato = pd.DataFrame(result_zomato)
df_result_zomato['dataset'] = 'zomato'

In [4]:
df_input_test_scale

Unnamed: 0,rest_price_idr,is_chain,alcohol,delivery,entertainment,indoor seating,kid friendly,no alcohol available,others_facility,outdoor seating,...,mid_east,noodle,others_type,seafood,snacks,special_food,street_food,tea,western,rating
0,0.029146,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.620690
1,0.041647,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,0.066647,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.448276
3,0.083314,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.586207
4,0.095814,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.586207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1178,0.104148,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.724138
1179,0.166649,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.724138
1180,0.058314,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.551724
1181,0.104148,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.655172


In [5]:
df_result_zomato

Unnamed: 0,mae,mse,R2,name,feature_size,train_size,test_size,pearson,dataset
0,0.12398,0.02546,0.76029,rf,41,4730,1183,0.41264,zomato
1,0.12642,0.02615,0.40629,xgb,41,4730,1183,0.41256,zomato
2,0.12298,0.02467,0.23789,svr,41,4730,1183,0.4076,zomato


# Zomato 250 

In [10]:
# zomato only
df_zom_250_train = pd.read_csv('data/3_clean_zomato_gof_ori_250_train.csv')
df_zom_250_train['rest_price_idr'] = df_zom_250_train['rest_price_idr'].astype(int)
df_zom_250_train = df_zom_250_train[df_zom_250_train['rating']>0]


df_zom_250_test = pd.read_csv('data/3_clean_zomato_gof_ori_250_test.csv')
df_zom_250_test['rest_price_idr'] = df_zom_250_test['rest_price_idr'].astype(int)
df_zom_250_test = df_zom_250_test[df_zom_250_test['rating']>0]

# df_zom = pd.read_csv('data/3_jbdk_clean_zomato_feat.csv')

# ori
drop_column = ['url','index','rating','lat','long','review','geohash','encode']
target_column = 'rating'

# cross validation 
rf_param = {'n_estimators':250,'max_features':3, 'max_depth':30,'bootstrap':True}
xgb_param = {'n_estimators':100, 'max_depth':5, 'min_child_weight':1, 'learning_rate':0.15, 'booster':'gbtree'}
svr_param  = {'kernel':'poly','gamma':0.1,'C':0.1}


# scaled the values 
features_columns = df_zom_250_train.drop(drop_column,axis=1).columns
features_columns = list(features_columns) + [target_column]

scaler_train = MinMaxScaler()
scaler_test = MinMaxScaler()

df_input_train_scale = pd.DataFrame(scaler_train.fit_transform(df_zom_250_train[features_columns]), columns = features_columns)
df_input_test_scale = pd.DataFrame(scaler_test.fit_transform(df_zom_250_test[features_columns]), columns = features_columns)

# Prediction 

result_zomato_250 = []

# Random Forest 
regr = RandomForestRegressor(**rf_param)
model_regr, y_pred_test_regr, y_pred_train_regr, result_regr = model_perform(df_input_train_scale.drop(target_column,axis=1),  df_input_train_scale[target_column],  df_input_test_scale.drop(target_column,axis=1),  df_input_test_scale[target_column],  regr, name = 'rf', verbose = 0 )
result_zomato_250.append(result_regr)

# xgb 
xgbr = XGBRegressor(**xgb_param)
model_xgb, y_pred_test_xgb, y_pred_train_xgb, result_xgb = model_perform(df_input_train_scale.drop(target_column,axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], xgbr, name = 'xgb', verbose = 0 )
result_zomato_250.append(result_xgb)

# svr 
svr = SVR(**svr_param)
model_svr, y_pred_test_svr, y_pred_train_svr, result_svr = model_perform(df_input_train_scale.drop(target_column, axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], svr, name = 'svr', verbose = 0 )
result_zomato_250.append(result_svr)

df_result_zomato_250 = pd.DataFrame(result_zomato_250)
df_result_zomato_250['dataset'] = 'zomato_250'

In [11]:
df_result_zomato_250

Unnamed: 0,mae,mse,R2,name,feature_size,train_size,test_size,pearson,dataset
0,0.12001,0.02362,0.83424,rf,60,4730,1183,0.45083,zomato_250
1,0.12174,0.02496,0.47433,xgb,60,4730,1183,0.4395,zomato_250
2,0.12278,0.02447,0.26521,svr,60,4730,1183,0.41555,zomato_250


# Zomato 500 

In [16]:
# zomato only
df_zom_500_train = pd.read_csv('data/3_clean_zomato_gof_ori_500_train.csv')
df_zom_500_train['rest_price_idr'] = df_zom_500_train['rest_price_idr'].astype(int)
df_zom_500_train = df_zom_500_train[df_zom_500_train['rating']>0]


df_zom_500_test = pd.read_csv('data/3_clean_zomato_gof_ori_500_test.csv')
df_zom_500_test['rest_price_idr'] = df_zom_500_test['rest_price_idr'].astype(int)
df_zom_500_test = df_zom_500_test[df_zom_500_test['rating']>0]

# df_zom = pd.read_csv('data/3_jbdk_clean_zomato_feat.csv')

# ori
drop_column = ['url','index','rating','lat','long','review','geohash','encode']
target_column = 'rating'

# cross validation
rf_param = {'n_estimators':500,'max_features':3, 'max_depth':30,'bootstrap':True}
xgb_param = {'n_estimators':100, 'max_depth':5, 'min_child_weight':3, 'learning_rate':0.2, 'booster':'gbtree'}
svr_param  = {'kernel':'poly','gamma':0.1,'C':0.1}

# scaled the values 
features_columns = df_zom_500_train.drop(drop_column,axis=1).columns
features_columns = list(features_columns) + [target_column]

scaler_train = MinMaxScaler()
scaler_test = MinMaxScaler()

df_input_train_scale = pd.DataFrame(scaler_train.fit_transform(df_zom_500_train[features_columns]), columns = features_columns)
df_input_test_scale = pd.DataFrame(scaler_test.fit_transform(df_zom_500_test[features_columns]), columns = features_columns)

# Prediction 

result_zomato_500 = []

# Random Forest 
regr = RandomForestRegressor(**rf_param)
model_regr, y_pred_test_regr, y_pred_train_regr, result_regr = model_perform(df_input_train_scale.drop(target_column,axis=1),  df_input_train_scale[target_column],  df_input_test_scale.drop(target_column,axis=1),  df_input_test_scale[target_column],  regr, name = 'rf', verbose = 0 )
result_zomato_500.append(result_regr)

# xgb 
xgbr = XGBRegressor(**xgb_param)
model_xgb, y_pred_test_xgb, y_pred_train_xgb, result_xgb = model_perform(df_input_train_scale.drop(target_column,axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], xgbr, name = 'xgb', verbose = 0 )
result_zomato_500.append(result_xgb)

# svr 
svr = SVR(**svr_param)
model_svr, y_pred_test_svr, y_pred_train_svr, result_svr = model_perform(df_input_train_scale.drop(target_column, axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], svr, name = 'svr', verbose = 0 )
result_zomato_500.append(result_svr)

df_result_zomato_500 = pd.DataFrame(result_zomato_500)
df_result_zomato_500['dataset'] = 'zomato_500'

In [17]:
df_result_zomato_500

Unnamed: 0,mae,mse,R2,name,feature_size,train_size,test_size,pearson,dataset
0,0.12006,0.02357,0.88486,rf,60,4730,1183,0.45421,zomato_500
1,0.12491,0.02602,0.64134,xgb,60,4730,1183,0.42244,zomato_500
2,0.12244,0.02432,0.28125,svr,60,4730,1183,0.42217,zomato_500


# Zomato 1000

In [18]:
# zomato only
df_zom_1000_train = pd.read_csv('data/3_clean_zomato_gof_ori_1000_train.csv')
df_zom_1000_train['rest_price_idr'] = df_zom_1000_train['rest_price_idr'].astype(int)
df_zom_1000_train = df_zom_1000_train[df_zom_1000_train['rating']>0]


df_zom_1000_test = pd.read_csv('data/3_clean_zomato_gof_ori_1000_test.csv')
df_zom_1000_test['rest_price_idr'] = df_zom_1000_test['rest_price_idr'].astype(int)
df_zom_1000_test = df_zom_1000_test[df_zom_1000_test['rating']>0]

# df_zom = pd.read_csv('data/3_jbdk_clean_zomato_feat.csv')

# ori
drop_column = ['url','index','rating','lat','long','review','geohash','encode']
target_column = 'rating'

# cross validation
rf_param = {'n_estimators':500,'max_features':3, 'max_depth':30,'bootstrap':True}
xgb_param = {'n_estimators':100, 'max_depth':5, 'min_child_weight':3, 'learning_rate':0.2, 'booster':'gbtree'}
svr_param  = {'kernel':'poly','gamma':0.1,'C':0.1}

# scaled the values 
features_columns = df_zom_1000_train.drop(drop_column,axis=1).columns
features_columns = list(features_columns) + [target_column]

scaler_train = MinMaxScaler()
scaler_test = MinMaxScaler()

df_input_train_scale = pd.DataFrame(scaler_train.fit_transform(df_zom_1000_train[features_columns]), columns = features_columns)
df_input_test_scale = pd.DataFrame(scaler_test.fit_transform(df_zom_1000_test[features_columns]), columns = features_columns)

# Prediction 

result_zomato_1000 = []

# Random Forest 
regr = RandomForestRegressor(**rf_param)
model_regr, y_pred_test_regr, y_pred_train_regr, result_regr = model_perform(df_input_train_scale.drop(target_column,axis=1),  df_input_train_scale[target_column],  df_input_test_scale.drop(target_column,axis=1),  df_input_test_scale[target_column],  regr, name = 'rf', verbose = 0 )
result_zomato_1000.append(result_regr)

# xgb 
xgbr = XGBRegressor(**xgb_param)
model_xgb, y_pred_test_xgb, y_pred_train_xgb, result_xgb = model_perform(df_input_train_scale.drop(target_column,axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], xgbr, name = 'xgb', verbose = 0 )
result_zomato_1000.append(result_xgb)

# svr 
svr = SVR(**svr_param)
model_svr, y_pred_test_svr, y_pred_train_svr, result_svr = model_perform(df_input_train_scale.drop(target_column, axis=1), df_input_train_scale[target_column], df_input_test_scale.drop(target_column,axis=1), df_input_test_scale[target_column], svr, name = 'svr', verbose = 0 )
result_zomato_1000.append(result_svr)

df_result_zomato_1000 = pd.DataFrame(result_zomato_1000)
df_result_zomato_1000['dataset'] = 'zomato_1000'

In [19]:
df_result_zomato_1000

Unnamed: 0,mae,mse,R2,name,feature_size,train_size,test_size,pearson,dataset
0,0.11915,0.02324,0.88669,rf,60,4730,1183,0.47026,zomato_1000
1,0.1255,0.0258,0.65563,xgb,60,4730,1183,0.4084,zomato_1000
2,0.12145,0.02404,0.30091,svr,60,4730,1183,0.43416,zomato_1000


# Final result 

In [20]:
# cross validation 
df_final_result = pd.concat([df_result_zomato ,df_result_zomato_250,df_result_zomato_500,df_result_zomato_1000])


In [21]:
df_final_result

Unnamed: 0,mae,mse,R2,name,feature_size,train_size,test_size,pearson,dataset
0,0.12398,0.02546,0.76029,rf,41,4730,1183,0.41264,zomato
1,0.12642,0.02615,0.40629,xgb,41,4730,1183,0.41256,zomato
2,0.12298,0.02467,0.23789,svr,41,4730,1183,0.4076,zomato
0,0.12001,0.02362,0.83424,rf,60,4730,1183,0.45083,zomato_250
1,0.12174,0.02496,0.47433,xgb,60,4730,1183,0.4395,zomato_250
2,0.12278,0.02447,0.26521,svr,60,4730,1183,0.41555,zomato_250
0,0.12006,0.02357,0.88486,rf,60,4730,1183,0.45421,zomato_500
1,0.12491,0.02602,0.64134,xgb,60,4730,1183,0.42244,zomato_500
2,0.12244,0.02432,0.28125,svr,60,4730,1183,0.42217,zomato_500
0,0.11915,0.02324,0.88669,rf,60,4730,1183,0.47026,zomato_1000


In [13]:
today = date.today()
print("Today's date:", today)

Today's date: 2022-11-19


In [14]:
df_final_result.to_excel('report/ml_perform_final.xlsx'.format(today),index=False)