# In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import random
import os
import math
import lightgbm as lgb
from sklearn.linear_model import Lasso,Ridge,LinearRegression
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split,cross_val_score,ShuffleSplit,GridSearchCV
from sklearn.feature_selection import RFE
from sklearn.manifold import TSNE
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from xgboost import XGBRegressor
from scipy import stats
sns.set_style('ticks')

# # 1 Load data
# ADMET table; it is read here but not referenced again in this file.
ADMET = pd.read_excel('./数据/ADMET.xlsx')
# Activity table; its 'pIC50' column is used as the regression target below.
label = pd.read_excel('./数据/ERα_activity.xlsx')
# Molecular-descriptor table from which the model inputs are selected.
data = pd.read_excel('./数据/Molecular_Descriptor.xlsx')
# Table whose 'features' column lists descriptor names.
# NOTE(review): presumably already ordered by importance, since the sort below
# is commented out — confirm against how aa.xlsx was produced.
features = pd.read_excel('./数据/aa.xlsx')
#features = features.sort_values(by='rf_score',ascending=False)
# Use the first 20 listed descriptor names as model inputs.
selected_f = list(features['features'])[:20]
X = data[selected_f]
y = label['pIC50']

def is_norm(X_train):
    """Return the columns of ``X_train`` that pass a one-sample KS normality test.

    Each column is compared against a normal distribution whose location and
    scale are the column's own ``mean()`` and ``std()``.  A column is kept
    when the test's p-value is greater than 0.05.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Table whose columns are tested one by one.

    Returns
    -------
    list
        Column labels with p-value > 0.05, in column order.
    """
    norm_cols = []
    for col in X_train.columns:
        values = X_train[col]
        ks = stats.kstest(values, 'norm', (values.mean(), values.std()))
        # Read the p-value from the result object.  Parsing ``str(ks)`` with
        # fixed slicing breaks whenever the repr changes (extra result fields,
        # ``np.float64(...)`` wrappers) and can silently truncate the number.
        if ks.pvalue > 0.05:
            norm_cols.append(col)
    return norm_cols
        
def three_sigma(x):
    """Return the positions of values lying outside mean ± 3·std of ``x``.

    The mean and standard deviation are taken with ``np.mean`` / ``np.std``
    over the whole input.

    Parameters
    ----------
    x : sequence of numbers
        Values to screen; iterated in order.

    Returns
    -------
    list of int
        Zero-based positions (not labels) of the values that fall strictly
        below ``mean - 3*std`` or strictly above ``mean + 3*std``.
    """
    mean_value = np.mean(x)
    std_value = np.std(x)
    lower = mean_value - 3 * std_value
    upper = mean_value + 3 * std_value
    # The original also ran a KS test here and discarded the result; that
    # call had no effect on the returned indices and has been dropped.
    return [i for i, each in enumerate(x) if each < lower or each > upper]

if __name__ == '__main__':
    # Collect the training-set columns that pass the KS normality test.
    # NOTE(review): X_train is first assigned by the train_test_split call
    # further down in this file, so executed strictly top-to-bottom this line
    # raises NameError — presumably the notebook cells were run out of order.
    norm_cols = is_norm(X_train)

# Box plots of the training descriptors on a 2x2 grid, five descriptors per panel.
box_groups = [
    ['MDEC-23','LipoaffinityIndex','MDEC-33','XLogP','C3SP2'],
    ['MLFER_A','ATSc3','BCUTc-1l','BCUTc-1h','VC-5'],
    ['nHBAcc','minHBa','CrippenLogP','MLFER_BH','nAtomP'],
    ['maxHBd','hmin','ETA_Shape_Y','SCH-7','ATSc4'],
]

plt.figure(figsize=(20,10))
for panel, group in enumerate(box_groups, start=1):
    plt.subplot(2, 2, panel)
    plt.boxplot(X_train[group])
    plt.yticks(fontsize=14, fontweight='bold')
    # One tick per box (positions 1..5), labelled with the descriptor name.
    plt.xticks(np.arange(1, 6, 1), group, fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('./箱图.tif', dpi=100)
plt.show()

# One box plot per training column, laid out on a fixed 4x5 grid.
# NOTE(review): this saves to the same './箱图.tif' path as the grouped
# box-plot figure above, so that earlier file is overwritten — confirm intended.
fig = plt.figure(figsize=(20,15))
for slot, col in enumerate(X_train.columns, start=1):
    ax = fig.add_subplot(4, 5, slot)
    ax.boxplot(X_train[col])
    ax.set_ylabel(col, fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('./箱图.tif', dpi=100)
plt.show()

def normalize(X_train):
    """Standardise ``X_train`` column-wise: subtract the mean, divide by the std.

    Both statistics are taken along axis 0 using the input's own ``mean`` and
    ``std`` methods; the result has the same shape as the input.
    """
    column_mean = X_train.mean(axis=0)
    column_std = X_train.std(axis=0)
    centered = X_train - column_mean
    return centered / column_std
if __name__ == '__main__':
    # Hold out 20% of the samples for testing.  The split is seeded (same seed
    # value the estimators in this file use) so the metrics and figures
    # computed from it are reproducible from run to run.
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,shuffle=True,random_state=5)

def out_index(x,up=None,down=None):
    """Return the positions of values in ``x`` that fall outside given limits.

    Parameters
    ----------
    x : iterable of numbers
        Values to screen; iterated once, in order.
    up : number or None
        Upper limit; a value strictly greater than it is flagged.
        ``None`` disables the upper check.
    down : number or None
        Lower limit; a value strictly less than it is flagged.
        ``None`` disables the lower check.

    Returns
    -------
    list of int
        Zero-based positions of the flagged values.  Empty when both limits
        are ``None``.
    """
    has_up = up is not None
    has_down = down is not None
    if not (has_up or has_down):
        return []
    return [
        i for i, each in enumerate(x)
        if (has_up and each > up) or (has_down and each < down)
    ]

if __name__ == '__main__':
    # Per-descriptor (upper, lower) limits; None means "no limit on that side".
    # bond[i] belongs to bounded_cols[i].
    out_inx = {}
    bond=[(2,None),(10,None),(0.2,-0.25),(-0.35,-0.37),(17,3),(22,None),(7,0.1),(0.47,None), (7,None),(12,None),(0.45,None),(0.8,0.25),(1.4,None),(4,None),(0.5,0.18),(8,3),(0.3,-0.3)]
    bounded_cols = ['MLFER_A','nHBAcc','ATSc3','BCUTc-1l','LipoaffinityIndex','MDEC-33','XLogP','BCUTc-1h','C3SP2','minHBa','VC-5','maxHBd','SCH-7','MLFER_BH','ETA_Shape_Y','CrippenLogP','ATSc4']
    for col, (upper, lower) in zip(bounded_cols, bond):
        out_inx[col] = out_index(list(X_train[col]), up=upper, down=lower)

    # Pool the flagged row positions of every descriptor and count how many
    # descriptors flagged each row (single pass instead of list.count per row).
    all_index = []
    for v in out_inx.values():
        all_index += v
    all_set = set(all_index)
    all_dict = {}
    for each in all_index:
        all_dict[each] = all_dict.get(each, 0) + 1
    all_pd = pd.DataFrame({'index':list(all_dict.keys()),'count':list(all_dict.values())})

    # Rows flagged by at least 5 descriptors are treated as outliers.
    all_out = all_pd[all_pd['count']>=5]
    all_out2 = all_pd[all_pd['count']<5]

    # The row positions to drop live in the 'index' COLUMN.  ``all_out.index``
    # is the row labels of all_pd itself, which removed unrelated rows.
    drop_positions = set(all_out['index'])
    keep_positions = [i for i in range(X_train.shape[0]) if i not in drop_positions]

    # Select the kept rows in one step: DataFrame.append no longer exists in
    # pandas 2.x, and the hand-written empty frame carried a misspelled
    # 'SCH7' column that introduced an all-NaN feature.
    X_train_new = X_train.iloc[keep_positions]
    y_train_new = list(y_train.iloc[keep_positions])


# # 2 Building the prediction models
# ### 2.1 Preliminary model training
# Each cell below runs a 5-fold GridSearchCV on X_train_new / y_train_new and
# rebinds the shared name `grid`, so only the last search remains accessible.
# NOTE(review): X_train_new / y_train_new are created inside an
# `if __name__ == '__main__':` block earlier in this file.

# Random forest: tree depth and number of trees.
rf = RandomForestRegressor(random_state=5)
rf_param = {'max_depth':list(range(1,5)),'n_estimators':[50,100,150,200]}
grid = GridSearchCV(rf,param_grid=rf_param, cv=5)
grid.fit(X_train_new, y_train_new)
print(grid.best_params_)

# Lasso: regularisation strength.
LS = Lasso(random_state=5)
LS_param = {'alpha':np.arange(0.0001,0.001,0.0002)}
grid = GridSearchCV(LS,param_grid=LS_param, cv=5)
grid.fit(X_train_new, y_train_new)
# Bare expression: shown as a notebook cell result, no effect when run as a script.
grid.best_params_

# SVR: kernel type and penalty C.
svr = SVR()
svr_param = {'kernel':['poly','rbf','sigmoid'],'C':np.arange(1,3,0.2)}
grid = GridSearchCV(svr,param_grid=svr_param, cv=5)
grid.fit(X_train_new, y_train_new)
# Bare expression: shown as a notebook cell result, no effect when run as a script.
grid.best_params_

# Ridge: regularisation strength (the best parameters are not printed here).
rg = Ridge(random_state=5)
rg_param = {'alpha':np.arange(0.0001,0.001,0.0002)}
grid = GridSearchCV(rg,param_grid=rg_param, cv=5)
grid.fit(X_train_new, y_train_new)

# Gradient boosting: learning rate, number of trees, tree depth
# (the best parameters are not printed here).
gbr = GradientBoostingRegressor(random_state=5)
gbr_param = {'learning_rate':np.arange(0.001,0.005,0.001),'n_estimators':[10,30,50,100,150,200,250,300],'max_depth':list(range(1,10,2))}
grid = GridSearchCV(gbr,param_grid=gbr_param, cv=5)
grid.fit(X_train_new, y_train_new)

# Hold-out metrics per model, keyed by a short model tag.
# Every model below is fitted on X_train / y_train and scored on X_test / y_test.
# NOTE(review): the grid searches earlier in the file were run on
# X_train_new / y_train_new, whereas these fits use the unfiltered X_train —
# confirm which training set is intended.
mae_score = {}
mse_score = {}
R2_score = {}

# Linear regression
lr = LinearRegression()
lr.fit(X_train,y_train)
lr_pred = lr.predict(X_test)
mae_score['LR'] = mean_absolute_error(y_test,lr_pred)
mse_score['LR'] = mean_squared_error(y_test,lr_pred)
R2_score['LR'] = r2_score(y_test,lr_pred)

# Random forest (default hyper-parameters apart from the seed)
rf = RandomForestRegressor(random_state=5)
rf.fit(X_train,y_train)
rf_pred = rf.predict(X_test)
mae_score['RF'] = mean_absolute_error(y_test,rf_pred)
mse_score['RF'] = mean_squared_error(y_test,rf_pred)
R2_score['RF'] = r2_score(y_test,rf_pred)

# LASSO
LS = Lasso(alpha=0.0003,random_state=5)
LS.fit(X_train,y_train)
LS_pred = LS.predict(X_test)
mae_score['LS'] = mean_absolute_error(y_test,LS_pred)
mse_score['LS'] = mean_squared_error(y_test,LS_pred)
R2_score['LS'] = r2_score(y_test,LS_pred)

# SVR (default hyper-parameters)
svr = SVR()
svr.fit(X_train,y_train)
svr_pred = svr.predict(X_test)
mae_score['SVR'] = mean_absolute_error(y_test,svr_pred)
mse_score['SVR'] = mean_squared_error(y_test,svr_pred)
R2_score['SVR'] = r2_score(y_test,svr_pred)

# Ridge
# NOTE(review): alpha=0.002 lies outside the alpha grid searched earlier in this file.
ridge =Ridge(alpha=0.002,random_state=5)
ridge.fit(X_train,y_train)
ridge_pred = ridge.predict(X_test)
mae_score['Ridge'] = mean_absolute_error(y_test,ridge_pred)
mse_score['Ridge'] = mean_squared_error(y_test,ridge_pred)
R2_score['Ridge'] = r2_score(y_test,ridge_pred)

# Gradient Boosting Regression
# NOTE(review): learning_rate=0.05 lies outside the learning-rate grid searched earlier in this file.
GBR=GradientBoostingRegressor(learning_rate=0.05,n_estimators=30,max_depth=3,random_state=5)

GBR.fit(X_train,y_train)
GBR_pred = GBR.predict(X_test)
mae_score['GBR'] = mean_absolute_error(y_test,GBR_pred)
mse_score['GBR'] = mean_squared_error(y_test,GBR_pred)
R2_score['GBR'] = r2_score(y_test,GBR_pred)

# XGBoost
n_estimators =10  # number of trees
MAX_DEPTH = 2
LR = 0.3
min_child_weight = 1  # minimum child (leaf-node) weight
base_score = 0.5
GAMMA = 0.05
xgb = XGBRegressor(n_estimators=n_estimators,learning_rate=LR,max_depth=MAX_DEPTH,                   min_child_weight=min_child_weight,base_score=base_score,gamma=GAMMA)
xgb.fit(X_train,y_train)
xgb_pred = xgb.predict(X_test)
mae_score['XGB'] = mean_absolute_error(y_test,xgb_pred)
mse_score['XGB'] = mean_squared_error(y_test,xgb_pred)
R2_score['XGB'] = r2_score(y_test,xgb_pred)

# LightGBM
gbm = lgb.LGBMRegressor(num_leaves=30, learning_rate=0.05, n_estimators=200)
gbm.fit(X_train,y_train)
gbm_pred =gbm.predict(X_test)
mae_score['LGB'] = mean_absolute_error(y_test,gbm_pred)
mse_score['LGB'] = mean_squared_error(y_test,gbm_pred)
R2_score['LGB'] = r2_score(y_test,gbm_pred)
# Dump the three metric dictionaries.
print('MAE:-------------------------')
print(mae_score)
print('MSE:-------------------------')
print(mse_score)
print('R2:--------------------------')
print(R2_score)

# Bar chart of the hold-out MSE of each model, largest first.
# The table is built here from the mse_score dictionary filled in above;
# previously `MSE` was referenced before any assignment in this file, which
# raises NameError when the script runs top-to-bottom.
MSE = pd.DataFrame({'Model name': list(mse_score.keys()), 'MSE': list(mse_score.values())})
# Display the gradient-boosting model as 'GBDT'.  Renaming by label replaces
# the positional `MSE.iloc[4,0] = 'GBDT'`, which only hit the right row when
# that model happened to rank fifth after sorting.
# NOTE(review): assumes the 'GBR' entry was the intended rename target — confirm.
MSE['Model name'] = MSE['Model name'].replace({'GBR': 'GBDT'})
MSE = MSE.sort_values(by='MSE', ascending=False)

plt.figure(figsize=(12,10))
sns.barplot(x='Model name',y='MSE',data=MSE,palette='hsv')
plt.xlabel('Model name',fontsize=14,fontweight='bold')
plt.ylabel('MSE',fontsize=14,fontweight='bold')
plt.xticks(fontsize=12,fontweight='bold')
plt.yticks(fontsize=12,fontweight='bold')
plt.savefig('./MSE.tif',dpi=100)
plt.show()

# 5-fold grid search over LightGBM learning rate, depth, leaf count and tree
# count on X_train_new / y_train_new; rebinds the shared name `grid`.
gbm_param= {'learning_rate':np.arange(0.001,0.01,0.002),'max_depth':list(range(1,15,2)),'num_leaves':[30,40],'n_estimators':[50,100,150,200,250,300]}
grid = GridSearchCV(lgb.LGBMRegressor(),param_grid=gbm_param, cv=5)
grid.fit(X_train_new, y_train_new)
print(grid.best_params_)

n_folds = 10
# Mean cross-validated RMSE per model (rounded to 3 decimals), keyed by model name.
cross_score = {}


def _report_cv_rmse(model, score_key, display_name):
    """Cross-validate ``model`` on the training split and report its RMSE.

    Runs ``n_folds``-fold cross-validation with the negative-MSE scorer on
    ``X_train`` / ``y_train``, converts each fold score to an RMSE, stores the
    rounded mean under ``cross_score[score_key]`` and prints mean and spread.

    Parameters
    ----------
    model : estimator
        Estimator passed to ``cross_val_score``.
    score_key : str
        Key under which the mean RMSE is stored in ``cross_score``.
    display_name : str
        Name used in the printed ``For <name> model:`` header.

    Returns
    -------
    numpy.ndarray
        Per-fold RMSE values.
    """
    scores = cross_val_score(model, X_train, y_train, scoring='neg_mean_squared_error', cv=n_folds)
    # The scorer returns negated MSE; negate and take the root to get RMSE.
    rmse_scores = np.sqrt(-scores)
    cross_score[score_key] = rmse_scores.mean().round(decimals=3)
    print('For ' + display_name + ' model:')
    print('Mean RMSE = ' + str(rmse_scores.mean().round(decimals=3)))
    print('Error std deviation = ' + str(rmse_scores.std().round(decimals=3)))
    return rmse_scores


# The *_mae_scores names are kept for compatibility; they hold RMSE values, not MAE.
lr_mae_scores = _report_cv_rmse(lr, 'LinearRegression', 'LR')
rf_mae_scores = _report_cv_rmse(rf, 'RandomForest', 'RF')
ls_mae_scores = _report_cv_rmse(LS, 'Lasso', 'LS')
svr_mae_scores = _report_cv_rmse(svr, 'SVR', 'svr')
ridge_mae_scores = _report_cv_rmse(ridge, 'Ridge', 'ridge')
gbr_mae_scores = _report_cv_rmse(GBR, 'GBDT', 'GBDT')
xgb_mae_scores = _report_cv_rmse(xgb, 'XGBoost', 'xgb')
gbm_mae_scores = _report_cv_rmse(gbm, 'LightGBM', 'LGMB')

# Line plot comparing RMSE and MSE across the eight models.
# NOTE(review): the numbers below are hard-coded literals rather than being
# read from cross_score / mse_score — presumably copied from an earlier run;
# they will not update when the models are retrained.
RMSE=pd.DataFrame({'Model name':['LR','RF','Lasso','SVR','Ridge','GBDT','XGBoost','LGBM'], 'RMSE':[0.949,0.744,0.948,0.971,0.949,0.934,0.923,0.735]})
MSE=pd.DataFrame({'Model name':['LR','RF','Lasso','SVR','Ridge','GBDT','XGBoost','LGBM'], 'MSE':[0.86,0.51,0.86,0.83,0.86,0.82,0.80,0.47]})
# Each table is sorted by its own metric, largest first.
RMSE = RMSE.sort_values(by='RMSE',ascending=False)
MSE = MSE.sort_values(by='MSE',ascending=False)
plt.figure(figsize=(8,6))
sns.lineplot(x='Model name',y='RMSE',data=RMSE,marker='o',color='blue',markersize=10,label='RMSE')
sns.lineplot(x='Model name',y='MSE',data=MSE,marker='s',color='red',markersize=10,label='MSE')
plt.xlabel('Model name',fontsize=14,fontweight='bold')
plt.ylabel('RMSE/MSE',fontsize=14,fontweight='bold')
plt.xticks(fontsize=12,fontweight='bold')
plt.yticks(fontsize=12,fontweight='bold')
plt.savefig('./RMSE_MSE.tif',dpi=100)
plt.show()

# Bar chart of the mean cross-validated RMSE stored in cross_score, largest first.
model_names = list(cross_score.keys())
model_RMSE = list(cross_score.values())
model_error = pd.DataFrame({"Model_name":model_names,'Model_RMSE':model_RMSE})
model_error = model_error.sort_values(by='Model_RMSE',ascending=False)
plt.figure(figsize=(12,10))
sns.barplot(x='Model_name',y='Model_RMSE',data=model_error,palette='gist_rainbow')
plt.xlabel('Model name',fontsize=14,fontweight='bold')
# Axis label previously read 'RMES' (typo).
plt.ylabel('RMSE',fontsize=14,fontweight='bold')
plt.xticks(fontsize=12,fontweight='bold')
plt.yticks(fontsize=12,fontweight='bold')
plt.savefig('./RMSE.tif',dpi=100)
plt.show()

# # Plotting
def _parity_panel(position, pred, r2_key, rmse_key, text_x, text_ys, title,
                  xlabel=False, ylabel=False):
    """Draw one predicted-vs-true scatter panel on the current figure.

    Parameters
    ----------
    position : int
        Three-digit subplot code passed to ``plt.subplot`` (e.g. 221).
    pred : array-like
        Hold-out predictions of one model; plotted on the x axis against
        ``y_test`` on the y axis.
    r2_key : str
        Key into ``R2_score`` for the annotated R² value.
    rmse_key : str
        Key into ``cross_score`` for the annotated RMSE value.
    text_x : float
        Data-coordinate x position of both annotations.
    text_ys : tuple of float
        Data-coordinate y positions of the R² and RMSE annotations.
    title : str
        Panel title.
    xlabel, ylabel : bool
        Whether to label the x / y axis of this panel.
    """
    plt.subplot(position)
    # Red y = x reference line spanning the prediction range with a margin of 3.
    diagonal = np.arange(min(pred)-3, max(pred)+3)
    plt.plot(diagonal, diagonal, color='red')
    r2_y, rmse_y = text_ys
    plt.text(text_x, r2_y, r'$R^2={:.2f}$'.format(R2_score[r2_key]), fontsize=14, fontweight='bold')
    plt.text(text_x, rmse_y, r'$RMSE={}$'.format(cross_score[rmse_key]), fontsize=14, fontweight='bold')
    plt.scatter(pred, y_test, color='blue', label='pIC50')
    # Predictions are on x and true values on y, so the axes are labelled that
    # way round; the previous labels were swapped relative to the data.
    if xlabel:
        plt.xlabel(r'$y_{pred}$', fontsize=18, fontweight='bold')
    if ylabel:
        plt.ylabel(r'$y_{true}$', fontsize=18, fontweight='bold')
    plt.xticks(fontsize=14, fontweight='bold')
    plt.yticks(fontsize=14, fontweight='bold')
    plt.title(title, fontsize=18, fontweight='bold')


# Figure 1: the four tree-ensemble models.
plt.figure(figsize=(20,14))
_parity_panel(221, gbm_pred, 'LGB', 'LightGBM', 1, (10.5, 9.5), 'LightGBM', ylabel=True)
_parity_panel(222, rf_pred, 'RF', 'RandomForest', 1.5, (10, 9), 'RandomForest')
_parity_panel(223, GBR_pred, 'GBR', 'GBDT', 2, (10, 9), 'GBDT', xlabel=True, ylabel=True)
_parity_panel(224, xgb_pred, 'XGB', 'XGBoost', 1.2, (10, 9), 'XGBoost', xlabel=True)
plt.tight_layout()
plt.savefig('./问题二模型评价1.tif',dpi=100)
plt.show()

# Figure 2: SVR and the three linear models.
plt.figure(figsize=(20,14))
_parity_panel(221, svr_pred, 'SVR', 'SVR', 1.2, (10, 9), 'SVR')
_parity_panel(222, ridge_pred, 'Ridge', 'Ridge', 0.4, (10, 9), 'Ridge')
_parity_panel(223, LS_pred, 'LS', 'Lasso', 0.5, (10, 9), 'Lasso', xlabel=True)
_parity_panel(224, lr_pred, 'LR', 'LinearRegression', 0.5, (10, 9), 'Linear Regression', xlabel=True)
plt.tight_layout()
plt.savefig('./问题二模型评价2.tif',dpi=100)
plt.show()

# Load the 'test' sheets and predict pIC50 with the fitted LightGBM model.
# NOTE(review): these paths omit the './数据/' directory used when the same
# workbooks are read at the top of the file — confirm the files exist here too.
Test_set = pd.read_excel('./ERα_activity.xlsx',sheet_name='test')
Test_x = pd.read_excel('./Molecular_Descriptor.xlsx',sheet_name='test')
# Restrict to the same descriptor columns the models were trained on.
Test_x = Test_x[selected_f]
test_pred  = gbm.predict(Test_x)

def ictopic(y):
    """Convert IC50 to pIC50: scale ``y`` by 1e-9, then take ``-log10``."""
    scaled = y * (10 ** (-9))
    return -np.log10(scaled)

def pictoic(y):
    """Convert pIC50 to IC50: ``10**-y`` divided by 1e-9."""
    unit = 10 ** (-9)
    raised = 10 ** -y
    return raised / unit

if __name__ == '__main__':
    # Convert the predicted pIC50 values back to IC50.
    ic = pictoic(test_pred)

# Attach both predicted columns to the test table and write it out.
# NOTE(review): `ic` is only assigned inside the main guard above, while these
# lines run unconditionally at module level.
Test_set['IC50_nM'] = ic
Test_set['pIC50'] = test_pred
Test_set.to_excel('./prediction.xlsx',index=False)

