### Interpretable Machine Learning models and Energy Storage Systems at the Southwest Power Pool

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import random
from scipy import stats
import datetime as dt
from datetime import timedelta
from sklearn.tree import DecisionTreeRegressor
import sys
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics
import statsmodels.formula.api as smf
import scipy.stats as stats
import xgboost as xgb

In [None]:
# Base folder of the locally stored SPP data; the generation-mix CSV path below is built from it.
SPP_path = r'C:\Users\felip\Desktop\Electricity\Energy Market\Energy Market (SPP)'

def add_info(df):
    """Add calendar and clock columns derived from the first column of *df*.

    Every value in the first column must be a string of the form
    ``'<date> <time>'``. The date may be written ``YYYY-MM-DD`` or
    ``MM/DD/YYYY``; the time must be ``HH:MM:SS`` (anything after a ``'.'``
    in the time part is ignored).

    The columns ``'Local Date'``, ``'Local Time'``, ``'Hour'``, ``'Weekday'``
    (True for Monday-Friday), ``'Month'``, ``'Day'`` and ``'Minute of Day'``
    are written to *df* in place and the same frame is returned.

    Raises:
        ValueError: if a date matches neither supported format, or a time is
            not ``HH:MM:SS``.
    """
    dates = []
    times = []
    weekday = []
    months = []
    days = []
    hour = []
    minute_of_day = []
    for interval in df[df.columns[0]].values.tolist():
        parts = interval.split(' ')
        try:
            date = dt.datetime.strptime(parts[0], '%Y-%m-%d').date()
        except ValueError:
            # Only a format mismatch should trigger the fallback; any other
            # failure (or a keyboard interrupt) must surface unchanged.
            date = dt.datetime.strptime(parts[0], '%m/%d/%Y').date()
        dates.append(date)
        months.append(date.month)
        days.append(date.day)
        # weekday() is 0-4 for Monday-Friday, 5-6 for the weekend
        weekday.append(date.weekday() < 5)
        time = dt.datetime.strptime(parts[1].split('.')[0], '%H:%M:%S').time()
        times.append(time)
        # 'Hour' keeps a time object truncated to the full hour
        hour.append(dt.time(time.hour))
        minute_of_day.append(60 * time.hour + time.minute)
    df['Local Date'] = np.array(dates)
    df['Local Time'] = np.array(times)
    df['Hour'] = np.array(hour)
    df['Weekday'] = np.array(weekday)
    df['Month'] = np.array(months)
    df['Day'] = np.array(days)
    df['Minute of Day'] = np.array(minute_of_day)
    return df

def GMT2CT(s):
    """Shift a ``'YYYY-MM-DDTHH:MM:SSZ'`` timestamp back by six hours.

    The last character of the time part is dropped before parsing. The shift
    is always exactly six hours (no daylight-saving adjustment); when it
    crosses midnight the date moves back one day.

    Returns a two-element list ``[date, time]``.
    """
    pieces = s.split('T')
    day = dt.datetime.strptime(pieces[0], '%Y-%m-%d').date()
    clock = pieces[1][:-1].split(':')
    shifted_hour = int(clock[0]) - 6
    if shifted_hour < 0:
        # wrapped past midnight: previous calendar day
        shifted_hour += 24
        day = day - timedelta(1)
    stamp = str(shifted_hour) + ':' + clock[1] + ':' + clock[2]
    return [day, dt.datetime.strptime(stamp, '%H:%M:%S').time()]

def ssr(prediction, test):
    """Return the sum of squared differences between *prediction* and *test*."""
    residual = prediction - test
    return (residual ** 2).sum()

def pe(prediction, test):
    """Return the absolute percentage error of *prediction* relative to *test*.

    The result is element-wise when the inputs are arrays or Series. No guard
    is applied for zeros in *test*.
    """
    relative_error = (prediction - test) / test
    return abs(relative_error) * 100

def filler(df, DA):
    """Replace NaNs in column *DA* with the most recent non-NaN value.

    NaNs that occur before the first valid value become ``0.0``. The column is
    overwritten in *df*, which is also returned.
    """
    last_seen = 0.0
    filled = []
    for current in df[DA].values.tolist():
        if not math.isnan(current):
            last_seen = current
        filled.append(last_seen)
    df[DA] = np.array(filled)
    return df

def means_dict(d):
    """Map every key of *d* to the NaN-ignoring mean of its values."""
    return {key: np.nanmean(np.array(values)) for key, values in d.items()}

def means_std_dict(d):
    """Map every key of *d* to ``[mean, standard error of the mean]``.

    Both numbers are rounded to six decimal places.
    """
    summary = {}
    for key, values in d.items():
        sample = np.array(values)
        summary[key] = [round(sample.mean(), 6), round(stats.sem(sample), 6)]
    return summary

In [None]:
# Load the 2018 generation-mix file and add 'Local Date' / 'Local Time' columns
# obtained by passing each value of the first column through GMT2CT.
# The path suffix is a raw string: '\G' is not a valid escape sequence, so the
# non-raw literal only worked by accident (and warns on recent Python versions).
gen_mix_2018 = pd.read_csv(SPP_path + r'\Generation Mix By Fuel Type\GenMix_2018.csv')
list_intervals = gen_mix_2018[gen_mix_2018.columns[0]].values.tolist()
local_time = []
local_date = []
for value in list_intervals:
    # convert once per row and unpack the [date, time] pair
    converted_date, converted_time = GMT2CT(value)
    local_date.append(converted_date)
    local_time.append(converted_time)
gen_mix_2018['Local Date'] = np.array(local_date)
gen_mix_2018['Local Time'] = np.array(local_time)

In [None]:
# Two-digit day strings '01' .. '31'
days_31 = ['0' + str(n) if n < 10 else str(n) for n in range(1, 32)]

# Month code -> list of day codes for a non-leap year; used to build daily file names.
month_lengths = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
cal_dict = {days_31[position]: days_31[:length]
            for position, length in enumerate(month_lengths)}

In [None]:
# Read one real-time (RT) LMP file per calendar day of 2018 and stack them.

# run time: ~ 9 min

path = r'C:\Users\felip\Desktop\Electricity\Energy Market\Energy Market (SPP)\RT\2018'
RT_path = 'RTBM-LMP-DAILY-SL-2018'
end = '.csv'
dfs = []
for key, day_codes in cal_dict.items():
    month_folder = path + '\\' + key + '\\By_Day\\'
    for value in day_codes:
        dfs.append(pd.read_csv(month_folder + RT_path + key + value + end))
RT = pd.concat(dfs)
print('Loaded RT data')

# One row per interval: mean LMP over all rows that share that interval
rt_aggregation = {'Interval': 'first', ' LMP': 'mean'}
RT2018_aggloc = RT.groupby('Interval')[['Interval', ' LMP']].agg(rt_aggregation)
RT2018 = add_info(RT2018_aggloc)
RT2018 = RT2018.rename(columns={' LMP': 'LMP'})
RT2018.head()

In [None]:
# Read one day-ahead (DA) LMP file per calendar day of 2018 and stack them.

# run time: ~ 2 min

path = r'C:\Users\felip\Desktop\Electricity\Energy Market\Energy Market (SPP)\DA\2018'
DA_path = 'DA-LMP-SL-2018'
end = '0100.csv'
dfs = []
for key, day_codes in cal_dict.items():
    month_folder = path + '\\' + key + '\\By_Day\\'
    for value in day_codes:
        dfs.append(pd.read_csv(month_folder + DA_path + key + value + end))
DA2018 = pd.concat(dfs)
print('Loaded DA data')

# One row per interval: mean LMP over all rows that share that interval
da_aggregation = {'Interval': 'first', 'LMP': 'mean'}
DA2018_aggloc = DA2018.groupby('Interval')[['Interval', 'LMP']].agg(da_aggregation)
DA2018_aggloc = add_info(DA2018_aggloc)
DA2018_aggloc.head()

In [None]:
# Merge DA prices, RT prices and the generation-mix data on the interval index.
# The right join keeps every RT interval; DA gaps are then filled by filler()
# with the last value seen.
comparison = DA2018_aggloc.iloc[:, 0:2].join(RT2018, how='right', lsuffix='_DA', rsuffix='_RT')
DART2018_5min = filler(comparison, 'LMP_DA')

# Re-index the generation mix with 'MM/DD/YYYY HH:MM:SS' strings built from its
# local date/time columns so that it can be joined to the price frame.
gen_2018 = gen_mix_2018
new_index = [
    day.strftime("%m/%d/%Y") + ' ' + clock.strftime("%H:%M:%S")
    for day, clock in zip(gen_2018['Local Date'], gen_2018['Local Time'])
]
gen_2018.index = np.array(new_index)
DART_gen_2018 = gen_2018.join(DART2018_5min, how='right', lsuffix='_gen', rsuffix='_price')

# Shorter column names used by the rest of the notebook
short_names = {
    ' Average Actual Load': 'Load',
    ' Wind Self': 'Wind',
    ' Coal Market': 'Coal_Mkt',
    ' Coal Self': 'Coal_Self',
    'Local Time_price': 'Local Time',
    'Local Date_price': 'Local Date',
}
DART_gen_2018 = DART_gen_2018.rename(columns=short_names)

In [None]:
# Opening and concatenating DA Load datasets (one file per calendar day of 2018)
path = r'C:\Users\felip\Desktop\Electricity\Energy Market\Energy Market (SPP)\DA Load\2018'
DA_path = 'DA-MC-2018'
end = '0100.csv'
dfs = []
for key in cal_dict.keys():
    for value in cal_dict[key]:
        dfs.append(pd.read_csv(path+'\\'+key+'\\'+DA_path+key+value+end))
DA_load_2018 = pd.concat(dfs)

# Merging Forecast Load to dataset
DA_load_2018.index = DA_load_2018['Interval']
# The frame merged in the previous cell is DART_gen_2018. The original joined
# against `DART2018`, which is only created on the next line, so a clean
# top-to-bottom run raised NameError.
# NOTE(review): column 8 is presumably ' Total Demand' (it is the name filled below) - confirm.
comparison = DA_load_2018.iloc[:,8:9].join(DART_gen_2018,how='right')
# carry the last DA load value forward over the intervals the join left empty
DART2018 = filler(comparison, ' Total Demand')
# NOTE(review): the first 11 rows are dropped without explanation - presumably
# they precede the first available DA load value; confirm.
DART2018 = DART2018.iloc[11:,:]
DART2018 = DART2018.rename(columns={' Total Demand':'DA Load'})
# actual load minus day-ahead load
DART2018['Load_Diff'] = DART2018['Load'] - DART2018['DA Load']

DART2018.head()

In [None]:
# Adding previous-interval values and moving averages of the preceding `step` intervals

# .copy(): the new columns are written to an independent frame, not to a slice of DART2018
DART2018_1 = DART2018.iloc[1:, :].copy()
previous_load_diff = DART2018['Load_Diff'].iloc[0:-1].values
DART2018_1['Previous_Load_Diff'] = previous_load_diff
previous_RT = DART2018['LMP_RT'].iloc[0:-1].values
DART2018_1['Previous_RT'] = previous_RT

# No earlier cell creates a 'Spread' column, so reading it raised KeyError on a
# clean run. Fall back to the definition the ESS cell uses (LMP_DA - LMP_RT).
if 'Spread' in DART2018.columns:
    spread = DART2018['Spread']
else:
    spread = DART2018['LMP_DA'] - DART2018['LMP_RT']
previous_spread = spread.iloc[0:-1].values
DART2018_1['Previous_Spread'] = previous_spread

step = 3
n_windows = max(DART2018_1.shape[0] - step, 0)
# Window over the FULL series. The original cut the arrays to n_windows elements
# first, so the last step-1 windows held fewer than `step` values.
previous_RT = DART2018_1['LMP_RT'].values
previous_load = DART2018_1['Load_Diff'].values
previous_means_RT = np.empty(n_windows)
previous_means_load = np.empty(n_windows)
for i in range(n_windows):
    # row i+step of DART2018_1 receives the mean of rows i .. i+step-1
    previous_means_RT[i] = previous_RT[i:i+step].mean()
    previous_means_load[i] = previous_load[i:i+step].mean()
DART2018_2 = DART2018_1.iloc[step:, :].copy()
DART2018_2['Previous_RT_2'] = previous_means_RT
DART2018_2['Previous_Load_Diff_2'] = previous_means_load

DART2018_2.head()

In [None]:
def sklearn_clusters_2(train_df, depth, plot):
    """Split *train_df* into groups that share a decision-tree prediction.

    A ``DecisionTreeRegressor(max_depth=depth)`` is fitted on the minute of day
    taken from the index (entries must expose ``.hour`` and ``.minute``)
    against ``'LMP_RT'``, then evaluated on the same rows. Rows with the same
    predicted value form one group.

    Side effects: adds the columns ``'minutes'`` and ``'sklearn cluster'`` to
    *train_df*. When ``plot == True`` the fitted step function is drawn on the
    current matplotlib axes.

    Returns a list of DataFrames, one per distinct predicted value, in order of
    first appearance.
    """
    # index entry -> minutes since midnight
    train_df['minutes'] = np.array([stamp.hour * 60 + stamp.minute for stamp in train_df.index])
    features = train_df['minutes'].values.reshape(-1, 1)
    target = train_df['LMP_RT']

    tree = DecisionTreeRegressor(max_depth=depth)
    tree.fit(features, target)

    # evaluate on the training inputs themselves
    eval_points = train_df['minutes'].values.reshape(-1, 1)
    fitted = tree.predict(eval_points)
    train_df['sklearn cluster'] = fitted

    # distinct leaf values, first-seen order
    leaf_values = []
    for leaf in fitted:
        if leaf not in leaf_values:
            leaf_values.append(leaf)
    branch_dfs = [train_df[train_df['sklearn cluster'] == leaf] for leaf in leaf_values]

    if plot == True:
        plt.plot(eval_points, fitted, color="cornflowerblue", label="max_depth="+str(depth), linewidth=2)
        plt.legend()
    return branch_dfs

def crit_pts2(df):
    """Find the rows of *df* whose real-time price counts as a critical point.

    Row ``i`` is critical when ``LMP_RT[i] > 1.6 * LMP_DA[i]`` and, if the price
    also exceeds twice the running mean of ``LMP_RT[0..i]``, its distance to
    that running mean is larger than a third of the overall ``LMP_RT`` mean.

    Parameters:
        df: DataFrame with columns ``'LMP_RT'`` and ``'LMP_DA'``.

    Returns:
        ``[x, y]`` - two numpy arrays of equal length holding the index labels
        of the critical rows and their ``LMP_RT`` values, in row order.
    """
    rt = df['LMP_RT']
    da = df['LMP_DA']
    # loop-invariant: mean over the whole column
    overall_mean = rt.mean()

    points = []
    prices = []
    for i, label in enumerate(df.index.values.tolist()):
        # .iloc: rows are addressed by position. Plain Series[int] on a
        # non-integer index is deprecated positional fallback in pandas.
        rt_i = rt.iloc[i]
        running_mean = rt.iloc[:i + 1].mean()
        above_da = rt_i > 1.6 * da.iloc[i]
        if rt_i > 2 * running_mean:
            is_critical = above_da and (rt_i - running_mean) > overall_mean / 3
        else:
            is_critical = above_da
        if is_critical:
            # label and price are collected together so x and y always line up
            points.append(label)
            prices.append(rt_i)

    return [np.array(points), np.array(prices)]

In [None]:
# Repeated random-split evaluation of the clustered multiple linear regression.
# For each of n repetitions and each month: split the rows 50/50, average both
# halves by 'Local Time', cluster the training profile with a depth-2 tree, fit
# one OLS model per cluster and score the fitted prices on the test profile.

np.seterr(divide='print',invalid='print')

n = 10
PE_10_list = []
errors = 0

df = DART2018_2

slopes = {}

r2 = []
r2adj = []

# Columns averaged per 'Local Time' and the regressors of the OLS model.
# The regressor order matches the original formula, so coefficient positions are unchanged.
mean_columns = ['LMP_RT', 'LMP_DA', 'Load', 'Wind', 'Load_Diff', 'Previous_Load_Diff',
                'Previous_RT', 'Previous_Load_Diff_2', 'Previous_RT_2']
predictors = ['LMP_DA', 'RT_std', 'DA_std', 'Load', 'Wind', 'Load_Diff', 'Previous_Load_Diff',
              'Previous_Load_Diff_2', 'Previous_RT', 'Previous_RT_2']
formula = 'LMP_RT ~ ' + ' + '.join(predictors)

for i in range(n):
    for j in range(1,13):
        by_month = df[df['Month']==j]

        train, test = train_test_split(by_month, test_size=0.5)

        train_data = train.groupby('Local Time')[mean_columns].mean()
        train_data['RT_std'] = train.groupby('Local Time')['LMP_RT'].std()
        train_data['DA_std'] = train.groupby('Local Time')['LMP_DA'].std()
        test_data = test.groupby('Local Time')[mean_columns].mean()
        test_data['RT_std'] = test.groupby('Local Time')['LMP_RT'].std()
        test_data['DA_std'] = test.groupby('Local Time')['LMP_DA'].std()
        test_data_copy = test_data.copy()

        test_data_copy['fitted RT (10)'] = np.zeros(test_data_copy.index.shape[0])
        branch_dfs = sklearn_clusters_2(train_data, 2, False)
        for cluster in branch_dfs:
            try:
                result10 = smf.ols(formula=formula, data=cluster).fit()
                r2.append(result10.rsquared)
                r2adj.append(result10.rsquared_adj)
                for var in result10.params.index:
                    slopes.setdefault(var, []).append(result10.params.loc[var])
                # Apply this cluster's model to the test rows at the cluster's times.
                # Coefficients are looked up by name and written with a single
                # .loc[rows, column] call; the chained df[col].loc[row] = ... form
                # is not guaranteed to write into the frame.
                hours = cluster.index
                fitted = (test_data_copy.loc[hours, predictors].dot(result10.params[predictors])
                          + result10.params['Intercept'])
                test_data_copy.loc[hours, 'fitted RT (10)'] = fitted.to_numpy()
            except Exception as exc:
                # Still best-effort: count the failure and move on, but say what
                # failed and let KeyboardInterrupt/SystemExit through.
                errors += 1
                print('\nerror occurred at i={} month={} ({}: {})'.format(i, j, type(exc).__name__, exc))
                continue
        PE_10_list.append(pe(test_data_copy['fitted RT (10)'], test_data_copy['LMP_RT']).mean())

    print(f'\r{100*(i+1)/n}%',end='')

PE_10_array = np.array(PE_10_list)

print('\nMAPE \u00B1 std')
print('10 predictors: {} \u00B1 {}'.format(PE_10_array.mean(), PE_10_array.std()))
print('# of errors: {}'.format(errors))
print('R^2: {}'.format(np.nanmean(np.array(r2))))
print('Adj R^2: {}'.format(np.nanmean(np.array(r2adj))))
# Keep the [mean, standard error] table: the peak-estimation cell further down
# reads it as `slopes_10`, which was never assigned before.
slopes_10 = means_std_dict(slopes)
slopes_10

In [None]:
# Reducing loads at critical times: for every month, subtract ESS_size from
# 'Load' in all rows whose 'Local Time' is a critical point of that month's
# average daily profile.

DART2018_ESS = DART2018_2
ESS_size = 7000  # NOTE(review): presumably in the same unit as 'Load' - confirm
months = []
for month in range(1,13):
    by_month = DART2018[DART2018['Month']==month]
    # explicit copy: the reduction below must not write into DART2018_2
    by_month_ESS = DART2018_ESS[DART2018_ESS['Month']==month].copy()

    study = by_month.groupby('Local Time')[['LMP_RT','LMP_DA','Load']].mean()

    # critical times and their mean RT price, computed once and reused for the plot
    critical, critical_prices = crit_pts2(study)
    by_month_ESS.loc[by_month_ESS['Local Time'].isin(critical), 'Load'] -= ESS_size

    study2 = by_month_ESS.groupby('Local Time')[['LMP_RT','LMP_DA','Load']].mean()

    months.append(by_month_ESS)

    plt.title('Month {}'.format(month))
    plt.plot(study.index, study['LMP_RT'], 'orange', label='RT Means')
    plt.plot(critical, critical_prices, 'ro')
    plt.legend()
    plt.show()
    plt.plot(study.index, study['Load'], 'red')
    plt.plot(study2.index, study2['Load'], 'blue', label='ESS Load')
    plt.title('Month {}'.format(month))
    plt.legend()
    plt.show()

DART2018_ESS = pd.concat(months)

In [None]:
# Adding previous-interval values and moving averages on the updated dataset (with reduced load)

DART2018_ESS['Load_Diff'] = DART2018_ESS['Load'] - DART2018_ESS['DA Load']
DART2018_ESS['Spread'] = DART2018_ESS['LMP_DA'] - DART2018_ESS['LMP_RT']
DART2018_ESS = DART2018_ESS.rename(columns={' Wind':'Wind'})

# .copy(): the new columns are written to an independent frame, not to a slice
DART2018_ESS_1 = DART2018_ESS.iloc[1:, :].copy()
previous_load_diff = DART2018_ESS['Load_Diff'].iloc[0:-1].values
DART2018_ESS_1['Previous_Load_Diff'] = previous_load_diff
previous_RT = DART2018_ESS['LMP_RT'].iloc[0:-1].values
DART2018_ESS_1['Previous_RT'] = previous_RT
previous_spread = DART2018_ESS['Spread'].iloc[0:-1].values
# was assigned previous_RT (copy-paste slip): the column must hold the previous spread
DART2018_ESS_1['Previous_Spread'] = previous_spread

step = 3
n_windows = max(DART2018_ESS_1.shape[0] - step, 0)
# Window over the FULL series. The original cut the arrays to n_windows elements
# first, so the last step-1 windows held fewer than `step` values.
previous_RT = DART2018_ESS_1['LMP_RT'].values
previous_load = DART2018_ESS_1['Load_Diff'].values
previous_means_RT = np.empty(n_windows)
previous_means_load = np.empty(n_windows)
for i in range(n_windows):
    # row i+step of DART2018_ESS_1 receives the mean of rows i .. i+step-1
    previous_means_RT[i] = previous_RT[i:i+step].mean()
    previous_means_load[i] = previous_load[i:i+step].mean()
DART2018_ESS_2 = DART2018_ESS_1.iloc[step:, :].copy()
DART2018_ESS_2['Previous_RT_2'] = previous_means_RT
DART2018_ESS_2['Previous_Load_Diff_2'] = previous_means_load

In [None]:
# Estimating the real-time price at critical times on the reduced-load (ESS)
# dataset and comparing it with the observed price.

## (the original revision note on this line was cut off mid-sentence: "Fixed a couple of things and te")

df = DART2018_2
df2 = DART2018_ESS_2
total_err = []

r2 = []
r2adj = []

mean_columns = ['LMP_RT', 'LMP_DA', 'Load', 'Wind', 'Load_Diff', 'Previous_Load_Diff',
                'Previous_RT', 'Previous_Load_Diff_2', 'Previous_RT_2']
predictors = ['LMP_DA', 'RT_std', 'DA_std', 'Load', 'Wind', 'Load_Diff', 'Previous_Load_Diff',
              'Previous_Load_Diff_2', 'Previous_RT', 'Previous_RT_2']
peak_formula = 'LMP_RT ~ ' + ' + '.join(predictors)

# The original hard-disabled the per-month peak model with `if False == True`
# and always applied the averaged coefficients in `slopes_10`. The switch keeps
# that behaviour (False) while making the alternative reachable.
use_peak_fit = False
if not use_peak_fit:
    # slopes_10[name][0] is read as the coefficient for `name`.
    # NOTE(review): presumably slopes_10 = means_std_dict(slopes) from the
    # evaluation cell; it has to exist before this cell runs - confirm.
    mean_coefs = pd.Series({name: slopes_10[name][0] for name in ['Intercept'] + predictors})

# NOTE(review): nothing inside this loop is random, so every one of the 10
# passes reproduces the same numbers; kept so r2/r2adj/total_err keep their length.
for i in range(10):
    for j in range(1,13):
        by_month = df[df['Month']==j]
        study = by_month.groupby('Local Time')[mean_columns].mean()
        study['RT_std'] = by_month.groupby('Local Time')['LMP_RT'].std()
        study['DA_std'] = by_month.groupby('Local Time')['LMP_DA'].std()

        by_month2 = df2[df2['Month']==j]
        study2 = by_month2.groupby('Local Time')[mean_columns].mean()
        study2['RT_std'] = by_month2.groupby('Local Time')['LMP_RT'].std()
        study2['DA_std'] = by_month2.groupby('Local Time')['LMP_DA'].std()
        study2['ESS RT'] = study2['LMP_RT']

        critical = crit_pts2(study)[0]
        critical_df = study.loc[critical]
        result_peak = smf.ols(formula=peak_formula, data=critical_df).fit()
        r2.append(result_peak.rsquared)
        r2adj.append(result_peak.rsquared_adj)

        coefs = result_peak.params if use_peak_fit else mean_coefs
        # critical times, in the order they appear in the profile
        peak_hours = study.index[study.index.isin(critical)]
        if len(peak_hours) > 0:
            estimate = study2.loc[peak_hours, predictors].dot(coefs[predictors]) + coefs['Intercept']
            # one .loc[rows, column] write instead of chained df[col].loc[row] = ...
            study2.loc[peak_hours, 'ESS RT'] = estimate.to_numpy()
            # signed percentage difference between the estimate and the observed RT price
            observed = study2.loc[peak_hours, 'LMP_RT']
            p_err = 100*(study2.loc[peak_hours, 'ESS RT'] - observed)/observed
            total_err.extend(p_err.tolist())
        if i == 0:
            plt.plot(study2.index, study2['LMP_RT'], 'blue', label='RT', alpha=0.5)
            plt.plot(study2.index, study2['ESS RT'], 'red', label='ESS')
            plt.title('Month {}'.format(j))
            plt.ylabel('RT Means ($/MWh)')
            plt.legend()
            plt.show()

round(np.array(total_err).mean(),6)

##### Bayesian optimization of ORT hyper-parameters

In [None]:
from skopt import Optimizer
from skopt.space import Real, Categorical, Integer

In [None]:
# January subset for the hyper-parameter search: the days of the month are
# split at random into 15 training days and the remaining test days.
jan = DART2018[DART2018['Month'] == 1]
selected_columns = ['LMP_DA', 'Load', 'Wind', 'Previous_RT_MA', 'Hour', 'Day', 'LMP_RT', 'Minute of Day']
dataset = jan.loc[:, selected_columns]
# replace the 'Hour' entries by their integer hour
hours = [stamp.hour for stamp in dataset['Hour']]
dataset['Hour'] = np.array(hours)

train_days = random.sample(range(1, 32), 15)
in_training = dataset['Day'].isin(train_days)
train = dataset.loc[in_training].dropna()
test = dataset.loc[~in_training].dropna()

# first five columns are the features, column 6 ('LMP_RT') is the target
X, y = train.iloc[:, 0:5], train.iloc[:, 6]

X_test, y_test = test.iloc[:, 0:5], test.iloc[:, 6]

In [None]:
def xgb_loss(next_x):
    """Train an XGBoost regressor with the given hyper-parameters and score it.

    *next_x* holds, in order: n_estimators, max_depth, learning_rate, alpha,
    colsample_bytree, min_child_weight. The model is fitted on the module-level
    ``X``/``y`` and evaluated on ``X_test``/``y_test``.

    Returns the mean percentage error over the test rows, leaving out rows
    whose error equals ``np.inf``.
    """
    model = xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=next_x[0],
        max_depth=next_x[1],
        learning_rate=next_x[2],
        alpha=next_x[3],
        colsample_bytree=next_x[4],
        min_child_weight=next_x[5],
    )
    model.fit(X, y)
    errors_pct = pe(model.predict(X_test), y_test)
    finite_only = errors_pct != np.inf
    return errors_pct[finite_only].mean()

In [None]:
def optimize(dimensions, start_points, times, function):
    """Minimise *function* over a skopt search space with an ask/tell loop.

    Parameters:
        dimensions: dict mapping a name to a skopt dimension; only the values
            are handed to the optimizer, in dict order.
        start_points: passed as ``n_initial_points`` (cast to int).
        times: number of ask/evaluate/tell iterations (cast to int).
        function: callable that takes the list returned by ``ask()`` and
            returns a scalar loss.

    Returns:
        ``(best_loss, best_params, loss_vector, params_list)`` where
        ``loss_vector`` holds the loss of every iteration and ``params_list``
        has one list per dimension with the values tried. If no iteration
        produced a loss below the initial sentinel ``1e10`` (including
        ``times == 0``), ``best_params`` is ``None``; the original raised
        ``UnboundLocalError`` in that case.
    """
    start_points, times = int(start_points), int(times)
    # initial_point_generator, njobs not found
    opt = Optimizer(list(dimensions.values()), base_estimator="gp", n_initial_points=start_points,
                    acq_func="gp_hedge", acq_optimizer="auto", random_state=0)

    loss_vector = np.zeros(times)
    # One history list per dimension. The count still comes from a probe call to
    # ask(), as before. NOTE(review): the probe may advance the optimizer's
    # sampling state; it is kept so seeded runs stay comparable with earlier
    # results - confirm whether len(dimensions) can replace it.
    params_list = [[] for _ in range(len(opt.ask()))]
    best_loss = 1e10
    best_params = None
    for i in range(times):
        next_x = opt.ask()
        loss = function(next_x)
        opt.tell(next_x, loss)
        loss_vector[i] = loss
        for history, value in zip(params_list, next_x):
            history.append(value)
        if loss < best_loss:
            best_loss = loss
            best_params = next_x
        print(f"\rOptimization {100 * (i+1)/times}% completed. Best loss: {best_loss:.4}   ", end="")
    return best_loss, best_params, loss_vector, params_list

# Search space for the XGBoost hyper-parameters. The insertion order matters:
# optimize() passes only the values to the optimizer and xgb_loss() reads the
# sampled point by position (0 = n_estimators ... 5 = min_child_weight).
dimensions = {'n_estimators': Integer(1, 20, prior='uniform'),
              'max_depth': Integer(1, 5, prior='uniform'),
              'learning_rate': Real(0.1, 0.9, prior='uniform'),
              'alpha': Integer(5, 50, prior='uniform'),
              'colsample_bytree': Real(0.2, 0.8, prior='uniform'),
              'min_child_weight': Integer(1,100, prior='log-uniform')}

# start_points=1e5 and times=1e3: the number of initial points is larger than
# the number of iterations.
# NOTE(review): with these values the search presumably never leaves the
# initial-sampling phase - confirm that this is intended.
loss, params, loss_vector, params_vector = optimize(dimensions, 1e5, 1e3, xgb_loss)

In [None]:
# Full-year dataset for the tree visualisation: random 50/50 split, with the
# training rows indexed by minute of day.
selected_columns = ['LMP_DA', 'Load', 'Wind', 'Previous_RT_MA', 'Hour', 'Day', 'LMP_RT', 'Minute of Day']
dataset = DART2018.loc[:, selected_columns]
# replace the 'Hour' entries by their integer hour
hours = [stamp.hour for stamp in dataset['Hour']]
dataset['Hour'] = np.array(hours)

train, test = train_test_split(dataset, test_size=0.5)

train.index = train['Minute of Day']
# first five columns are the features, column 6 ('LMP_RT') is the target
X, y = train.iloc[:, 0:5], train.iloc[:, 6]

In [None]:
# Fit two boosted-tree models on the training split and draw one tree of each.
data_dmatrix = xgb.DMatrix(data=X,label=y)

# Set the default figure size BEFORE plotting; the original set it after the
# first tree had been drawn, so that figure never used it.
plt.rcParams['figure.figsize'] = [50, 10]

# 'reg:squarederror' is the current name of the deprecated 'reg:linear'
# objective and is what xgb_loss() already uses.
params = {"objective":"reg:squarederror",'colsample_bytree': 0.6,'learning_rate': 0.2,'max_depth': 5, 'alpha': 10}
#params = {"objective":"reg:squarederror",'max_depth': 4}
xg_reg = xgb.train(params=params, dtrain=data_dmatrix, num_boost_round=10)
xgb.plot_tree(xg_reg,num_trees=0,rankdir='LR')

# Only `import xgboost as xgb` is in scope, so the bare names XGBRegressor and
# plot_tree raised NameError; qualify them with the module.
model = xgb.XGBRegressor(max_depth=4)
model.fit(X, y)
fig, ax = plt.subplots(figsize=(30,30))
xgb.plot_tree(model, ax=ax, rankdir='LR')
plt.show()