# In [0]:
# import key libraries
import pyspark
from pyspark.sql.types import IntegerType, FloatType, DateType
import pandas as pd
import numpy as np
import datetime
from datetime import date
from datetime import datetime as dt
import pytz 
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import KFold
from scipy import stats
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge 
from sklearn.linear_model import Lasso
from sklearn import model_selection 
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.mixture import GaussianMixture
from scipy.signal import find_peaks
from sklearn.feature_selection import VarianceThreshold

import warnings 
warnings.filterwarnings("ignore")

import os
# Runs an empty shell command once, when this cell/module is executed.
# NOTE(review): presumably the usual trick to make Windows consoles honour
# ANSI escape codes such as `style.RED` below - confirm it is still needed.
os.system("")
class style:
    """Namespace for ANSI escape sequences used to colour console output."""

    # SGR code 31: switch the foreground colour to red.
    RED = '\033[31m'
    
# Site -> machines lookup.
# @Glori, please update site_machine_dict when data from other machines comes in.
site_machine_dict = {
    'ay': ['1a', '2a', '3a', '4a', '5a', '6a'],
    'beu': ['15b'],
    'cg': ['5g', '6g', '7g'],
    'gb': ['10f', '11f', '12f', '13f', '14f', '15f'],
    'mp': ['1m', '2m', '3m', '4m', '5m', '6m', '7m', '8m'],
    'ox': ['1x', '2x'],
}

# Flatten the dictionary into a dataframe with one (site, machine) row per machine.
site_machine_df = pd.DataFrame(
    [
        (site, machine)
        for site, machines in site_machine_dict.items()
        for machine in machines
    ],
    columns=['site', 'machines'],
)

# Candidate machine learning models, keyed by the name used to report them.
# Every estimator is created with its default hyper-parameters.
model_dict = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'RandomForestRegressor': RandomForestRegressor(),
}

def creat_sql(select_site, select_machine):
    """
    Function that builds the SQL statement selecting every row of one machine's timeseries table
    Parameters:
    select_site: str - Site name user selected
    select_machine: str - Machine name user selected
    Returns:
    str - 'select * from groupdb_famc_energy_analytics.<site>_<machine>_timeseries'
    """
    # table name is <site>_<machine>_timeseries inside the groupdb_famc_energy_analytics schema
    qualified_table = (
        'groupdb_famc_energy_analytics.'
        + select_site + '_' + select_machine + '_timeseries'
    )
    return 'select * from ' + qualified_table

def create_widget(name, data, feature, type):
    """
    Function that creates a notebook widget so users can interact with the code
    Parameters:
    name: str - Name desired for the widget
    data: pd.DataFrame - dataframe containing the data needed to create the widget
    feature: str - name of the column whose unique values become the widget choices
    type: str - 'dropdown' or 'multiselect'; any other value only prints a message
    """
    choices = data[feature].unique()

    if type not in ('dropdown', 'multiselect'):
        print('Widget type not recognized')
        return

    # `dbutils` is not defined in this file.
    # NOTE(review): presumably the global injected by the Databricks notebook runtime - confirm.
    # The first unique value is used as the widget's default selection.
    default_choice = choices[0]
    options = list(choices)
    if type == 'dropdown':
        dbutils.widgets.dropdown(name, default_choice, options)
    else:
        dbutils.widgets.multiselect(name, default_choice, options)

def historical_data(data, n_months=18):
    """
    Helper function to get the historical data
    Parameters:
    data: pd.DataFrame - dataframe containing a 'time_bucket_local' column
    n_months: int - Number of months to look back from today, defaulted to the last 18 months
    Returns:
    pd.DataFrame - the rows of data whose 'time_bucket_local' is on or after the cut-off
    """
    # date - DateOffset yields a Timestamp at midnight, n_months before today
    cutoff = date.today() - pd.DateOffset(months=n_months)

    # The cut-off is compared in its string form ('YYYY-MM-DD 00:00:00'),
    # exactly as before; how the comparison is carried out depends on the
    # dtype of the 'time_bucket_local' column.
    return data[data['time_bucket_local'] >= str(cutoff)]

def prod_codes_sorting(data):
    """
    Helper function that ranks the prod_codes of one machine by number of datapoints, largest first
    Parameters:
    data: pd.DataFrame - dataframe with 'prod_code' and 'time_bucket_local' columns
    Returns:
    pd.DataFrame - one row per prod_code; 'time_bucket_local' holds the count of non-null values
    """
    counts_per_code = data.groupby('prod_code')['time_bucket_local'].count()
    ranked = counts_per_code.reset_index()
    return ranked.sort_values(by='time_bucket_local', ascending=False)

def get_time_series_data(data, select_prod_codes):
    """
    Function that builds the time-series data for the prod_codes that have been selected
    Parameters:
    data: pd.DataFrame - dataframe containing 'prod_code', 'machine' and 'time_bucket_utc' columns
    select_prod_codes: list - prod_codes to keep
    Returns:
    pd.DataFrame - the selected rows without the first two columns (by position)
                   and without 'machine' and 'time_bucket_utc'
    """
    # filter to only selected prod_codes
    selected = data[data['prod_code'].isin(select_prod_codes)]

    # Get rid of redundant columns. drop() returns a new frame, so neither the
    # caller's dataframe nor an intermediate slice is modified in place.
    return selected.iloc[:, 2:].drop(columns=['machine', 'time_bucket_utc'])

def get_prod_data(data, select_prod_codes):
    """
    Function that keeps only the rows belonging to the prod_codes that have been selected
    Parameters:
    data: pd.DataFrame - dataframe containing a 'prod_code' column
    select_prod_codes: list - list containing the selected prod_codes
    Returns:
    pd.DataFrame - the rows of data whose 'prod_code' is in select_prod_codes
    """
    is_selected = data['prod_code'].isin(select_prod_codes)
    return data[is_selected]

def remove_outlier(df, lower_q=0.05, upper_q=0.99):
    """
    Function that drops incomplete rows and rows holding an outlier in any column
    Parameters:
    df: pd.DataFrame - dataframe to clean; every column must support quantile()
    lower_q: float - quantile below which a value counts as an outlier (default 0.05)
    upper_q: float - quantile above which a value counts as an outlier (default 0.99)
    Returns:
    pd.DataFrame - rows without missing values whose value in every column lies
                   within that column's [lower_q, upper_q] quantile range
    """
    df = df.dropna()

    # Positional mask rather than a list of index labels: dropping by label
    # would also discard non-outlier rows that share a label with an outlier
    # whenever the index contains duplicates.
    is_outlier = np.zeros(len(df), dtype=bool)
    for col in df.columns:
        values = df[col]
        lower_bound = values.quantile(lower_q)
        upper_bound = values.quantile(upper_q)
        is_outlier |= ((values < lower_bound) | (values > upper_bound)).to_numpy()

    return df[~is_outlier]

# The following section is for auto-visualization: DTreeReg_mix_gaussian_splits testing
def get_gmm_splits(data, variable):
    """
    Function that derives split points for a variable from a Gaussian mixture model (GMM).
    Parameters:
    data: pd.DataFrame - dataframe containing the variable
    variable: str - the column to fit the mixture on
    Returns:
    With one component: the component mean (array holding a single value).
    Otherwise: list with one split between each pair of neighbouring component means.
    """
    values = np.array(data[variable])
    samples = values.reshape(-1, 1)

    # Choose the number of components (1 to 3) with the lowest Bayesian
    # information criterion (BIC). A constant variable (max == min) is
    # treated as a single cluster without fitting any candidate.
    if max(values) == min(values):
        best_n_components = 1
    else:
        bic_by_k = [
            GaussianMixture(k).fit(samples).bic(samples) for k in range(1, 4)
        ]
        best_n_components = bic_by_k.index(min(bic_by_k)) + 1

    # Fit again with the selected number of components.
    gmm = GaussianMixture(best_n_components).fit(samples)

    # A single Gaussian: the split is just the mean of the distribution.
    if best_n_components == 1:
        return gmm.means_[0]

    # Several Gaussians: order the components by mean and place each split
    # between two neighbouring means, at the fraction
    # weight_left / (weight_left + weight_right) of the distance.
    components = pd.DataFrame({
        'weight': gmm.weights_,
        'means': gmm.means_.reshape(best_n_components,),
    })
    components = components.sort_values(by='means').reset_index(drop=True)
    right = components.shift(-1)
    splits = components['means'] + (
        (right['means'] - components['means']) * components['weight']
        / (components['weight'] + right['weight'])
    )

    # The last component has no right-hand neighbour, so its entry is NaN.
    return list(splits.dropna().values)

def get_gmm_splits_number(data, variable):
    """
    Function that returns how many Gaussian distributions (1 to 3) best describe a variable.
    Parameters:
    data: pd.DataFrame - dataframe containing the variable
    variable: str - the column to fit the mixtures on
    Returns:
    int - number of mixture components with the lowest BIC; 1 when the variable is constant
    """
    values = np.array(data[variable])

    # Edge case: max == min means the variable is constant, so it is treated
    # as a single cluster and no mixture is fitted.
    if max(values) == min(values):
        return 1

    # Fit mixtures with 1, 2 and 3 components and score each one with the
    # Bayesian information criterion (BIC); the lowest BIC wins.
    samples = values.reshape(-1, 1)
    bic_by_k = [
        GaussianMixture(k).fit(samples).bic(samples) for k in range(1, 4)
    ]
    return bic_by_k.index(min(bic_by_k)) + 1

def DTreeReg_gmm_splits(data, target, variable):
    """
    Function that returns the split thresholds of a shallow regression tree fitted on one variable
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed for the algo.
    target: str - the 'dependent' variable the tree predicts.
    variable: str - the single 'independent' variable the tree splits on.
    Returns:
    np.ndarray - the tree's split thresholds on variable, sorted ascending.
    """
    # Tree depth logic:
    # exactly ONE gaussian detected (gmm_splits_number == 1) -> max_depth = 1 (at most one split)
    # any other number of gaussians                        -> max_depth = 2 (up to three splits)
    gmm_splits_number = get_gmm_splits_number(data, variable)
    max_depth = 1 if gmm_splits_number == 1 else 2

    DTreeReg = DecisionTreeRegressor(max_depth=max_depth, min_samples_split=.2)
    DTreeReg.fit(data[[variable]], data[target])

    # Leaf nodes carry a negative sentinel in tree_.feature. Selecting split
    # nodes through the feature array, rather than testing threshold >= 0,
    # keeps legitimate negative thresholds when the variable takes negative values.
    fitted_tree = DTreeReg.tree_
    is_split_node = fitted_tree.feature >= 0
    tree_splits = np.sort(fitted_tree.threshold[is_split_node])

    return tree_splits

# The following section is for auto-visualization: DTreeReg_peak_splits testing
def get_peaks(data, variable):
    """
    Function that returns the peaks of a variable's 10-bin histogram
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed to get peaks
    variable: str - the variable used for peak detection
    Returns:
    np.ndarray with the upper edge of every peak bin, or an empty list when no peak is found
    """
    # 1) divide the variable into 10 equal-width bins
    counts, edges = np.histogram(data[variable], 10)
    upper_edges = edges[1:]

    # 2) a peak is a local maximum of the bin counts that holds at least 10%
    #    (and at most 100%) of the observations
    n_observations = len(data[variable])
    peak_bins, _ = find_peaks(counts, height=(n_observations * 0.1, n_observations))

    # 3) report each peak by the upper edge of its bin
    if len(peak_bins) > 0:
        return upper_edges[peak_bins]
    return []

def DTreeReg_peak_splits(data, target, variable):
    """
    Function that returns the split thresholds of a shallow regression tree fitted on one variable
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed for the algo.
    target: str - the 'dependent' variable the tree predicts.
    variable: str - the single 'independent' variable the tree splits on.
    Returns:
    np.ndarray - the tree's split thresholds on variable, sorted ascending.
    """
    # call get_peaks function to get peaks detected from the independent variable
    peaks_number = len(get_peaks(data, variable))

    # Tree depth logic:
    # exactly ONE peak detected (peaks_number == 1) -> max_depth = 1 (at most one split)
    # no peak, or two or more peaks               -> max_depth = 2 (up to three splits)
    max_depth = 1 if peaks_number == 1 else 2

    DTreeReg = DecisionTreeRegressor(max_depth=max_depth, min_samples_split=.2)
    DTreeReg.fit(data[[variable]], data[target])

    # Leaf nodes carry a negative sentinel in tree_.feature. Selecting split
    # nodes through the feature array, rather than testing threshold >= 0,
    # keeps legitimate negative thresholds when the variable takes negative values.
    fitted_tree = DTreeReg.tree_
    is_split_node = fitted_tree.feature >= 0
    tree_splits = np.sort(fitted_tree.threshold[is_split_node])
    return tree_splits

# The following section is for auto-visualization plotting

def BW_plot(data, target, variable, tree_splits):
    """
    Function that draws a box plot of the target per range of the variable and prints each range's mean
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed for the algo.
    target: str - the 'dependent' variable shown on the y axis.
    variable: str - the 'independent' variable whose ranges form the x-axis categories.
    tree_splits: list - ascending thresholds that cut the 'independent' variable into ranges.
    """
    n_categories = len(tree_splits) + 1
    conditions = [None] * n_categories
    cats = [None] * n_categories

    # Work on a copy of the two relevant columns so that adding the
    # 'Category' column never touches (or warns about) the caller's dataframe.
    visual_df = data[[target, variable]].copy()
    values = visual_df[variable]

    # Helper that flags a category as outlier when it holds less than 10% of the rows.
    def outlier_detect(visual_df, condition):
        if len(visual_df) == 0:
            return ''
        percentage = len(visual_df[condition]) / len(visual_df)
        return ' (Outlier)' if percentage < 0.1 else ''

    # Category logic (n splits -> n + 1 half-open ranges, lower bound inclusive):
    # 0) no split: every row with a value goes into one category
    # 1) one split: two categories, labelled without the outlier suffix
    # 2) several splits: '<first', 'a-b' ..., '>=last', each with the outlier suffix if small
    if len(tree_splits) == 0:
        conditions[0] = values.notna()
        cats[0] = 'All'
    elif len(tree_splits) == 1:
        conditions[0] = (values < tree_splits[0])
        cats[0] = '<' + str(round(tree_splits[0], 2))
        conditions[1] = (values >= tree_splits[0])
        cats[1] = '>=' + str(round(tree_splits[0], 2))
    else:
        last = len(tree_splits)
        for i in range(last + 1):
            if i == 0:
                condition = values < tree_splits[0]
                label = '<' + str(round(tree_splits[0], 2))
            elif i == last:
                # >= (not >) so a value equal to the last threshold is not
                # left without a category.
                condition = values >= tree_splits[-1]
                label = '>=' + str(round(tree_splits[-1], 2))
            else:
                condition = (values >= tree_splits[i - 1]) & (values < tree_splits[i])
                label = str(round(tree_splits[i - 1], 2)) + '-' + str(round(tree_splits[i], 2))
            conditions[i] = condition
            cats[i] = label + outlier_detect(visual_df, condition)

    # Map every row to its category label. The default must be a string:
    # recent NumPy versions refuse to combine string choices with the
    # integer default 0. Rows matching no condition (missing values) get '0'
    # and are excluded from the plot and the averages through `cats`.
    conditions_result = np.select(conditions, cats, default='0')
    visual_df.insert(2, 'Category', conditions_result)

    sns.boxplot(x='Category', y=target, data=visual_df, order=cats).set(title=variable)
    # calculate and print out the average of each category
    avg_df = visual_df.groupby('Category')[target].mean().reindex(cats).reset_index()
    print(avg_df)
    
def feature_distribution_splits(data, feature, splits):
    """
    Function that plots the 100-bin histogram of a feature and marks the given splits / peaks on it.
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed for the algo.
    feature: str - the feature of interest to plot as a histogram
    splits: list - the splits / peaks to draw as dashed red vertical lines
    """
    plt.figure()
    axes = data[feature].hist(bins=100)

    # one dashed red vertical line per split
    for split_value in splits:
        axes.axvline(x=split_value, color='r', linestyle='--')

    axes.set_title(feature)
    plt.show()
    
def feature_distribution(data, feature):
    """
    Function that plots the 100-bin histogram of a feature, titled with the feature name.
    Parameters:
    data: pd.DataFrame - dataframe containing the data needed for the algo.
    feature: str - the feature of interest to plot as a histogram
    """
    plt.figure()
    axes = data[feature].hist(bins=100)
    axes.set_title(feature)
    plt.show()

def time_series_plot(data, feature):
    """
    Function that plots the time series of a feature together with its rolling average
    Parameters:
    data: pd.DataFrame - dataframe containing the feature and a 'time_bucket_local' column.
    feature: str - the feature of interest to plot as a time series

    The rolling average covers the last 48 rows and is labelled as 24 hours.
    NOTE(review): that label is only right if rows are 30 minutes apart - confirm.
    """
    # Explicit copy: the columns added below must not affect the caller's dataframe.
    df = data[[feature, 'time_bucket_local']].copy()
    new_col_name = feature + ' moving average (24hr)'
    df[new_col_name] = df[feature].rolling(48).mean()

    # Readable x axis, part 1: parse the timestamps and keep roughly 8 ticks.
    # max(1, ...) keeps the slice step valid when there are fewer than 8 rows
    # (a step of 0 would raise ValueError).
    df['time_bucket_local'] = pd.to_datetime(df['time_bucket_local'])
    n = max(1, len(df['time_bucket_local']) // 8)

    # set figure size
    plt.figure(figsize=(12, 5))

    # plot the raw time series
    sns.lineplot(x='time_bucket_local',
                 y=feature,
                 data=df,
                 label=feature)

    # plot the rolling average
    sns.lineplot(x='time_bucket_local',
                 y=new_col_name,
                 data=df,
                 label='Average (24hrs)')

    # Readable x axis, part 2: show every n-th timestamp, rotated.
    plt.xticks(df['time_bucket_local'][::n], rotation=45)


# The following section is for machine learning model selection and feature importance calculation
def ranking_models(data, target, variables, model_dict):
    """
    Function that selects the candidate model with the lowest cross-validated MAPE.
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    target: str - dependent variable (y)
    variables: list - list of independent variables (X)
    model_dict: dictionary - candidate models, keyed by name
    Returns:
    str - the key in model_dict of the selected model
    """
    X = data[variables]
    y = data[target]
    # Only the 80% training part takes part in model selection; the held-out
    # 20% is not used in this function.
    x_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)

    # Same 5 shuffled folds for every candidate.
    kfold = model_selection.KFold(n_splits=5, random_state=7, shuffle=True)
    scoring = ['neg_mean_absolute_percentage_error', 'neg_root_mean_squared_error']

    names = []
    mape = []
    rmse = []
    for name, model in model_dict.items():
        # One cross-validation pass yields both metrics, so each model is
        # fitted 5 times instead of 10.
        scores = model_selection.cross_validate(
            model, x_train, y_train, cv=kfold, scoring=scoring)
        # scikit-learn reports errors as negative scores: flip the sign
        # (and express MAPE as a percentage).
        mape.append(-100 * scores['test_neg_mean_absolute_percentage_error'].mean())
        rmse.append(-scores['test_neg_root_mean_squared_error'].mean())
        names.append(name)

    accuracy_df = pd.DataFrame({'model': names, 'abs_mape_%': mape, 'abs_rmse': rmse})
    # abs_mape: the lower the better
    accuracy_df = accuracy_df.sort_values(by='abs_mape_%', ascending=True)

    selected_model = accuracy_df.iloc[0]['model']

    return selected_model

def normalize(data):
    """
    Function that min-max scales every column for the linear-regression-based models
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    Returns:
    pd.DataFrame - copy of data with each column rescaled to (x - min) / (max - min);
                   columns that end up entirely NaN (e.g. constant columns) are removed
    """
    scaled = data.copy()
    for column in scaled.columns:
        series = scaled[column]
        low = series.min()
        high = series.max()
        scaled[column] = (series - low) / (high - low)

    # A constant column divides 0 by 0 and becomes all-NaN; drop such columns.
    return scaled.dropna(how='all', axis='columns')

def lasso_regression_feature_importance(data, target, variables):
    """
    Function that calculates the feature importances based on the k-fold lasso_regression
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    target: str - dependent variable (y)
    variables: list - list of independent variables (X); None means every column except target
    Returns:
    pd.DataFrame - columns 'feature', 'feature_importance' (mean absolute coefficient
                   over the 10 folds, largest first) and 'index' (the negated importance)
    Raises:
    ValueError - when target is not available after normalization
    """
    # Honour the requested variables (previously the argument was ignored and
    # every column of data was used as a feature).
    if variables is not None:
        wanted = {variables} if isinstance(variables, str) else set(variables)
        data = data[[col for col in data.columns if col == target or col in wanted]]

    # normalized data (normalize() removes columns that become all-NaN)
    normalized_data = normalize(data)
    if target not in normalized_data.columns:
        raise ValueError(
            "target '{}' is missing from the data or was removed by normalization".format(target))
    # remaining features
    remaining_features = [col for col in normalized_data.columns if col != target]

    # Retrieve the target and independent variables
    y = normalized_data[target]
    X = normalized_data[remaining_features]

    kf = KFold(n_splits=10)
    lr = Lasso(alpha=0.01)

    # Fit on the training part of every fold and keep the absolute coefficients.
    fold_importances = []
    for train, _ in kf.split(X, y):
        lr.fit(X.iloc[train], y.iloc[train])
        fold_importances.append(pd.DataFrame({
            'feature_importance': np.abs(lr.coef_),
            'feature': remaining_features,
        }))
    feature_importance_all = pd.concat(fold_importances)

    # Average over the folds, most important feature first.
    feature_importance_all = (
        feature_importance_all.groupby('feature').mean()
        .sort_values('feature_importance', ascending=False)
        .reset_index()
    )

    # 'index' grows as the importance shrinks.
    # NOTE(review): presumably consumed by feature_importance(), which plots max(index) - index - confirm.
    feature_importance_all['index'] = -feature_importance_all['feature_importance']

    return feature_importance_all

def ridge_regression_feature_importance(data, target, variables):
    """
    Function that calculates the feature importances based on the k-fold ridge_regression
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    target: str - dependent variable (y)
    variables: list - list of independent variables (X); None means every column except target
    Returns:
    pd.DataFrame - columns 'feature', 'feature_importance' (mean absolute coefficient
                   over the 10 folds, largest first) and 'index' (the negated importance)
    Raises:
    ValueError - when target is not available after normalization
    """
    # Honour the requested variables (previously the argument was ignored and
    # every column of data was used as a feature).
    if variables is not None:
        wanted = {variables} if isinstance(variables, str) else set(variables)
        data = data[[col for col in data.columns if col == target or col in wanted]]

    # normalized data (normalize() removes columns that become all-NaN)
    normalized_data = normalize(data)
    if target not in normalized_data.columns:
        raise ValueError(
            "target '{}' is missing from the data or was removed by normalization".format(target))
    # remaining features
    remaining_features = [col for col in normalized_data.columns if col != target]

    # Retrieve the target and independent variables
    y = normalized_data[target]
    X = normalized_data[remaining_features]

    kf = KFold(n_splits=10)
    rr = Ridge(alpha=0.01)

    # Fit on the training part of every fold and keep the absolute coefficients.
    fold_importances = []
    for train, _ in kf.split(X, y):
        rr.fit(X.iloc[train], y.iloc[train])
        fold_importances.append(pd.DataFrame({
            'feature_importance': np.abs(rr.coef_),
            'feature': remaining_features,
        }))
    feature_importance_all = pd.concat(fold_importances)

    # Average over the folds, most important feature first.
    feature_importance_all = (
        feature_importance_all.groupby('feature').mean()
        .sort_values('feature_importance', ascending=False)
        .reset_index()
    )

    # 'index' grows as the importance shrinks.
    # NOTE(review): presumably consumed by feature_importance(), which plots max(index) - index - confirm.
    feature_importance_all['index'] = -feature_importance_all['feature_importance']

    return feature_importance_all

def linear_regression_feature_importance(data, target, variables):
    """
    Function that calculates the feature importances based on the k-fold linear_regression
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    target: str - dependent variable (y)
    variables: list - list of independent variables (X); None means every column except target
    Returns:
    pd.DataFrame - columns 'feature', 'feature_importance' (mean absolute coefficient
                   over the 10 folds, largest first) and 'index' (the negated importance)
    Raises:
    ValueError - when target is not available after normalization
    """
    # Honour the requested variables (previously the argument was ignored and
    # every column of data was used as a feature).
    if variables is not None:
        wanted = {variables} if isinstance(variables, str) else set(variables)
        data = data[[col for col in data.columns if col == target or col in wanted]]

    # normalized data (normalize() removes columns that become all-NaN)
    normalized_data = normalize(data)
    if target not in normalized_data.columns:
        raise ValueError(
            "target '{}' is missing from the data or was removed by normalization".format(target))
    # remaining features
    remaining_features = [col for col in normalized_data.columns if col != target]

    # Retrieve the target and independent variables
    y = normalized_data[target]
    X = normalized_data[remaining_features]

    kf = KFold(n_splits=10)
    lr = LinearRegression()

    # Fit on the training part of every fold and keep the absolute coefficients.
    fold_importances = []
    for train, _ in kf.split(X, y):
        lr.fit(X.iloc[train], y.iloc[train])
        fold_importances.append(pd.DataFrame({
            'feature_importance': np.abs(lr.coef_),
            'feature': remaining_features,
        }))
    feature_importance_all = pd.concat(fold_importances)

    # Average over the folds, most important feature first.
    feature_importance_all = (
        feature_importance_all.groupby('feature').mean()
        .sort_values('feature_importance', ascending=False)
        .reset_index()
    )

    # 'index' grows as the importance shrinks.
    # NOTE(review): presumably consumed by feature_importance(), which plots max(index) - index - confirm.
    feature_importance_all['index'] = -feature_importance_all['feature_importance']

    return feature_importance_all

def random_forest_feature_selection(data, target, variables):
    """
    Function that calculates the feature importances based on the k-fold random forest regression
    Parameters:
    data: pd.DataFrame - dataframe containing the data for ML training.
    target: str - dependent variable (y)
    variables: list - list of independent variables (X)
    Returns:
    pd.DataFrame - columns 'feature', 'index' (mean rank over the 10 folds, 0 = most
                   important) and 'feature_importance' (mean importance), best rank first
    """
    X = data[variables]
    y = data[target]
    kf = KFold(n_splits=10)
    rf = RandomForestRegressor(n_estimators=50)

    per_fold = []
    for train_rows, _ in kf.split(X, y):
        rf.fit(X.iloc[train_rows], y.iloc[train_rows])
        # Rank the features of this fold: sort by importance (largest first);
        # the fresh 0..n-1 row number becomes the 'index' (rank) column.
        ranked = (
            pd.DataFrame({
                'feature_importance': rf.feature_importances_,
                'feature': variables,
            })
            .sort_values('feature_importance', ascending=False)
            .reset_index(drop=True)
            .reset_index()
        )
        per_fold.append(ranked[['index', 'feature_importance', 'feature']])

    # Average rank and importance of every feature over the folds.
    combined = pd.concat(per_fold)
    averaged = combined.groupby('feature').mean()
    return averaged.sort_values('index', ascending=True).reset_index()

def feature_importance(feature_importance_all):
    """
    Function that plots a horizontal bar chart for the first 10 rows of a feature importance table
    Parameters:
    feature_importance_all: pd.DataFrame - table with 'feature' and 'index' columns
    """
    # limit the chart to the first 10 rows
    top_ten = feature_importance_all.head(10)

    # Bar length is (largest index - index): the row with the largest 'index'
    # gets a zero-length bar and smaller 'index' values get longer bars.
    highest_index = max(top_ten['index'])
    bar_lengths = highest_index - top_ten['index']

    plt.figure(figsize=(10, 10))
    plt.suptitle('Feature Importance')
    plt.barh(top_ten['feature'], bar_lengths)
    plt.show()