# Gradient Boosted Trees

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
from sklearn.model_selection import GridSearchCV
from xgboost.sklearn import XGBClassifier

# Columns removed from every loaded CSV (when present) before the rows are
# combined, so they never reach the feature matrix.
# Must stay a list: pandas treats a tuple passed to drop() as a single label.
DROP_COLUMNS = [
    'ADDRESS',
    'TIMESTAMP',
    'UUID',
    'MAJOR',
    'MINOR',
    'TX POWER',
    'TEMPERATURE',
    'PITCH',
    'ROLL',
    'YAW',
    'SCAN',
]

# Number of rows sampled from each distance category when building the training set.
SAMPLE_SIZE = 30_000


"""Trains a Gradient Boosted Trees classifier to predict a distance range given RSSI values and other variables.
"""

# Initialize DataFrame
# `data` stays empty in this step: it fixes the leading column order and is the
# frame that later receives the per-category sample.
data: pd.DataFrame = pd.DataFrame(columns=['RSSI', 'DISTANCE', 'HUMIDITY', 'PRESSURE'])
data_copy: pd.DataFrame
frames: list = []
csv_file: Path
# Path.glob yields files in filesystem order; sorting makes the combined row
# order (and hence any seeded sampling of these rows) reproducible.
for csv_file in sorted(Path('.').glob('indoor-noObstruct-SenseHat*/*.csv')):
    datapart: pd.DataFrame = pd.read_csv(csv_file)
    # Keyword `columns=` replaces the positional axis argument (removed in
    # pandas 2.0); errors='ignore' skips columns a given file does not contain.
    datapart = datapart.drop(columns=DROP_COLUMNS, errors='ignore')
    frames.append(datapart)

if frames:
    # Concatenate once instead of calling DataFrame.append per file
    # (append was removed in pandas 2.0 and re-copied the frame every iteration).
    data_copy = pd.concat(frames)
    # Keep the expected columns first, adding any that no file provided as NaN,
    # followed by whatever extra columns the CSVs carried.
    leading = list(data.columns)
    extras = [name for name in data_copy.columns if name not in leading]
    data_copy = data_copy.reindex(columns=leading + extras)
else:
    # No matching CSV files: fall back to an empty frame with the expected columns.
    data_copy = data.copy()

# Categorize distance: replace every raw DISTANCE value with whatever `categorize`
# returns for it (Series.map calls the function once per element).
# NOTE(review): `categorize` is neither defined nor imported in the visible part of
# this file - confirm it is provided elsewhere, otherwise this line raises NameError.
data_copy['DISTANCE'] = data_copy['DISTANCE'].map(categorize)

# Sample data from each distance category
# Every category contributes exactly SAMPLE_SIZE rows, drawn with the same fixed
# seed. DataFrame.sample raises ValueError when a category holds fewer than
# SAMPLE_SIZE rows; that is left as-is so an under-populated class is not
# silently under-represented.
samples = [
    data_copy[data_copy['DISTANCE'] == value].sample(SAMPLE_SIZE, random_state=1)
    for value in data_copy['DISTANCE'].unique()
]
if samples:
    # One concat replaces repeated DataFrame.append calls (removed in pandas 2.0).
    data = pd.concat(samples)

# Assign features and labels
# X holds every remaining column except the target; y is the category as integers.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
X: np.ndarray = data.drop(columns=['DISTANCE']).to_numpy()
y: np.ndarray = data['DISTANCE'].to_numpy(dtype=int)
    
# Exhaustive hyper-parameter search over an XGBoost classifier.
# The grid spans 9 * 10 * 10 * 10 = 9000 candidate settings, each of which is
# cross-validated, so with n_jobs=1 (serial execution) this is a long-running step.
param_grid = {'random_state': [1, ],
              'max_depth': np.arange(2, 11),
              'learning_rate': np.linspace(0.1, 1.0, 10),
              'gamma': np.linspace(0.1, 1.0, 10),
              # XGBoost's native name for the L2 regularisation term
              # (exposed as `reg_lambda` in the scikit-learn wrapper).
              'lambda': np.linspace(0.5, 1.5, 10)
              }
grid = GridSearchCV(XGBClassifier(), param_grid, n_jobs=1)

# Run the search. Without this call the fitted attributes read afterwards
# (grid.best_params_, grid.best_score_) do not exist and raise AttributeError.
grid.fit(X, y)

In [None]:
# Report the best hyper-parameter combination found by the search together with
# its mean cross-validated score (the classifier's default score is accuracy).
best_params, best_score = grid.best_params_, grid.best_score_
print(best_params)
print('accuracy =', best_score)