In [8]:
from itertools import combinations
import numpy as np
import pandas as pd
from pathlib import Path
from pi_pact_sort import categorize
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import tensorflow as tf

DROP_COLUMNS = ['ADDRESS', 'TIMESTAMP', 'UUID', 'MAJOR', 'MINOR', 'TX POWER', 'TEMPERATURE',
                'PITCH', 'ROLL', 'YAW', 'SCAN']
FEATURE_COLUMNS = ['RSSI', 'HUMIDITY', 'PRESSURE']
SAMPLE_SIZE = 30000

# Initialize DataFrame
data: pd.DataFrame = pd.DataFrame(columns=['RSSI', 'DISTANCE', 'HUMIDITY', 'PRESSURE'])
data_copy: pd.DataFrame = data.copy()
csv_file: Path
for csv_file in Path('.').glob('indoor-noObstruct-SenseHat*/*.csv'):
    datapart: pd.DataFrame = pd.read_csv(csv_file)
    for column in DROP_COLUMNS:
        if column in datapart.columns:
            datapart = datapart.drop([column], 1)
    data_copy = data_copy.append(datapart)

# Categorize distance
data_copy['DISTANCE'] = data_copy['DISTANCE'].map(categorize)

# Sample data from each distance category
for value in data_copy['DISTANCE'].unique():
    datapart = data_copy[data_copy.DISTANCE == value]
    datapart = datapart.sample(SAMPLE_SIZE)
    data = data.append(datapart)

# Assign features and labels
min_max_scaler = preprocessing.MinMaxScaler()
X: pd.DataFrame = min_max_scaler.fit_transform(data.drop(['DISTANCE'], 1))
y: pd.Series = data['DISTANCE']
    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Code below adapted from Tensorflow's "Build a linear model with Estimators" tutorial:
# https://www.tensorflow.org/tutorials/estimator/linear
feature_columns = []
for feature_name in FEATURE_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

for feature1_name, feature2_name in combinations(FEATURE_COLUMNS, 2):
    feature_columns.append(tf.feature_column.crossed_column([feature1_name, feature2_name], hash_bucket_size=1000))

def make_input_fn(data_df, label_df, num_epochs=100, shuffle=True, batch_size=32):
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
        ds = ds.shuffle(1000)
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function

train_input_fn = make_input_fn(X_train, y_train)
eval_input_fn = make_input_fn(X_test, y_test, num_epochs=1, shuffle=False)

linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)
result = linear_est.evaluate(eval_input_fn)

UnboundLocalError: local variable 'ds' referenced before assignment