In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

In [2]:
class Attendance:
    """
    Extracts data given a directory containing reference and transient datasets as .txt files.

    On construction the reference file is reduced to its strongest MAC addresses, every
    other .txt file in the directory is projected onto those same MAC addresses, and the
    Euclidean distance between each file and the reference is computed.

    :param string directory:
        The directory in which all of the recorded datasets are stored.
        Default is "streams".
    :param string reference_file:
        The name of the file intended to be used as the reference dataset.
        Default is "podium.txt".
    """

    # RSSI substituted for zero readings and for reference MAC addresses that a
    # dataset never observed (the "-95 sensitivity" value).
    RSSI_FLOOR = -95
    # Number of strongest MAC addresses kept per dataset.
    TOP_N = 20

    def __init__(self, directory="streams", reference_file="podium.txt"):
        self.directory = directory
        self.reference_file = reference_file
        # Order matters: the reference must exist before the operational files are
        # read, because dictify_data() projects them onto the reference MAC addresses.
        self.reference_data = self.dictify_data(self.reference_file)
        self.operational_data = self.dictify_op_data()
        self.location_scores = self.calc_loc_scores()


    def dictify_data(self, file):
        """
        Reads one dataset and averages the RSSI per MAC address.
        Pandas Documentation: https://pandas.pydata.org/pandas-docs/stable/reference/index.html

        If the object already has a non-empty reference dataset, the result is restricted
        to the reference MAC addresses, and any reference MAC address missing from this
        file is reported with the sensitivity floor (-95).

        :param string file:
            The name of the file that contains data to be cleaned.
            Must be inside of the object's directory
            e.g. "podium.txt" or "first_row.txt".

        :return:
            A dictionary containing MAC addresses as keys and their respective received signal
            strength indicators as values. {"a1:b2:c3:d4:e5:f6": 0, ... "f6:e5:d4:c3:b2:a1": -95}
            At most the 20 strongest entries are returned, strongest first.
        :rtype: dict
        :raises ValueError:
            If the file has no MAC address / RSSI columns.
        """

        file_path = os.path.join(self.directory, file)
        # the files have no header line, so keep the first row as data
        raw = pd.read_csv(file_path, header=None)
        # drop device id, latitude, longitude (first three) & date (last) columns
        pairs = raw.iloc[:, 3:-1]
        if pairs.shape[1] == 0:
            raise ValueError(f"{file_path} contains no MAC address/RSSI columns")

        # the remaining columns are repeated (mac address, rssi) pairs laid out side by
        # side; stack every pair underneath the previous one into a two-column frame
        names = ("mac_address", "average_rssi")
        stacked = pd.concat(
            [
                pairs.iloc[:, start:start + 2].rename(
                    columns=dict(zip(pairs.columns[start:start + 2], names))
                )
                for start in range(0, pairs.shape[1], 2)
            ],
            ignore_index=True,
        )

        # replace all zero values with the -95 sensitivity
        stacked = stacked.replace(0, self.RSSI_FLOOR)
        # average rssi for each unique mac address, indexed by mac address
        averages = stacked.groupby("mac_address")["average_rssi"].mean()

        # The attribute does not exist yet while the reference file itself is being
        # read from the constructor, hence getattr() instead of a swallowed exception.
        reference = getattr(self, "reference_data", None)
        if reference:
            # keep only the reference mac addresses; fill_value applies solely to
            # addresses this file never saw
            averages = averages.reindex(list(reference), fill_value=self.RSSI_FLOOR)

        # sort by highest average rssi and keep the strongest entries
        strongest = averages.sort_values(ascending=False).head(self.TOP_N)
        return strongest.to_dict()


    def dictify_op_data(self):
        """
        Called by constructor to create the operational data attribute.
        OS Documentation: https://docs.python.org/3/library/os.html
        Pandas Documentation: https://pandas.pydata.org/pandas-docs/stable/reference/index.html

        Every ".txt" file in the object's directory other than the reference file is
        passed through dictify_data().

        :return:
            A 2-D dictionary containing file names as keys with dictionaries containing
            MAC addresses and their respective received signal strength indicators as values.
            {"file_1.txt": {"a1:b2:c3:d4:e5:f6": 0,  ... "f6:e5:d4:c3:b2:a1": -95}, ...
             "file_n.txt": {"b2:c3:d4:e5:f6:a1": -45,  ... "c3:d4:e5:f6:a1:b2": -90}}
        :rtype: dict
        """

        return {
            file: self.dictify_data(file)  # {file_name: {mac_address: average_rssi}}
            for file in os.listdir(self.directory)
            if file.endswith(".txt") and file != self.reference_file
        }


    def calc_loc_scores(self):
        """
        Called by constructor to create the location scores attribute.
        Calculates the Euclidean distance between reference and operational data.

        Both vectors are built from the reference MAC addresses in sorted order, so
        values are always compared address-by-address. A reference MAC address missing
        from an operational dictionary counts as the sensitivity floor (-95).

        :return:
            A dictionary containing file names as keys and their respective calculated
            Euclidean distances as values. {"file_1.txt": 50.125, ... "file_n": 93.725}
        :rtype: dict
        """

        macs = sorted(self.reference_data)
        ref = np.array([self.reference_data[mac] for mac in macs])

        location_scores = {}
        for file, ms_pair in self.operational_data.items():
            op = np.array([ms_pair.get(mac, self.RSSI_FLOOR) for mac in macs])
            location_scores[file] = np.linalg.norm(ref - op)

        return location_scores

In [3]:
# Build the Attendance object from the "input" directory, using "podium.txt" as the
# reference dataset; the constructor reads the files and computes all three attributes.
cr2055 = Attendance("input", "podium.txt")

In [4]:
# Reference dataset: mean RSSI per MAC address, strongest first (at most 20 entries).
cr2055.reference_data

{'5c:5a:c7:6e:d9:22': -53.59016393442623,
 '5c:5a:c7:6e:d9:23': -53.59016393442623,
 '5c:5a:c7:6e:d9:24': -53.622950819672134,
 '5c:5a:c7:6e:d9:21': -53.704918032786885,
 '5c:5a:c7:6e:d9:20': -54.60655737704918,
 '5c:5a:c7:6e:d9:2b': -56.950819672131146,
 '5c:5a:c7:6e:d9:2f': -56.950819672131146,
 '5c:5a:c7:6e:d9:2e': -56.950819672131146,
 '5c:5a:c7:6e:d9:2d': -56.950819672131146,
 '5c:5a:c7:6e:d9:2c': -56.967213114754095,
 'b2:7f:b9:95:cc:9a': -65.81967213114754,
 'b0:7f:b9:95:cc:99': -65.8688524590164,
 'b0:0c:d1:07:31:41': -66.60655737704919,
 '5c:5a:c7:5b:09:c2': -69.26229508196721,
 '5c:5a:c7:5b:09:c4': -69.27868852459017,
 '5c:5a:c7:5b:09:c3': -69.31147540983606,
 '5c:5a:c7:5b:09:c1': -69.49180327868852,
 '5c:5a:c7:5b:09:c0': -69.81967213114754,
 '5c:5a:c7:6b:76:cf': -69.98360655737704,
 '5c:5a:c7:6b:76:ce': -69.98360655737704}

In [5]:
# Per-file mean RSSI, restricted to the MAC addresses present in the reference dataset.
cr2055.operational_data

{'second_row.txt': {'5c:5a:c7:6e:d9:2e': -40.24590163934426,
  '5c:5a:c7:6e:d9:2f': -40.24590163934426,
  '5c:5a:c7:6e:d9:2b': -40.24590163934426,
  '5c:5a:c7:6e:d9:2c': -40.24590163934426,
  '5c:5a:c7:6e:d9:2d': -40.24590163934426,
  '5c:5a:c7:6e:d9:23': -49.09836065573771,
  '5c:5a:c7:6e:d9:22': -49.09836065573771,
  '5c:5a:c7:6e:d9:24': -49.09836065573771,
  '5c:5a:c7:6e:d9:21': -49.14754098360656,
  '5c:5a:c7:6e:d9:20': -50.14754098360656,
  'b0:0c:d1:07:31:41': -65.85245901639344,
  '5c:5a:c7:5b:09:c3': -68.0327868852459,
  '5c:5a:c7:5b:09:c4': -68.04918032786885,
  '5c:5a:c7:5b:09:c1': -68.11475409836065,
  '5c:5a:c7:5b:09:c2': -68.1311475409836,
  '5c:5a:c7:5b:09:c0': -68.77049180327869,
  '5c:5a:c7:6b:76:cf': -71.18032786885246,
  '5c:5a:c7:6b:76:ce': -71.40983606557377,
  'b0:7f:b9:95:cc:99': -72.63934426229508,
  'b2:7f:b9:95:cc:9a': -72.70491803278688},
 'outside_classroom.txt': {'5c:5a:c7:6e:d9:24': -54.19672131147541,
  '5c:5a:c7:6e:d9:23': -54.22950819672131,
  '5c:5a:c7:

In [6]:
# Euclidean distance between each file's RSSI values and the reference values;
# a smaller distance means the file's readings are closer to the reference.
cr2055.location_scores

{'second_row.txt': 40.025002264020955,
 'outside_classroom.txt': 33.5160877430141,
 'outside_building.txt': 133.4636729266332,
 'first_row.txt': 53.748509746484714,
 'my_office.txt': 140.25095545658453,
 'fourth_row.txt': 28.796966008184594,
 'red_chairs.txt': 84.7195223888648,
 'third_row.txt': 25.19187854098107}