In [56]:
import itertools
import pandas
import networkx
import matplotlib.pyplot as pyplot

# Fix for matplotlib w/ Python3
#from cairocffi import *

from numpy import abs,mean,isnan

In [2]:
# Load the raw scan records captured while the items were stationary.
data = pandas.read_csv(filepath_or_buffer='data/raw_data.csv')

In [14]:
# Distance proxy between an antenna and an item: C / count with C = 1000,
# so an antenna that reads an item more often is treated as being closer.
data['dist'] = data['count'].map(lambda count: 1000.0 / count)

# Keep only rows under the distance cutoff (500) whose serial number is
# non-zero; serial number 0 marks bogus reads.
keep_rows = (data['dist'] < 500) & (data['serial_number'] != 0)
data = data[keep_rows]

# Every antenna that still has at least one row.
antennas = set(data['antenna_id'])

# An item is identified by the first two columns of a row. Map each item to
# the list of (antenna, distance) scans that mention it; the antenna is read
# from column 2 and the distance from column 4.
items = {(row[0], row[1]) for row in data.values}
item_data = dict((item, []) for item in items)

for row in data.values:
    item_data[(row[0], row[1])].append((row[2], row[4]))

In [36]:
class PairMapper:
    """
    Accumulates distance estimates between pairs of antennas.

    For every item scanned by two different antennas, the two antenna-to-item
    distances give a lower bound |d_x - d_y| and an upper bound d_x + d_y on
    the antenna-to-antenna distance. These bounds are collected per pair by
    `process` and averaged by `calculate_averages`.
    """

    def __init__(self, antennas):
        """
        antennas: iterable of antenna identifiers. Every unordered pairing gets
        an entry in `self.pair_data`, keyed by a tuple in whatever order
        itertools.combinations produced it.
        """
        # All pairings of antennas
        pairs = list(itertools.combinations(antennas, 2))

        # Each pairing has a list of minimum distances and maximum distances,
        # and some distances to calculate later.
        self.pair_data = {
            pair: {
                'min': [],
                'max': [],
                'avg_min': None,
                'avg_max': None,
                'dist': None
            } for pair in pairs }

        # Boolean for whether or not the proper averages are calculated
        self.__averages_calculated = False

    def _pair_key(self, ant_x, ant_y):
        """
        Return the key under which the unordered pair (ant_x, ant_y) is stored.

        The antennas may have been paired in either order at construction time,
        so both orientations are checked. Raises KeyError if the pair is unknown.
        """
        if (ant_x, ant_y) in self.pair_data:
            return (ant_x, ant_y)
        if (ant_y, ant_x) in self.pair_data:
            return (ant_y, ant_x)
        raise KeyError((ant_x, ant_y))

    def process(self, item_scans):
        """
        Takes a list of tuples (antenna, distance) that report scans of a single item by multiple antennas.
        This function finds all pairings of antennas from this list and contributes a single entry into the
        min and max lists that are the difference and sum, respectively, of the two distances reported by the
        pair of antennas.

        Two scans reported by the same antenna carry no information about the distance between
        antennas, so such pairings are skipped. Raises KeyError if a scan names an antenna that
        was not given to the constructor.
        """
        # Everytime we process new data, the averages must be recalculated
        self.__averages_calculated = False

        for i, (ant_x, dist_x) in enumerate(item_scans):
            # Each pairing of this scan with a later scan
            for ant_y, dist_y in item_scans[i + 1:]:
                # An antenna is never paired with itself, so there is no key for it.
                if ant_y == ant_x:
                    continue

                entry = self.pair_data[self._pair_key(ant_x, ant_y)]

                # Add the min and max distances
                entry['min'].append(abs(dist_x - dist_y))
                entry['max'].append(dist_x + dist_y)

    def calculate_averages(self):
        """
        Average the collected bounds for every pair and return a list of
        (pair, avg_min, avg_max, dist) tuples, where dist is the mean of
        avg_min and avg_max. Pairs with no data keep None in all three fields.
        """
        # Don't recalculate if not needed
        if self.__averages_calculated:
            return self.__get_avg_data()

        for pair_dict in self.pair_data.values():
            mins = pair_dict['min']
            maxs = pair_dict['max']

            # Some pairings have no data
            if len(mins) == 0 or len(maxs) == 0:
                continue

            avg_min = mean(mins)
            avg_max = mean(maxs)

            pair_dict['avg_min'] = avg_min
            pair_dict['avg_max'] = avg_max
            pair_dict['dist'] = mean([avg_min, avg_max])

        # Everything is up to date now, even if no pair had any data.
        self.__averages_calculated = True

        # Return the averages
        return self.__get_avg_data()

    def get_distances(self):
        """Return a list of (pair, dist) tuples; dist is None until averaged or when the pair has no data."""
        return [(pair, pair_dict['dist']) for (pair, pair_dict) in self.pair_data.items()]

    def __get_avg_data(self):
        """Return a list of (pair, avg_min, avg_max, dist) tuples for every pair."""
        return [
            (pair, pair_dict['avg_min'], pair_dict['avg_max'], pair_dict['dist'])

            for (pair, pair_dict) in self.pair_data.items()
        ]

In [37]:
# Build a pair mapper that knows about every antenna seen in the data.
mapper = PairMapper(antennas)

# Feed it one item's scans at a time; the item identity itself is not needed.
for item_scans in item_data.values():
    mapper.process(item_scans)

In [43]:
# Tabulate each antenna pair's averaged lower bound, upper bound and distance estimate.
avg_columns = ['antenna_pair', 'avg_min', 'avg_max', 'avg_dist']
pair_dists = pandas.DataFrame(mapper.calculate_averages(), columns=avg_columns)

# Drop the pairings that had no data, i.e. whose distance estimate is missing.
has_estimate = pair_dists['avg_dist'].notnull()
pair_dists = pair_dists[has_estimate]

pair_dists

Unnamed: 0,antenna_pair,avg_min,avg_max,avg_dist
0,"(217, 1821)",40.914221,42.419112,41.666667
2,"(1213, 925)",134.786481,245.250149,190.018315
7,"(1625, 1817)",152.272727,364.393939,258.333333
9,"(528, 1505)",13.888889,97.222222,55.555556
12,"(530, 531)",121.094167,158.750001,139.922084
16,"(129, 1728)",98.389694,101.610306,100.000000
18,"(1323, 1004)",131.560140,190.603223,161.081681
22,"(1322, 430)",127.055690,228.328926,177.692308
23,"(617, 1217)",165.517241,234.482759,200.000000
24,"(1211, 1301)",91.333710,129.868017,110.600864


In [46]:
# Undirected graph with one node per antenna and one edge per antenna pair
# that has a distance estimate.
g = networkx.Graph()
g.add_nodes_from(antennas)

# Each pair_dists row is (antenna_pair, avg_min, avg_max, avg_dist).
# Graphviz's neato takes the preferred edge length from the `len` attribute;
# `weight` only controls how strongly that length is enforced. Store the
# estimate under both names so the layout reflects the distances, while code
# that reads `weight` keeps working.
edges = [
    (row[0][0], row[0][1], {'weight': row[3], 'len': row[3]})
    for row in pair_dists.values
]
g.add_edges_from(edges)

In [57]:
# Position the antennas with Graphviz's neato layout, then draw nodes and edges.
# Older networkx releases expose graphviz_layout at the top level; later ones
# only provide it from the nx_agraph (pygraphviz-backed) drawing module, so
# fall back to that when the top-level name is missing.
try:
    graphviz_layout = networkx.graphviz_layout
except AttributeError:
    from networkx.drawing.nx_agraph import graphviz_layout

pos = graphviz_layout(g, prog='neato')
networkx.draw_networkx_nodes(g, pos, nodelist=list(antennas), node_color='b', node_size=100, alpha=0.8)
networkx.draw_networkx_edges(g, pos, width=1, alpha=0.5)
pyplot.show()

NotImplementedError: Surface.create_for_data: Not Implemented yet.

NotImplementedError: Surface.create_for_data: Not Implemented yet.

NotImplementedError: Surface.create_for_data: Not Implemented yet.

  GLib.source_remove(self._idle_event_id)
