In [6]:
import csv, sys, math, datetime, pickle
import matplotlib.pyplot as plt
import numpy

#data structure to store information on each location in relation to a person
class Location():
    def __init__(self, datetime, x, y):
        self.x = x
        self.y = y
        #coordinates is a list of all the close points included in this location
        self.coordinates = []
        self.coordinates.append([x,y])
        
        self.avgTimes = datetime.time()
        self.stdTimes = datetime.time()
        self.avgInterval = 0
        self.stdInterval = 0
        self.range = 0
        self.times = []
        self.intervals = []
        
        #initialized with a time
        self.addTime(datetime, x, y)
    
    #redefines x and y as the average of all nearby points
    def update(self):
        sumx = 0
        sumy = 0
        for coord in self.coordinates:
            sumx += coord[0]
            sumy += coord[1]
        self.x = sumx/len(self.coordinates)
        self.y = sumy/len(self.coordinates)
    
    #takes a datetime type and adds it to the list of times this location was visited
    def addTime(self, datetime, x, y):
        #we will update the new location center as an aggregate 
        self.coordinates.append([x,y])
        self.update()
        
        self.times.append(datetime)
        
    def analyze(self):
        #get the average time
        minutes = [(i.hour*60 + i.minute) for i in self.times]
        taverage = int(numpy.average(minutes))
        self.avgTimes = datetime.time(taverage/60, taverage%60)
        
        #get the standard deviation of times
        tstd = int(numpy.std(minutes))
        self.stdTimes = datetime.time(tstd/60, tstd%60)
        
        #calc range of visit times, is this one off, several weeks, months?
        self.range = self.times[-1] - self.times[0]
        
        #if there is only one instance then we can't have intervals and there's nothing to change
        if len(self.times) == 1:
            return
        
        #the times need to be sorted because I want to track successive visits
        self.times.sort()
        
        #create a list of the intervals between visits
        for i in range(len(self.times)-1):
            self.intervals.append(self.times[i+1] - self.times[i])
        
        #find the average interval between visits
        self.avgInterval = sum(self.intervals, datetime.timedelta(0)) / len(self.times)
        
        #get the variance to see how regular the interval is
        #Converts to second, gets the variance and then converts back to a timedelta
        self.stdInterval = datetime.timedelta(seconds=(numpy.std([time.total_seconds() for time in self.intervals])))
        
        
    def close(self, x, y):
        if (pow((self.x - x), 2) + pow((self.y - y), 2)) < 0.01:
            return True
        return False
        
            
#data structure to hold information for each person separately
class Person():
    def __init__(self, number):
        #given identity and and initialize dictionaries for storing location/time data
        self.identity = number
        self.locations = []
    
    #adds each data point to the locations table.
    def addPoint(self, time, x, y):
        #decide if the point should be added to a pre-existing spot
        for location in self.locations:
            if location.close(x, y):
                location.addTime(time, x, y)
                #this assumes it's not close to more than one pre-existing spot, 
                return
            
        #otherwise add a new location at x
        self.locations.append(Location(time, x, y))
        
    #run the analysis on all locations
    def analyze(self):
        for location in self.locations:
            location.analyze()

def createDate(timestamp):
    dateandtime = timestamp.split(" ")
    dateandtime[0] = dateandtime[0].split("-")
    dateandtime[1] = dateandtime[1].split(":")
    return datetime.datetime(int(dateandtime[0][0]), int(dateandtime[0][1]), int(dateandtime[0][2]), int(dateandtime[1][0]), int(dateandtime[1][1]), int(dateandtime[1][2]))
        

    
data = pickle.load(open("data.p", "rb"))


#used for printing data to a text file
fd = open("data.txt", "w")

for person in data:
    fd.write("\nUser #{}\n".format(person.identity))
    for i in range(len(person.locations)):
        fd.write("Location #{}\n".format(i))
        fd.write("Times Visited={}\n".format(len(person.locations[i].times)))
        fd.write("coordinates={}{}\n".format(person.locations[i].x, person.locations[i].y))
        fd.write("Average time visited={}\n".format(person.locations[i].avgTimes))
        fd.write("Standard deviation of times={}\n".format(person.locations[i].stdTimes))
        fd.write("Average interval between visits={}\n".format(person.locations[i].avgInterval))
        fd.write("Standard deviation of intervals={}\n".format(person.locations[i].stdInterval))
        fd.write("Range of times={}\n\n".format(person.locations[i].range))
        
    
fd.close()

'''

#used for individual person location graphs
#for person in data:
        #xy = person.locations
        #x = []
        #y = []
        #for i in xy:
            #x.append(i.x)
            #y.append(i.y)
        #sctTest = plt.scatter(x, y, color='#900C3F', linewidth=0.1)
        #plt.xlabel('x coordinate')
        #plt.ylabel('y coordinate')
        #plt.axis([-0.4,1.4,-0.4,1.4])
        #plt.savefig(('plots\\' + str(person.identity) + 'graph.png'), bbox_inches='tight', dpi=900)
        
        #plt.cla()
        #plt.clf()
        
#used for aggregate graph
#x = []
#y = []
#for person in data:
#        xy = person.locations
#        for i in xy:
#            x.append(i.x)
#            y.append(i.y)
#
#sctTest = plt.scatter(x, y, color='#900C3F', linewidth=0.1)
#plt.xlabel('x coordinate')
#plt.ylabel('y coordinate')
#plt.axis([-0.4,1.4,-0.4,1.4])
#plt.savefig(('plots\\' + 'allgraph.png'), bbox_inches='tight', dpi=900)
        
#plt.cla()
#plt.clf()
'''
class LocationsMaster():
    def __init__(self, location, identity):
        self.x = 0
        self.y = 0
        
        #coordinates is a list of all the close points included in this location
        self.coordinates = []
        
        self.people = []
        self.avgTimes = []
        self.stdTimes = []
        self.avgIntervals = []
        self.stdIntervals = []
        self.ranges = []
        
        self.addPerson(location, identity)

    def update(self):
        sumx = 0
        sumy = 0
        for coord in self.coordinates:
            sumx += coord[0]
            sumy += coord[1]
        self.x = sumx/len(self.coordinates)
        self.y = sumy/len(self.coordinates)
        
    def addPerson(self, location, identity):
        self.coordinates.extend(location.coordinates)
        self.update()
        
        if identity not in self.people:
            self.people.append(identity)
        self.avgTimes.append(location.avgTimes)
        self.stdTimes.append(location.stdTimes)
        self.avgIntervals.append(location.avgInterval)
        self.stdIntervals.append(location.stdInterval)
        self.ranges.append(location.range)
        
    def close(self, x, y):
        if (pow((self.x - x), 2) + pow((self.y - y), 2)) < 0.01:
            return True
        return False
    
#create one masterlist of locations
master = []
for person in data:
    for location in person.locations:
        found = False
        for mlocation in master:
            if mlocation.close(location.x, location.y):
                mlocation.addPerson(location, person.identity)
                #this assumes it's not close to more than one pre-existing spot, 
                found = True
            
        #otherwise add a new location at x
        if (not found):
            master.append(LocationsMaster(location, person.identity))
            
for i in master:
    print i.people

'''
#used for box plots of average times at locations
c = 0
for location in master:
    x = []
    for i in location.avgTimes:
        x.append(i.hour)

    sctTest = plt.hist(x, label="histogram of average time for location {}".format(c))
    plt.xlabel('average time')
    plt.ylabel('occurances')
    plt.savefig(('plots\\' + 'avg_times_hist'+ str(c) + '.png'), bbox_inches='tight', dpi=900)
    c += 1
    
    plt.cla()
    plt.clf()
'''
        



[0, 1, 2, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 29, 30, 31, 32, 35, 36, 37, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 57, 59, 60, 62, 64, 66, 67, 72, 73, 74, 75, 78, 80, 81, 82, 83, 86, 87, 88, 89, 90, 91, 92, 95, 96, 98, 99]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 15, 16, 17, 20, 21, 22, 24, 26, 27, 29, 31, 32, 35, 36, 37, 38, 40, 41, 42, 45, 46, 47, 49, 50, 51, 52, 53, 56, 57, 59, 61, 62, 63, 64, 65, 67, 68, 70, 73, 76, 78, 79, 80, 81, 82, 84, 85, 86, 88, 89, 90, 91, 92, 94, 95, 96, 97, 98, 99]
[0, 1, 2, 3, 4, 8, 9, 14, 16, 17, 19, 20, 21, 22, 24, 25, 27, 29, 30, 33, 35, 39, 40, 41, 42, 43, 44, 45, 47, 49, 50, 53, 54, 56, 62, 63, 64, 66, 67, 70, 71, 77, 81, 82, 84, 85, 87, 89, 91, 92, 93, 94, 95]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 20, 21, 22, 23, 26, 27, 31, 34, 35, 36, 37, 38, 39, 41, 43, 46, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 79, 80, 84

'\nc = 0\nfor location in master:\n    x = []\n    for i in location.avgTimes:\n        x.append(i.hour)\n\n    sctTest = plt.hist(x, label="histogram of average time for location {}".format(c))\n    plt.xlabel(\'average time\')\n    plt.ylabel(\'occurances\')\n    plt.savefig((\'plots\\\' + \'avg_times_hist\'+ str(c) + \'.png\'), bbox_inches=\'tight\', dpi=900)\n    c += 1\n    \n    plt.cla()\n    plt.clf()\n'