# GPS Summary & Steepness Calculator
Cameron Kennedy, Fall 2017

## Notes
This is the Jupyter Notebook used to create the program before porting it over to a command line .py file.  Though the main program is in the next cell, there's a significant amount of fiddling around with code and other thoughts below that.

In [14]:
## [Cameron Kennedy]
'''### Program Overview ###
This program will take one or multiple GPX files, and provide summary information on them that I’ve found
difficult to obtain from existing software, specifically, both steepness analysis of a single event, and
analysis comparing multiple events.

For those unfamiliar, a GPX file (GPS Exchange Format) is an XML file generated from GPS data – often from fitness 
devices such as watches, phones, or dedicated devices (e.g., a cycling GPS) – when a person records a journey 
(typically an athletic event such as running, cycling, or hiking).  Most devices either store their data natively 
in GPX format, or can export to GPX.

Complexities of this program include classes calling other classes, the inherent imprecise nature of GPS data  
(especially with altitude data) and thus writing the necessary smoothing / approximating functions, working with the 
file system, and file sizes (GPX files I use are typically hundreds of kilobytes, which is fairly small but in 
aggregate could be a bit slow).

Users are required to supply their own GPX files, though several are included for testing purposes in folders d1 and 
d2 (located in the program's main folder).
'''
from datetime import datetime, timedelta
import numpy as np
import os  #Used for file & directory access
import gpxpy  #GPX parser
#If the gpxpy package is missing, the Anaconda installation command is: conda install -c conda-forge gpxpy
from sklearn.cluster import DBSCAN  #Used for clustering similar events
from operator import itemgetter

#Greeting banner, followed by a note telling graders where the sample data lives.
#Each message is followed by one blank line.
print('Welcome to the GPS Summary & Steepness Calculator!', end='\n\n')
print('***** W200 Instructors/Graders: Note, several sample GPX files for testing are located in '
      + 'sub-folders d1 and d2. *****', end='\n\n')

class Menu:
    '''Main menu. Prompts the user to load a file, load a folder, analyze the loaded data, or exit the program.

    Creating a Menu starts the interactive loop right away (__init__ calls display_menu), so the constructor
    only returns after the user quits.
    '''
    def __init__(self):
        self.choice = 0  #Most recent menu choice; 0 means nothing valid has been entered yet.
        self.choice_errormsg = '  INPUT ERROR! Please enter a valid main menu choice: 1, 2, 3 or 4.'
        self.data_to_analyze = None  #Single_Event or Multi_Event once loaded; None otherwise.
        self.display_menu()
    
    def display_menu(self):
        '''Prints the menu, prompts users for choice, displays currently loaded file(s), and handles errors.

        Loops until the user enters 4 (quit) or the input stream is closed.
        '''
        while self.choice != 4:
            #A failed load leaves an object whose str() is 'None'; normalize it to a real None.
            if str(self.data_to_analyze) == 'None':
                self.data_to_analyze = None
            print()
            print('MAIN MENU')
            print('Currently loaded file / folder: ' + str(self.data_to_analyze)) #Print loaded files, or None
            print('Please choose from the following options:')
            print('  1. Load Single GPX File.')
            print('  2. Load Multiple GPX Files.')
            print('  3. Analyze Data.')
            print('  4. Quit the program.')

            try:
                self.choice = int(input('  Enter your choice here: '))
            except ValueError:
                #Non-numeric entry. Resets to 0; the actual error message is printed by the 'else' branch below.
                self.choice = 0
            except EOFError:
                #The input stream is closed, so no further choice can ever arrive. Quit instead of looping forever.
                self.choice = 4
            
            if self.choice == 1:
                print('\n### Load Single GPX File selected. ###')
                self.load_single_file()
            elif self.choice == 2:
                print('\n### Load Multiple GPX Files selected. ###')
                self.load_multi_file()
            elif self.choice == 3:
                print('\n### Analyze Data selected. ###')
                Analyze_Data(self.data_to_analyze)
            elif self.choice == 4:
                print('Goodbye!')
            else:
                print(self.choice_errormsg)

    def load_single_file(self):
        '''Solicits then passes a filename string (valid or invalid) to the Single_Event class.'''
        print('Please enter a file name. Absolute or relative path names are accepted.')
        filename = str(input())
        self.data_to_analyze = Single_Event(filename)
        
    def load_multi_file(self):
        '''Solicits then passes a directory string (valid or invalid) to the Multi_Event class.
        Directories with only 1 GPX file are converted to a Single_Event class.
        Directories that end up with no usable events leave nothing loaded (None).
        '''
        print('Please enter a directory (i.e., folder) name. Absolute or relative path names are accepted.')
        dir_name = str(input())
        self.data_to_analyze = Multi_Event(dir_name)
        event_count = len(self.data_to_analyze.gpx_file_list)
        if event_count == 0:  #Nothing usable was loaded (or the user backed out), so clear the selection.
            self.data_to_analyze = None
        elif event_count == 1:  #Convert to Single_Event class if only one event in list.
            print('Only one GPX file found in this folder. Treating as single file (Single_Event class).')
            self.data_to_analyze = self.data_to_analyze.gpx_file_list[0]

class Single_Event:
    '''Takes a filename (and path, optionally) and 'returns' a parsed GPX file in its self.event attribute.

    If the file cannot be loaded, self.file_loc and self.event are both None and str() of the
    object is 'None'.
    '''
    def __init__(self, file_loc):
        self.file_loc = file_loc
        self.event = None
        self.load_file()  #__init__ methods can't return things (other than None), so calling separate method
    
    def load_file(self):
        '''Loads a single file.

        Only names ending in '.gpx' are accepted.  Any failure (wrong extension, unreadable file,
        unparseable content) prints a message and marks the object as not loaded rather than raising.
        '''
        try:
            if self.file_loc[-4:] != '.gpx':
                raise ValueError('Not a .gpx file: {}'.format(self.file_loc))
            #The with-block closes the file even when gpxpy fails part-way through parsing.
            with open(self.file_loc, 'r') as gpx_file:  #r for read-only
                event = gpxpy.parse(gpx_file)  #This is where gpxpy interprets the gpx xml code.
            event.adjust_time(timedelta(hours=-7))  #Offsets event times 7 hours from GMT.
            #A future version could include local time of the event, but it doesn't appear to be stored in the GPX file.
            self.event = event  #Only publish the event once every step has succeeded.
        except Exception:  #Deliberately broad (any load problem), but lets Ctrl-C through.
            print('Invalid file. Please enter a valid GPX file.')
            self.file_loc = None
            self.event = None
            return None
            
    def __str__(self):
        if self.file_loc is not None:
            return str('Single Event: File = ' + os.path.basename(self.file_loc))  #Print file, not full path.
        else:
            return 'None'

class Multi_Event:
    '''Takes a directory and 'returns' a list of multiple Single_Event objects in self.gpx_file_list.

    After loading, the user is asked to pick one group of similar events; self.gpx_file_list is then
    reduced to that group (or emptied if the user backs out or nothing can be grouped).
    '''
    def __init__(self, dir_loc):
        self.dir_loc = dir_loc
        self.gpx_file_list = []
        self.load_dir()
        
    def load_dir(self):
        '''Checks to ensure a valid directory is passed in, and if so, loops through every file to include all
        .gpx files as Single_Event objects in a list. Also sorts the list by event time, in descending order.
        Files that fail to load are skipped.  An invalid directory sets self.dir_loc to None.
        '''
        if not os.path.isdir(self.dir_loc):  #Check if valid folder
            print('Invalid directory selected. Please enter a valid directory. Returning to Main Menu.')
            self.dir_loc = None
            return None

        folder = os.path.abspath(self.dir_loc)
        loaded = 0
        for fn in os.listdir(self.dir_loc):  #Loop through items in folder
            if not fn.endswith('.gpx'):  #Only GPX files are of interest.
                continue
            try:
                #os.path.join uses the right separator for the current operating system.
                single = Single_Event(os.path.join(folder, fn))
            except Exception:
                single = None
            #Single_Event reports a failed load by leaving its event as None rather than raising.
            if single is None or single.event is None:
                print('BOGUS GPX FILE!')
                continue
            self.gpx_file_list.append(single)
            loaded += 1
            if loaded % 10 == 0:  #Give status every 10 files.
                print('{} files loaded so far!'.format(loaded))
        print('{} total files loaded!'.format(loaded))

        if len(self.gpx_file_list) == 0:
            print('No GPX files found in this folder. Please choose a different folder.')
            return None

        self.gpx_file_list.sort(key=self._sort_key, reverse=True)  #Most recent event first.
        self.group_like_events()

    @staticmethod
    def _sort_key(single):
        '''Sort key for one Single_Event: its event time, with events lacking any time ordered last.'''
        when = single.event.time or single.event.get_time_bounds()[0]
        #The leading flag keeps None from ever being compared against a datetime.
        return (when is not None, when)
         
    def __str__(self):
        return 'Multi_Event loaded, folder: ' + str(self.dir_loc)  #str() copes with dir_loc being None.
            
    def group_like_events(self):
        '''Clusters like events together.  Then prompts the user with a list of events from which to choose.
        Then reduces the Multi_Event list (self.gpx_file_list) to only the events in that group.
        '''
        print('\nGrouping Events ...')
        
        #Build array of dist, start_lat, start_lon, end_lat, end_lon (one row per usable event).
        event_dist_scale = 100000  #Used to scale event distance when clustering.
        data_to_cluster = []
        kept_events = []  #Events that produced a row; stays index-aligned with data_to_cluster.
        removed_count = 0
        for SE in self.gpx_file_list:  #SE stands for an instance of the Single_Event class.
            try:  #These occasionally fail, so wrapping them in a try statement and eliminating event upon failure.
                length_m = SE.event.length_2d()
                distance = Conversions(length_m).meters_to_miles()
                duration = SE.event.get_duration()
                too_short = distance < 0.25 or duration < (5 * 60)  #Exclude events < 0.25 miles or < 5 min.
                if not too_short:
                    points = SE.event.get_points_data()
                    first_pt = points[0].point
                    last_pt = points[-1].point
            except Exception:
                removed_count += 1
                continue
            if too_short:
                removed_count += 1
                continue
            data_to_cluster.append([length_m / event_dist_scale,  #Convert meters to smaller scale for clustering
                                    first_pt.latitude,
                                    first_pt.longitude,
                                    last_pt.latitude,
                                    last_pt.longitude])
            kept_events.append(SE)
        print('Removing {} events because they were too short or failed to load.'.format(removed_count))
        self.gpx_file_list = kept_events
        
        if len(data_to_cluster) == 0:
            print('The selected directory does contains no valid gpx files. Nothing to analyze. Returning to Main Menu.')
            self.gpx_file_list = []
            return None

        #This is where the clustering "magic" happens.  The eps variable took a fair amount of tweaking to
        #get right, along with manually scaling the distance variable to be a similar (roughly) scale as the
        #lat & lon variables.  This approach is admittedly not perfect, but it works.  Also, this clustering
        #algorithm will fail near the North and South Poles due to converging longitudes; I can live with that.
        #Set min_samples=2 to allow for groups as small as 2 events.
        db = DBSCAN(eps=0.005, min_samples=2).fit(np.array(data_to_cluster)) #Clustering algorithm

        #One label per event: 0 .. groups_count-1 for grouped events, -1 for a unique (unclustered) event.
        labels = list(db.labels_)
        indiv_events_count = labels.count(-1)
        total_events_count = len(labels)
        groups_count = max(labels) + 1
        print('You have {} total events. '.format(len(labels)))
        if groups_count == 0:
            print('None of them are the same event. Nothing to analyze. Returning to Main Menu.')
            self.gpx_file_list = []
            return None

        print('{} are unique events (not grouped), with the remaining '.format(indiv_events_count)
              + '{} clustered into {} groups.'.format(total_events_count - indiv_events_count, groups_count)
             )
        print('Here are the groups of events:')

        for group in range(groups_count):
            first_idx = labels.index(group)  #The list is sorted newest first, so this is the most recent one.
            distance = Conversions(data_to_cluster[first_idx][0] * event_dist_scale).meters_to_miles()
            print('  Event Group {}: {} events, {:.2f} mi., '.format(group+1, labels.count(group), distance)
                  + 'Most recent: {:%D %H:%M:%S}'.format(self.gpx_file_list[first_idx].event.get_time_bounds()[0])
                 )

        input_text = ('Please enter the number of the group you would like to analyze.\n' +
                     'Enter 0 to return to the main menu.')
        print('\n' + input_text)
        #I considered an option to analyze all events, but the analysis becomes nonsensical, so I opted against it.
        
        #Keep asking until the user enters 0 or a valid group number.
        choice = None
        while choice is None:
            try:
                choice = int(input())
            except ValueError:  #Not a whole number.
                choice = None
            else:
                if not 0 <= choice <= groups_count:
                    choice = None
            if choice is None:
                print('\nInvalid input. ' + input_text)

        #Whittle down list of events to only those in the selected group (0 means back to the main menu).
        if choice == 0:
            self.gpx_file_list = []
        else:
            self.gpx_file_list = [SE for SE, label in zip(self.gpx_file_list, labels) if label == choice - 1]
        
class Analyze_Data:
    '''Takes a Single_Event or Multi_Event object as input.  For Single_Event objects, it prints summary statistics
    including a steepness breakdown.  For Multi_Event objects, it prints summary statistics along with a listing
    of each event, including a comparison to the best and median times for that event. 
    Anything else (including None) prints a message asking the user to reload data.
    '''
    def __init__(self, obj_to_analyze):
        print('\nAnalyzing Data ...')
 
        self.obj_to_analyze = obj_to_analyze
        
        if isinstance(self.obj_to_analyze, Single_Event):
            self.analyze_single()
        elif isinstance(self.obj_to_analyze, Multi_Event):
            self.analyze_multi()
        else:
            print('No data found or wrong type of data loaded. Please reload data and try again.')

    @staticmethod
    def _format_duration(seconds):
        '''Formats a number of seconds as HH:MM:SS.  Hours keep counting past 23 rather than wrapping.'''
        hours, remainder = divmod(int(seconds), 3600)
        minutes, secs = divmod(remainder, 60)
        return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, secs)

    def analyze_single(self):
        '''Analyze a Single_Event instance. Calculates summary stats and steepness bands.'''
        print('Analyzing Single Event.')
        event = self.obj_to_analyze.event
        
        #CALCULATE SUMMARY STATS.
        #Grab summary stats
        start_end_times = event.get_time_bounds()
        duration = event.get_duration()
        distance = Conversions(event.length_2d()).meters_to_miles()
        asc_des = event.tracks[0].get_uphill_downhill()
        asc = Conversions(asc_des[0]).meters_to_feet()
        des = Conversions(asc_des[1]).meters_to_feet()
        
        #Print summary stats. Time-based lines are left out when the file carries no usable time data.
        print('Here\'s a summary of your event.')
        if start_end_times[0] is not None and start_end_times[1] is not None:
            print('  Start Date & Time: {:%D %H:%M:%S}.'.format(start_end_times[0]))
            print('  End Date & Time: {:%D %H:%M:%S}.'.format(start_end_times[1]))
        if duration:
            print('  Duration: {}.'.format(self._format_duration(duration)))
        print('  Distance: {:.2f} miles.'.format(distance))
        if duration:  #Also avoids dividing by a zero duration.
            print('  Average Speed: {:.1f} mph.'.format(distance / (duration/3600)))
        print('  Elevation Ascent / Descent: {:,.0f} ft. / {:,.0f} ft.'.format(asc, des))
        
        #CALCULATE STEEPNESS GRADES
        #Bucket into 10 groups, symmetrically distributed around 0.
        smooth_dist_thresh = 5  #Length in meters. Had to tweak this several times to get a good value for smoothing.

        #Grab points. Only points that are more than smooth_dist_thresh from the previously kept point are
        #used, which serves as a smoothing function. Points without an elevation cannot yield a grade.
        points = [item.point for item in event.get_points_data() if item.point.elevation is not None]
        dists = []
        grades = []
        if points:
            prev_point = points[0]
            for point in points:
                gap = point.distance_2d(prev_point)
                if gap > smooth_dist_thresh:
                    dists.append(gap)
                    grades.append((point.elevation - prev_point.elevation) / gap)
                    prev_point = point

        if not grades:
            print('  Steepness Breakdown: not enough elevation data to calculate.')
            return None
        
        #Also a smoothing component, to remove the most extreme grades (since they're typically erroneous).
        #Keeps only the (grade, distance) pairs whose grade lies within the percentile bounds.
        pct_thresh = 5
        lower_pct = np.percentile(grades, pct_thresh)
        upper_pct = np.percentile(grades, 100 - pct_thresh)
        kept = [(grade, dist) for grade, dist in zip(grades, dists) if lower_pct <= grade <= upper_pct]
        grades = [grade for grade, dist in kept]
        dists = [dist for grade, dist in kept]
    
        #Get max absolute grade; calculate upper grade boundary (5%, 10%, 15%, 20%, etc.)
        max_abs_grade = max(abs(grade) for grade in grades)
        #Round to the nearest 0.05 higher than highest absolute grade
        max_grade_bound = (((max_abs_grade*100)//5)*5 + 5)/100
        grade_incr = max_grade_bound / 5
        
        #Make list of 10 lists; index 0 is the steepest uphill band, index 9 the steepest downhill band.
        dists_by_grade = [[] for dummy in range(10)]
            
        #Assign distances to buckets.
        for grade, dist in zip(grades, dists):
            #Math to determine bucket; better than looping
            index = int(10 - 10*(grade+max_grade_bound)/(max_grade_bound*2))
            index = min(max(index, 0), 9)  #Guard against floating point rounding at the outer edges.
            dists_by_grade[index].append(dist)
        
        #Important to sum distance associated with grades and use that to calc %'s.
        #It'll vary slightly vs. actual event distance. That's okay because we're getting the percentage of grades,
        #so the variation in distances is negligible. But the %'s won't add up if using event distance.
        bucket_totals = [sum(bucket) for bucket in dists_by_grade]
        dist_grades = sum(bucket_totals)
                
        #Scales grade output back to actual distance travelled
        scale_factor = event.length_2d() / dist_grades
        
        print('  Steepness Breakdown:')
        band_top = max_grade_bound  #Upper edge of the band being printed; steps down one increment per band.
        for total in bucket_totals:
            print('    {:+.0%} to {:+.0%} grade: '.format(band_top-grade_incr, band_top), end='')
            print('{:.2f} mi., '.format(Conversions(total).meters_to_miles()*scale_factor), end='')
            print('{:.0%} of distance.'.format(total / dist_grades))
            band_top -= grade_incr
        
    def analyze_multi(self):
        '''Prints summary statistics and comparisons of multiple events in the selected group.'''
        print('Analyzing Multiple Events.')
        events = self.obj_to_analyze.gpx_file_list
        if len(events) == 0:
            print('No events to analyze.')
            return None
        
        durations_list = [SE.event.get_duration() for SE in events]  #SE is a Single_Event instance.
            
        #The first event's distance stands in for the whole group.
        distance = Conversions(events[0].event.length_2d()).meters_to_miles()
        dur_min = min(durations_list)
        dur_med = np.median(durations_list)
        dur_max = max(durations_list)
        speed_fastest = distance / (dur_min/3600)
        speed_median = distance / (dur_med/3600)
        speed_slowest = distance / (dur_max/3600)
        
        print('Here\'s a summary of this event group!')
        print('  You have completed this event {} times.'.format(len(events)))
        print('  Distance: {:.2f} mi.'.format(distance))
        print('  Duration and Speed Summary: ')
        print('    Fastest: {}, {:.1f} mph avg.'.format(self._format_duration(dur_min), speed_fastest))
        print('    Median: {}, {:.1f} mph avg.'.format(self._format_duration(dur_med), speed_median))
        print('    Slowest: {}, {:.1f} mph avg.'.format(self._format_duration(dur_max), speed_slowest))
        print()
        print('  Specific events:')
        
        #Remember, list is already sorted from most recent to least recent
        for i, (SE, dur_event) in enumerate(zip(events, durations_list), start=1):
            #Positive percentages mean this event was faster than the reference duration.
            dur_vs_median = ((dur_event / dur_med) - 1) * (-1)
            dur_vs_best = ((dur_event / dur_min) - 1) * (-1)
            #Fall back to the first track point's time when the file has no overall time stamp.
            event_time = SE.event.time or SE.event.get_time_bounds()[0]
            print('    {}. Date/Time: {:%D %H:%M:%S}'.format(i, event_time)
                  + ', Duration: {}, '.format(self._format_duration(dur_event))
                  + '{:+.1%} vs. Median, {:+.1%} vs. Fastest.'.format(dur_vs_median, dur_vs_best)
                 )

class Conversions:
    '''Wraps one numeric value and offers the unit conversions shared by the other classes.'''
    def __init__(self, value_in):
        self.value_in = value_in  #The raw value to convert; its unit depends on the method called.
    
    def meters_to_miles(self):
        '''Treats the stored value as meters and returns it in miles.'''
        miles_per_meter = 0.000621371
        return self.value_in * miles_per_meter
    
    def meters_to_feet(self):
        '''Treats the stored value as meters and returns it in feet.'''
        feet_per_meter = 3.28084
        return self.value_in * feet_per_meter
    
    def sec_to_datetime(self):
        '''Treats the stored value as seconds (fractions are dropped) and returns a datetime that many
        seconds after 0001-01-01 00:00:00, convenient for %H:%M:%S formatting.
        '''
        whole_seconds = int(self.value_in)
        epoch = datetime(1, 1, 1)
        return epoch + timedelta(seconds=whole_seconds)

#Only start the interactive menu when this file is executed directly (script or notebook cell),
#not when it is imported by another module.
if __name__ == '__main__':
    Menu()  #Run it!

Welcome to the GPS Summary & Steepness Calculator!

***** W200 Instructors/Graders: Note, several sample GPX files for testing are located in sub-folders d1 and d2. *****


MAIN MENU
Currently loaded file / folder: None
Please choose from the following options:
  1. Load Single GPX File.
  2. Load Multiple GPX Files.
  3. Analyze Data.
  4. Quit the program.
  Enter your choice here: 2

### Load Multiple GPX Files selected. ###
Please enter a directory (i.e., folder) name. Absolute or relative path names are accepted.
d2
10 files loaded so far!
20 files loaded so far!
30 files loaded so far!
40 files loaded so far!
50 files loaded so far!
60 files loaded so far!
70 files loaded so far!
80 files loaded so far!
90 files loaded so far!
100 files loaded so far!
110 files loaded so far!
120 files loaded so far!
130 files loaded so far!
140 files loaded so far!
150 files loaded so far!
160 files loaded so far!
170 files loaded so far!
180 files loaded so far!
190 files loaded so far!
200 fil

# Punch List
## Definitely Implement
- DONE Format feet and miles to 0 and 2 decimals, respectively. Elsewhere too.
- DONE Fix error after entering an invalid file in Option 1 (might occur in Option 2 also).
- DONE Clean up debugging print statements, e.g., printing when a function runs.
- DONE Add spacing before / after printouts.
- DONE Make PDF of Reflections (Observations).  1 page.
- DONE Must run as .py file from the command line.
- Sync Design Doc v02
- DONE Review code, line by line.
- DONE Final preparation:
  - DONE Copy to .py file and test.
  - DONE Push to github.
- Get Out of Scope items from the design document.
- DONE Find optimal eps value for clustering.
  - DONE Play with various values
  - DONE Scale distance
- DONE Put more comments in the code.
- DONE S&R for GMT.
- DONE Change "individual" to "unique".
- DONE (It was correct without needing a fix.) Fix percentage errors (invert?).
- Testing
  - DONE Put non-GPX files in the folder d1
  - DONE Test thoroughly for other errors  
  - DONE Zero length / duration rides, to the extent they cause errors
  - DONE Use multiple GPS devices, to account for devices that might produce different types of GPX files.
  - DONE Test case with no groups (all indiv. files).  Can set eps to 0.01 to test this case.

## Probably Implement
- DONE Include % of mileage in steepness bands
- DONE (Bad idea; doesn't work). Consider normalizing (mean=0, sd=1) clustering variables (lat, lon, dist).
- DONE Either change the threshold for smoothing, or change to moving average. We're still getting some wonky-high grades.

## Implement, Time Permitting (Nice to Have, likely out of scope)
- V2.0 When user selects '.' folder, print the actual folder name.  Consider printing the full path for all cases.
- V2.0 Multi track / multi segment aggregation.
- DONE Time zone.  Investigate if gpx files / gpxpy has a local time offset. Seems plausible they would.
- V2.0 Events with stopped time (e.g., when the user pauses the GPS).
- V2.0 Sum time and % time within steepness bands.
- V2.0 In Multi_Event, calculate average distance instead of picking the first event.  This change will be of negligible impact.
- DONE Option to "go back to main menu" at all prompts.
- V2.0 If reloading the same folder, skip reload.
- Plotting
  - Plot Color Steepness Map
  - Graphing multi-event analysis (e.g., bar charts of steepness or event durations)

## Out of Scope
- Make variable number of bands (not going to happen).
- Don't try to handle bad GPX files (unless gpxpy can do this easily).
- File system / naming conventions outside Windows.  Program might work, but it might not.
- Testing with non-Garmin GPS devices.
- Optimizing clustering algorithm features to hone into specific thresholds of start / end and distance separation parameters.
- Automated comparisons across different events / different groups (the user can run it for different events).
- Window pop-up for selecting files / folders (e.g., like you’d have in Microsoft Word)
- Landmarks / waypoints
- Ability for users to create custom groups of events (other than their existing ability to include only the GPX files in a folder they want analyzed)
- Date filter of events in groups

# Observations
- I have a greater appreciation for why software is released iteratively. The wish list builds quickly!  I feel like I could never stop making enhancements to add cool new functionality.
- So much of code / time dedicated to edge cases, formatting, error handling, etc. Less to core functionality.
- Learned quite a bit about using another library (gpxpy).  Not well documented, but sufficiently intuitive.  I spent quite a bit of time learning the attributes and methods of this library.
- Working with the file system through the os library seemed much more challenging than it should have been.  I struggled mightily with os.path.isfile().
- Practically, N size is not a large factor, because a single athlete can only have a finite number of events.  Imagine a 'worst case' scenario, where an athlete records one event per day for 50 years; they'd have 18,262 events, a relatively small N.  So even though the clustering algorithm is O(N^2), it will make little real-world difference.  If this algorithm were used on groups of athletes, e.g., Garmin's user base, then N would be much larger, likely necessitating a different clustering algorithm.
- As an aspiring data scientist, I took this as an opportunity to learn about clustering algorithms, and I loved it! I knew a little about K-Means prior to this exercise (enough to know it wouldn't work because it requires the number of groups to be specified, and this program needs an unspecified number of groups), but nothing about other ways to form clusters.  I chose DBSCAN because it appeared to work the best, based on its description and confirmed with my repository of 310 GPS files. It's possible it wouldn't work in every case for other users, but it performed well for me, and since I've already put in several hours researching it, and because this is a programming class rather than a machine learning class, I consider it sufficient.  I also saw no slow down.
- I wish I knew of a way to load files in parallel (perhaps to separate lists which I could then easily merge).  It takes a while to load 300 files, and I'd love to cut that down by using all 4 cores on my machine.
- Interesting to discover and learn how scaling negatively affected (i.e., broke) my clustering algorithm with a group of GPX files that had nearly identical components (e.g., several events that had different distances, but which started and stopped from essentially the same point).  This led me to manually scale my data.
- It's been great to learn a bunch of new things, such as:
  - Sorting a list of objects by one of their attributes
  - The Conversions class I used, and passing values into it
  - Working with the file system, and using relative and absolute file / folder names
  - Even little things, like how *list returns list elements without the list (e.g., for arguments of itemgetter).

# eps Results
Results on initial 6 events
- 0.1: Failed. Too small, same events not clustered
- 0.2: Worked?
- 0.5: Worked?
- 0.6: Clustered two more events together, with distances of 4.43 and 4.56 km. 0.13 km difference.
- 0.7: Failed. Every event clustered together.

# Doodling

In [None]:
#Doodling
import os
#See how os.path classifies a bare file name: is it a directory, and does it exist at all?
ftc = "sample01.gpx"
is_folder = os.path.isdir(ftc)
print(is_folder)
is_present = os.path.exists(ftc)
print(is_present)

In [None]:
#Keep this code, it works! Example of passing a class and its attributes to other classes.
import os  #Used for file & directory access
import gpxpy

class First:
    '''Demo driver: builds a Fetch object, shows the types involved, then hands the object to Analyze.'''
    def __init__(self):
        print('Running First class')
        fetched = Fetch('s.gpx')
        self.data_to_analyze = fetched  #Kept on the instance so it can be inspected afterwards.
        print(type(fetched))
        print(type(fetched.event))
        Analyze(fetched)
        
class Fetch:
    '''Loads the GPX file at file_loc and stores the parsed result in self.event.'''
    def __init__(self, file_loc):
        self.file_loc = file_loc
        self.load_file()  #__init__ methods don't return things (other than None), so calling separate method
    
    def load_file(self):
        '''Parses the file with gpxpy.  The file is closed afterwards, even if parsing fails.'''
        with open(self.file_loc, 'r') as gpx_file:  #r for read-only
            self.event = gpxpy.parse(gpx_file)

class Analyze:
    '''Demo consumer: receives an object from another class and reads its event attribute.'''
    def __init__(self, fetch_in):
        print('Running Analyze class')
        print(type(fetch_in))
        parsed = fetch_in.event  #The parsed GPX data carried by the object that was passed in.
        print(type(parsed))
        print(parsed.get_duration())

#Run the demo chain: First -> Fetch -> Analyze.
#(The trailing semicolon presumably keeps the notebook from echoing the returned object.)
First();

In [None]:
#Convert Seconds to D:H:M:S
from datetime import datetime, timedelta

def GetTime(sec_in):
    '''Prints sec_in (a number of seconds) broken out as days:hours:minutes:seconds.'''
    stamp = datetime(1,1,1) + timedelta(seconds=sec_in)
    #The base date is day 1, so subtract 1 to get the count of whole days elapsed.
    parts = (stamp.day-1, stamp.hour, stamp.minute, stamp.second)

    print("DAYS:HOURS:MIN:SEC")
    print("%d:%d:%d:%d" % parts)
    
GetTime(99999)

In [None]:
#Check that a backslash-escaped apostrophe prints correctly inside a single-quoted string.
print('Here\'s a summary of your event.')

In [None]:
#Calc for how to get grade bands.
#Worked example for a 20% max grade: floor to a multiple of 5, add 5, convert to a fraction -> 0.25.
((20//5)*5 + 5)/100  #Round to the nearest 0.05 higher than highest absolute grade

In [None]:
#Get abs value of items in a list.  Used to get max steepness, whether ascending or descending.
#max(..., key=abs) picks the element with the largest magnitude (-9.4); the outer abs() makes it 9.4.
abs(max([-9.4, 1, 2, -3, -4.5, 4.5], key=abs))

In [None]:
#Function # one liner to sum nested lists.
#Sample nested list used to exercise list_sum below.
L = ([[1, 1, 1], [1, [1, 1]], 1])
def list_sum(L):
    '''Adds up every number in L, descending into nested lists to any depth.'''
    total = 0
    for entry in L:
        #A nested list contributes its own (recursively computed) sum; anything else is added as-is.
        total += list_sum(entry) if isinstance(entry, list) else entry
    return total

print(list_sum(L))
# sum(sum(i) if isinstance(i, list) else i for i in L)  #Only works for 2 level lists

In [None]:
#Median function
from numpy import median
#Try median on lists of different sizes: lots of values, two values, and a single value.
for sample in ([1, 2, 3, 4.7], [4.7, 5.4], [4.7]):
    print(median(sample))

In [None]:
import os
#Check whether os.path.isfile recognizes an absolute path written with forward slashes.
x = 'C:/Users/camke/OneDrive/Education/UCB/W200_Python/CKW200GitRepo/assignments_upstream_fall17/SUBMISSIONS/Project_01/data/abc.gpx'
print(os.path.isfile(x))

#Minimal try/except skeleton; the bare except silently ignores any error raised in the try body.
try:
    print('hi')
except:
    pass

In [None]:
#Compare printing a list object with printing its unpacked elements.
myList = [1,2,3,4]
print(myList)  #Prints the list itself, brackets and commas included.
print(*myList)  #The * passes each element as its own argument, so print shows them separated by spaces.

In [None]:
#Remainder check of the kind used for the 'every 10 files' status message; evaluates to 0.
30 % 10

In [None]:
#Practice string formatting: signed numbers, percentages, and keeping the cursor on the same line.
print('blah blah {a:+.2f} more blah {b:+.1f}'.format(b=123, a=-456)) #Remember for +/-
print('blah blah {a:+.0%} more blah {b:+.1%}'.format(b=-0.345, a=1.234)) #Percentages
print('blah blah {a:+0} more blah {b:+.1}'.format(b=-0.345, a=1.234)) #Decimals
#end=': ' replaces the newline, so the following print continues on the same line.
print('    {:.0f} to {:.2f}'.format(0.1, 0.2), end=': ')
print('next')

In [2]:
#Find the positions of values lying outside the 10th-90th percentile range of a shuffled list.
import numpy as np
myList = [1, 2, 3, 4, 60, 70, 80, 99, 100, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009]
np.random.shuffle(myList)  #Shuffles in place, so the positions found differ from run to run.
upper = np.percentile(myList, 90)
lower = np.percentile(myList, 10)
values = np.asarray(myList)
outside = (values > upper) | (values < lower)  #True wherever a value is beyond either bound.
indices_to_omit = np.flatnonzero(outside).tolist()
print(indices_to_omit)

[1, 8, 10, 11]


In [6]:
#Slice off the last four characters of a file name to get its extension.
x = 'blah.gpx'
print(x[-4:])  #Prints '.gpx'.

.gpx
