### Import Activities File

In [None]:
import pandas as pd
from pathlib import Path
from os import listdir
from os.path import isfile, join
import gzip
import shutil
from benedict import benedict as bdict
from ipyleaflet import Map, Polyline


filepath = "data/"
fileOutputPath= filepath + "output/"

activityPath = filepath + "activities/"
activityOutputPath = fileOutputPath + "activities/"


def loadData():
    """Load the activities table and return the ride rows that have a file.

    Reads ``activities.csv`` from the module-level ``filepath``, keeps a fixed
    set of columns selected by position, keeps only rows whose
    'Activity Type' is 'Virtual Ride' or 'Ride', and drops rows without a
    'Filename'.

    Returns:
        pandas.DataFrame: the filtered rows, renumbered from 0. Because
        ``reset_index()`` is called without ``drop``, the previous row labels
        are kept in an extra ``index`` column.
    """
    # Positions (in the raw CSV) of the columns carried into the result.
    keptColumns = [0, 1, 2, 3, 4, 5, 6, 10, 13, 14, 15, 16, 17, 18, 19, 20,
                   21, 22, 23, 26, 27, 31, 32, 41, 44, 45, 47, 50]

    activities = pd.read_csv(filepath + 'activities.csv')
    selected = activities[activities.columns[keptColumns]]

    isRide = selected['Activity Type'].isin(['Virtual Ride', 'Ride'])
    ridesWithFile = selected[isRide].dropna(subset=['Filename'])
    return ridesWithFile.reset_index()


def extractAllGPXFiles():
    """Copy every activity file into the output folder, unpacking ``.gz`` files.

    Scans the module-level ``activityPath`` folder (sub-directories are
    ignored) and writes one file per entry into ``activityOutputPath``:

    * ``name.gz`` is decompressed and written as ``name``;
    * any other file is copied unchanged under its own name, instead of being
      fed to gzip with its name cut by three characters.

    The output folder is created if it does not exist. Prints ``done!`` when
    finished. Returns nothing.
    """
    # Create the output folder if it does not exist yet.
    Path(activityOutputPath).mkdir(parents=True, exist_ok=True)

    for name in listdir(activityPath):
        source = join(activityPath, name)
        if not isfile(source):
            continue

        # Only strip the suffix and gunzip when the file really is a .gz.
        if name.lower().endswith(".gz"):
            opener, targetName = gzip.open, name[:-3]
        else:
            opener, targetName = open, name

        with opener(source, 'rb') as f_in, \
                open(join(activityOutputPath, targetName), 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    print("done!")
    
    
def _asList(node):
    """Return ``node`` as a list: ``[]`` for None, ``[node]`` for a single item."""
    if node is None:
        return []
    if isinstance(node, list):
        return node
    return [node]


def getLocations(gpx_output_path, gpx_filename):
    """Read a GPX file and return its track points as ``[lat, lon]`` pairs.

    Args:
        gpx_output_path: folder prefix; it is concatenated directly with
            ``gpx_filename``, so it must end with a path separator.
        gpx_filename: name of the GPX file inside that folder.

    Returns:
        list[list[float]]: one ``[latitude, longitude]`` pair per ``trkpt``,
        in file order, across all ``trkseg`` elements of the track.

    Raises:
        KeyError: if the document has no ``gpx.trk.trkseg`` entry.
    """
    # data-source can be an url, a filepath or data-string
    data_source = gpx_output_path + gpx_filename
    xmldata = bdict.from_xml(data_source)

    # NOTE(review): assumes benedict parses XML the xmltodict way, where a
    # repeated element becomes a list and a single one becomes a dict (or
    # None when empty). Normalising both levels lets a file with one track
    # point or with several segments load like the common case.
    locs = []
    for segment in _asList(xmldata['gpx.trk.trkseg']):
        for point in _asList(segment.get('trkpt')):
            locs.append([float(point['@lat']), float(point['@lon'])])

    return locs


def showOnMap(locations):
    """Draw a route as a green polyline on a map centred on its bounding box.

    Args:
        locations: non-empty sequence of ``[lat, lon]`` pairs.

    Returns:
        ipyleaflet.Map: map at zoom 12 with the polyline added as a layer.

    Raises:
        ValueError: if ``locations`` is empty.
    """
    if len(locations) == 0:
        raise ValueError("locations must contain at least one [lat, lon] pair")

    # Take the extremes per coordinate. min()/max() on the pairs themselves
    # compare lexicographically, which picks the longitude of the
    # min/max-latitude points rather than the true longitude bounds.
    lats = [loc[0] for loc in locations]
    lons = [loc[1] for loc in locations]
    center = (
        float(min(lats) + max(lats)) / 2,
        float(min(lons) + max(lons)) / 2,
    )

    line = Polyline(
        locations=locations,
        color="green",
        fill=False
    )

    m = Map(center=center, zoom=12)
    m.add_layer(line)
    return m

# Load the filtered ride table; the plotting cells below read it through the
# global name `data`.
data = loadData()
# Unpack the activity files from the activities folder into the output folder.
extractAllGPXFiles()

### Data Visualization

In [None]:
from matplotlib import pyplot as plt  

In [None]:
# Elevation gain per activity, in the row order of `data`.
# The 'Activity Date' values are passed to matplotlib exactly as stored.
x = data['Activity Date'].tolist()
y = data['Elevation Gain'].tolist()

plt.plot(x, y)
plt.show()

In [None]:
import seaborn as sns
sns.set()

In [None]:
# Plot elevation gain against row position, now with the seaborn theme set.
plt.plot(y)
# Labels must be a sequence of strings: a bare string is iterated character
# by character, so the single line would be labelled just "E".
plt.legend(['Elevation Gain'], ncol=2, loc='upper left');

### Data Visualization with Seaborn

In [None]:
# Joint KDE of distance vs. elevation gain.
with sns.axes_style('white'):
    # x/y/data are passed by keyword: recent seaborn releases no longer
    # accept them positionally, and the keyword form works on older ones too.
    sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='kde');

In [None]:
# Hexbin joint plot of distance vs. elevation gain.
with sns.axes_style('white'):
    # x/y/data are passed by keyword: recent seaborn releases no longer
    # accept them positionally, and the keyword form works on older ones too.
    sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='hex')

In [None]:
# Scatter with a fitted regression line for distance vs. elevation gain.
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form works on older ones too.
sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='reg');

In [None]:
# Number of activities per activity type.
with sns.axes_style('white'):
    # factorplot was renamed to catplot and later removed from seaborn;
    # catplot takes the same arguments for a count plot.
    g = sns.catplot(x="Activity Type", data=data, aspect=2,
                    kind="count", color='steelblue')
    # No set_xticklabels(step=5) here: `data` holds at most two activity
    # types, and labelling every fifth tick would leave the second bar
    # without a label.

In [None]:
# Histogram of ride distances (KDE overlay disabled), with a dashed black
# vertical line at zero drawn on the same axes.
ax = sns.distplot(data['Distance'], kde=False)
ax.axvline(0, color="k", linestyle="--");

In [None]:
# Pairwise scatter plots of the selected metrics, coloured by activity type.
pairColumns = ['Distance', 'Max Speed', 'Elevation Gain', 'Average Watts']
g = sns.PairGrid(
    data,
    vars=pairColumns,
    hue='Activity Type',
    palette='RdBu_r',
)
g.map(plt.scatter, alpha=0.8)
g.add_legend();

In [None]:
# Extract Date 
#data['Date'] = getDate(data['Activity Date'])