### Import Activities File

In [None]:
import pandas as pd
from pathlib import Path
from os import listdir
from os.path import isfile, join
import gzip
import shutil
from benedict import benedict as bdict
from ipyleaflet import Map, Polyline


# Root folder for the input data; generated files go to its "output/" subfolder.
filepath = "data/"
fileOutputPath = f"{filepath}output/"

# Folder holding the compressed activity files, and the folder for their extracted copies.
activityPath = f"{filepath}activities/"
activityOutputPath = f"{fileOutputPath}activities/"

def loadCleanData():
    """Read the cleaned activities CSV and return it as a DataFrame.

    The file ``activities_clean.csv`` is looked up under ``filepath`` and its
    ``Week`` column is converted with ``pd.to_datetime``.
    """
    activities = pd.read_csv(filepath + 'activities_clean.csv')
    activities['Week'] = pd.to_datetime(activities['Week'])
    return activities

def getFilename(fileextension):
    """Return the file name with a trailing ``.gz`` extension removed.

    Args:
        fileextension: File name (or relative path) such as ``"ride.gpx.gz"``.

    Returns:
        The name without its final ``.gz`` (matched case-insensitively).
        Names that do not end in ``.gz`` are returned unchanged instead of
        losing their last three characters.
    """
    suffix = ".gz"
    if fileextension.lower().endswith(suffix):
        return fileextension[:-len(suffix)]
    return fileextension

def extractAllGPXFiles():
    """Decompress every ``.gz`` file found in the activities folder.

    Each archive located directly inside ``activityPath`` is unpacked into
    ``activityOutputPath`` (created when missing) under the name returned by
    ``getFilename``. Entries whose name does not end in ``.gz`` are skipped,
    because ``gzip.open`` cannot decompress them. Prints ``done!`` at the end.
    """
    # Create the output folder if it does not exist yet
    Path(activityOutputPath).mkdir(parents=True, exist_ok=True)

    # Keep only regular files that carry a .gz extension
    activityfiles = [
        f for f in listdir(activityPath)
        if f.lower().endswith(".gz") and isfile(join(activityPath, f))
    ]

    for activityzip in activityfiles:
        # Name of the extracted file (archive name without its extension)
        filename = getFilename(activityzip)

        # Stream the decompressed bytes straight into the output file
        with gzip.open(activityPath + activityzip, 'rb') as f_in, \
                open(activityOutputPath + filename, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    print("done!")
    
    
def getLocations(gpx_output_path,gpx_filename):
    """Read a GPX file and return its track points as ``[lat, lon]`` pairs.

    The file at ``gpx_output_path + gpx_filename`` is parsed with
    ``bdict.from_xml``; every entry under ``gpx.trk.trkseg.trkpt`` contributes
    its ``@lat`` and ``@lon`` attributes converted to ``float``.
    """
    track_points = bdict.from_xml(gpx_output_path + gpx_filename)['gpx.trk.trkseg.trkpt']

    coordinates = []
    for point in track_points:
        coordinates.append([float(point['@lat']), float(point['@lon'])])
    return coordinates

def getVirtualLocations(gpx_output_path,gpx_filename):
    """Read a GPX file and return one row per track point, including power.

    Args:
        gpx_output_path: Folder prefix, concatenated directly with the file name.
        gpx_filename: Name of the GPX file to parse.

    Returns:
        A list of ``[lat, lon, ele, power, time]`` rows (the first three
        converted to ``float``, the last two as read from the file), or
        ``None`` when the file cannot be read or a track point lacks one of
        these fields.
    """
    try:
        data_source = gpx_output_path + gpx_filename
        xmldata = bdict.from_xml(data_source)
        trk_list = xmldata['gpx.trk.trkseg.trkpt']

        return [
            [float(t['@lat']), float(t['@lon']), float(t['ele']), t['extensions']['power'], t['time']]
            for t in trk_list
        ]
    except Exception:
        # Best-effort reader: any read/parse problem or missing field is
        # reported as None. `Exception` (not a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        return None


def showOnMap(locations):
    """Draw a track as a green polyline on a map centred on the track.

    Args:
        locations: Non-empty sequence of points whose first two items are
            latitude and longitude.

    Returns:
        A ``Map`` at zoom 12, centred on the bounding box of the points,
        with the polyline added as a layer.

    Raises:
        ValueError: If ``locations`` is empty.
    """
    if len(locations) == 0:
        raise ValueError("locations must contain at least one point")

    line = Polyline(
        locations=locations,
        color="green",
        fill=False
    )

    # Centre on the bounding box: take min/max of each coordinate on its own.
    # min(locations)/max(locations) would compare whole points
    # lexicographically and so return the longitude of the extreme-latitude
    # points rather than the extreme longitudes.
    lats = [float(p[0]) for p in locations]
    lons = [float(p[1]) for p in locations]
    center = ((min(lats) + max(lats)) / 2, (min(lons) + max(lons)) / 2)

    m = Map(center=center, zoom=12)
    m.add_layer(line)
    return m


In [None]:
# Load the cleaned activities table; the cells below read it as `data`
data = loadCleanData()

# Extract All gpx.gz files (disabled; uncomment to run the extraction)
#extractAllGPXFiles()

### General Stats

In [None]:
# Total distance of outdoor rides (Activity Type == 'Ride'), rounded
outdoorRide = round(data.loc[data['Activity Type'] == 'Ride', 'Distance'].sum())
print("Total outdoor ride: ", outdoorRide)

# Total distance of virtual rides (Activity Type == 'Virtual Ride'), rounded
virtualRide = round(data.loc[data['Activity Type'] == 'Virtual Ride', 'Distance'].sum())
print("Total virtual ride: ", virtualRide)

In [None]:
# Ride Distance Graph: one bar per activity type
import seaborn as sns
sns.set()  # apply seaborn's default theme to the plots that follow
# estimator=sum makes each bar the summed Distance of its activity type
sns.barplot(x="Activity Type", y="Distance", data=data, estimator=sum)

In [None]:
# Ride Elevation Graph: one bar per activity type;
# estimator=sum makes each bar the summed Elevation Gain of its activity type
sns.barplot(x="Activity Type", y="Elevation Gain", data=data, estimator=sum)

In [None]:
import matplotlib.pyplot as plt

# Distribution of the Distance column, drawn without the density curve (kde=False)
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot is the
# documented replacement) — confirm the installed version before switching.
sns.distplot(data['Distance'], kde=False)
# Dashed black vertical reference line at x = 0
plt.axvline(0, color="k", linestyle="--")

In [None]:
# Distance Graph per day: one line per activity type
distGraph = sns.lineplot(data=data, x="Activity Date", y="Distance", hue="Activity Type")
# Hide the x axis tick labels, axis title and tick marks
distGraph.set(xticklabels=[]) 
distGraph.set(xlabel=None)
distGraph.tick_params(bottom=False) 
distGraph

### Data Visualization

In [None]:
from matplotlib import pyplot as plt  

In [None]:
# Elevation gain plotted against activity date, in DataFrame row order
x = list(data['Activity Date'])
y = list(data['Elevation Gain'])  # also plotted on its own in a later cell
  
plt.plot(x,y) 
plt.show()  

In [None]:
import seaborn as sns
sns.set()  # re-apply seaborn's default theme (an earlier cell already does this)

In [None]:
# Elevation gain per activity, plotted against row position
plt.plot(y)
# Labels must be a sequence: a bare string is iterated character by
# character, so the single line would be labelled just "E".
plt.legend(['Elevation Gain'], ncol=2, loc='upper left');

### Data Visualization with Seaborn

In [None]:
# Joint density (KDE) of Distance vs. Elevation Gain on a white background.
# x/y/data are passed by keyword: newer seaborn releases reject them positionally.
with sns.axes_style('white'):
    sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='kde');

In [None]:
# Hexbin joint plot of Distance vs. Elevation Gain on a white background.
# x/y/data are passed by keyword: newer seaborn releases reject them positionally.
with sns.axes_style('white'):
    sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='hex')

In [None]:
# Joint plot of Distance vs. Elevation Gain with a regression fit.
# x/y are passed by keyword: newer seaborn releases reject them positionally.
sns.jointplot(x="Distance", y="Elevation Gain", data=data, kind='reg');

In [None]:
# Pairwise scatter plots of four numeric columns, coloured by activity type
g = sns.PairGrid(data, vars=['Distance', 'Max Speed', 'Elevation Gain', 'Average Watts'],
                 hue='Activity Type', palette='RdBu_r')
g.map(plt.scatter, alpha=0.8)  # same scatter plot in every grid cell
g.add_legend();

### Graph Time Distribution

In [None]:
# One sample for a Power per ride
# Number of track-point rows read for one sample ride (row label 155 of `data`);
# the 'Filename' value is passed through getFilename and resolved under fileOutputPath
len(getVirtualLocations(fileOutputPath,getFilename(data['Filename'][155])))

In [None]:
# Track-point rows of the sample ride (row label 155 of `data`)
virtualSteps = getVirtualLocations(fileOutputPath,getFilename(data['Filename'][155]))

import pandas as pd 
  
# One DataFrame row per track point; the column order follows the rows
# built by getVirtualLocations (lat, lon, ele, power, time)
dfSteps = pd.DataFrame(virtualSteps, columns =['Latitude', 'Longtitude','Elevation','Power','Date']) 
dfSteps = dfSteps.astype({"Power": int})  # cast the Power column to int

# Power along the ride, with the x axis tick labels, title and tick marks hidden
powerPlot = sns.lineplot(data=dfSteps, x="Date", y="Power")
powerPlot.set(xticklabels=[]) 
powerPlot.set(xlabel=None)
powerPlot.tick_params(bottom=False) 
powerPlot

### Modify Data to Generate New Features

In [None]:
# Derived feature: 'Distance.1' divided by 'Elapsed Time.1'
# NOTE(review): the new column is named 'Time/Distance' but the expression is
# distance / time — confirm which of the two is intended.
data['Time/Distance'] = data['Distance.1'] / data['Elapsed Time.1']
data.head(5)  # preview the first five rows

### Data Visualization Per Column

In [None]:
# Scatter plot of 'Activity Type' against the 'index' column
# NOTE(review): assumes `data` has an 'index' column and that a non-numeric y is accepted — confirm
data.plot(kind='scatter',x='index',y='Activity Type')

In [None]:
# Scatter plot of 'Distance' against the 'index' column
data.plot(kind='scatter',x='index',y='Distance')

In [None]:
# Scatter plot of 'Elapsed Time' against the 'index' column
data.plot(kind='scatter',x='index',y='Elapsed Time')

In [None]:
# Scatter plot of 'Moving Time' against the 'index' column
data.plot(kind='scatter',x='index',y='Moving Time')

In [None]:
# Scatter plot of 'Max Speed' against the 'index' column
data.plot(kind='scatter',x='index',y='Max Speed')

In [None]:
# Scatter plot of 'Elevation Gain' against the 'index' column
data.plot(kind='scatter',x='index',y='Elevation Gain')

In [None]:
# Scatter plot of 'Calories' against the 'index' column
data.plot(kind='scatter',x='index',y='Calories')

In [None]:
# Scatter plot of 'Activity Name' against the 'index' column
# NOTE(review): assumes a non-numeric y column is accepted by the scatter plot — confirm
data.plot(kind='scatter',x='index',y='Activity Name')

In [None]:
# Scatter plot of 'Average Watts' against the 'index' column
data.plot(kind='scatter',x='index',y='Average Watts')

In [None]:
# Scatter plot of 'Average Cadence' against the 'index' column
data.plot(kind='scatter',x='index',y='Average Cadence')