## Convert Mick's movement data

1. Read CSV
2. Convert CSV to XML file
3. Join the two files together

In [1]:
#!pip install pandas

In [2]:
import pandas as pd
from pathlib import Path

In [3]:
# DATASOURCE_TYPE = "local"  # Local machine
DATASOURCE_TYPE = "gh"     # Github - Synthetic or non-PII data only

#TODO: Need to check that both local and remote (in this instance Github.com) data modes work

In [4]:
DATAFILE_PATH = "data"
DATAFILE_CSV  = "2021_09_10_15_09_Sassafras_Power_Climb,_Sunset,_South_Carolina.csv"

# DATAFILE_URL = "https://raw.githubusercontent.com/DataBooth/client-youngm-ifit/main/data/" + "2021_09_10_15_09_Sassafras_Power_Climb%2C_Sunset%2C_South_Carolina.csv"

DATAFILE_URL_PATH = "https://raw.githubusercontent.com/DataBooth/client-youngm-ifit/main/data/" 
DATAFILE_URL = DATAFILE_URL_PATH + DATAFILE_CSV.replace(",", "%2C")

In [5]:
# print(DATAFILE_URL)

In [6]:
def set_local_or_remote_data_path(datasource_type):
  if datasource_type ==  "local":
    datafile = Path(DATAFILE_PATH) / DATAFILE_CSV
    print("Local datasource: " + datafile.as_posix())
    tcxfile = Path(DATAFILE_PATH) / DATAFILE_CSV.replace("csv", "tcx")
    print("TCX file to be created: " + tcxfile.as_posix())
    return datafile, tcxfile
  elif datasource_type == "gh":
    datafile = DATAFILE_URL
    print("Remote datasource: " + datafile)
    tcxfile = DATAFILE_CSV.replace("csv", "tcx")
    print("TCX file to be created: " + tcxfile)
    return datafile, tcxfile
  else:
    print("ERROR: Unknown `datasource_type`")
  return None

  #TODO: Aaron - we can refactor further 

In [7]:
datafile, tcxfile = set_local_or_remote_data_path(DATASOURCE_TYPE)

Remote datasource: https://raw.githubusercontent.com/DataBooth/client-youngm-ifit/main/data/2021_09_10_15_09_Sassafras_Power_Climb%2C_Sunset%2C_South_Carolina.csv
TCX file to be created: 2021_09_10_15_09_Sassafras_Power_Climb,_Sunset,_South_Carolina.tcx


In [8]:
# DATAFILE

#TODO: Aaron we can re-factor this together if you'd like 

In [9]:
#!head $DATAFILE

In [10]:
# How to read csv

data_df = pd.read_csv(datafile, skiprows=2)
print(data_df.head())

# How to read xml
# xml_data = open('tcxfile.tcx', 'r').read()
# print(xml_data)

    Time   Miles   MPH  Watts  ...  RPM  Resistance  Relative Resistance  Incline
0  00:00  0.0000  6.56     20  ...   49           2                    8      0.0
1  00:01  0.0021  6.56      1  ...   49           2                    8      0.0
2  00:02  0.0043  6.56      1  ...   49           2                    8      0.0
3  00:03  0.0064  6.56      2  ...   49           2                    8      0.0
4  00:04  0.0085  6.56      3  ...   48           2                    8      0.0

[5 rows x 9 columns]


In [11]:
data_df.rename(columns={"Relative Resistance": "RelativeResistance"}, inplace=True)

In [12]:
# TODO: Aaron to read about self-documenting code 

def convert_csv_row_to_xml(row):
    return """<Time>%s</Time>
    <Miles>%s</Miles>
    <MPH>%s</MPH>
    <Watts>%s</Watts>
    <HR>%s</HR>
    <RPM>%s</RPM>
    <Resistance>%s</Resistance>
    <Relative Resistance>%s</Relative Resistance>
    <Incline>%s</Incline>""" % (row.Time, row.Miles, row.MPH, row.Watts, row.HR, row.RPM, row.Resistance, row.RelativeResistance, row.Incline)

# TODO: The issue I currently have is how to change "Relative Resistance" to "RelativeResistance" for the above code to work

In [13]:
new_tcx = ''.join(data_df.apply(convert_csv_row_to_xml, axis=1))

In [14]:
#data_df

In [15]:
# Join modified XML file to original TCX file


# TCXFILE = Path(DATAFILE_PATH) / DATAFILE_CSV.replace("csv", "tcx") - see above

In [16]:
with open(tcxfile, "a+") as tcxwrite: 
  for line in new_tcx:
    tcxwrite.write(line)

In [17]:
#!tail -40 $TCXFILE - convert to actual .py code for compatibility between notebook & script

get_ipython().system('tail -40 ' + tcxfile)

    <Miles>5.2245</Miles>
    <MPH>9.79</MPH>
    <Watts>17</Watts>
    <HR>134.0</HR>
    <RPM>65</RPM>
    <Resistance>6</Resistance>
    <Relative Resistance>25</Relative Resistance>
    <Incline>0.0</Incline><Time>32:57</Time>
    <Miles>5.2295</Miles>
    <MPH>9.94</MPH>
    <Watts>8</Watts>
    <HR>134.0</HR>
    <RPM>66</RPM>
    <Resistance>1</Resistance>
    <Relative Resistance>4</Relative Resistance>
    <Incline>0.0</Incline><Time>32:58</Time>
    <Miles>5.2295</Miles>
    <MPH>9.26</MPH>
    <Watts>8</Watts>
    <HR>133.5</HR>
    <RPM>66</RPM>
    <Resistance>1</Resistance>
    <Relative Resistance>4</Relative Resistance>
    <Incline>0.0</Incline><Time>32:59</Time>
    <Miles>5.2295</Miles>
    <MPH>8.57</MPH>
    <Watts>8</Watts>
    <HR>133.0</HR>
    <RPM>65</RPM>
    <Resistance>1</Resistance>
    <Relative Resistance>4</Relative Resistance>
    <Incline>0.0</Incline><Time>33:00</Time>
    <Miles>5.2313</Miles>
    <MPH>8.44</MPH>
    <Watts>8</Watts>
    <HR>134.0</

## References

* https://towardsdatascience.com/the-easy-way-to-work-with-csv-json-and-xml-in-python-5056f9325ca9
* https://stackabuse.com/reading-and-writing-xml-files-in-python-with-pandas/
* https://roytuts.com/how-to-convert-csv-to-xml-using-python/
* https://stackoverflow.com/questions/41059264/simple-csv-to-xml-conversion-python