# Work Out the Dashboard Logic

In [1]:
from build_workout_dashboard.utilities import *
import toml
import os

# Load the project settings from pyproject.toml (resolved against the current working directory).
# utf-8 is given explicitly because that is what toml uses when it opens a path itself.
with open("pyproject.toml", "r", encoding="utf-8") as f:
    # Parse the handle that is already open; passing the path again made toml
    # open the file a second time and left `f` unused.
    config = toml.load(f)

# Database connection settings stored under [tool.project.database]
dbConfig = config['tool']['project']['database']

# Input file path: <poetry project name with '-' replaced by '_'>/<input_filename>
input_filepath = os.path.join(
    config['tool']['poetry']['name'].replace('-', '_'),
    config['tool']['project']['input_filename'],
)


In [2]:
## Toggle if development to use local host instead of RDS on AWS:
if not config['tool']['project']['debug']:
    print("Using production database configuration.")
    # Production credentials come from the environment, never from pyproject.toml
    rds_env_names = {'host': 'RDS_ENDPOINT', 'username': 'RDS_USER', 'password': 'RDS_PASSWORD'}
    for config_key, env_name in rds_env_names.items():
        dbConfig[config_key] = os.getenv(env_name)

    # os.getenv returns None for unset variables; say so up front rather than
    # leaving the reader to decode a connection error further down.
    missing_env = [env_name for config_key, env_name in rds_env_names.items() if dbConfig[config_key] is None]
    if missing_env:
        print(f"Warning: environment variable(s) not set: {', '.join(missing_env)}")


print("Connecting to MySQL database...")
# Mask the password: cell output is saved with the notebook and would leak the credential
print({key: ('***' if key == 'password' else value) for key, value in dbConfig.items()})
connection = mysql.connector.connect(**dbConfig)
cursor = connection.cursor()

cursor.execute("SHOW DATABASES")
# Each fetched row is a 1-tuple; keep only the database name
databases = [row[0] for row in cursor.fetchall()]

print("Databases visible to your user:")
for db in databases:            # Print the databases
    print(db)

Connecting to MySQL database...
{'host': 'localhost', 'port': 3306, 'username': 'barbs', 'password': '***'}
Databases visible to your user:
amazon_products
assert_db
assert_schema_1
dogs_db
information_schema
mysql
performance_schema
sweat
sys


In [5]:
# Record the schema to work with in the connection settings
dbConfig['database'] = "sweat"

# Report (without stopping) when the server did not list that schema
database_is_listed = dbConfig['database'] in databases
if not database_is_listed:
    print(f"No database named {dbConfig['database']} in {dbConfig['host']};")


In [13]:
# Read every row of the workout_summary table (in the configured database) into a DataFrame
cursor = connection.cursor()
use_statement = f"USE {dbConfig['database']}"
cursor.execute(use_statement)
cursor.execute("SELECT * FROM workout_summary;")

# Collect all result rows, then take the column names from the cursor metadata
data = cursor.fetchall()
column_names = [column[0] for column in cursor.description]

# Build the frame, order it by workout_date (ascending) and renumber the index from 0
unsorted_df = pd.DataFrame(data, columns=column_names)
DF = unsorted_df.sort_values(by='workout_date', ascending=True).reset_index(drop=True)

DF

Unnamed: 0,workout_id,workout_date,activity_type,kcal_burned,distance_mi,duration_sec,avg_pace,max_pace,steps,link
0,57005018,2011-07-22,Run,0,5.403910,2280.0,7.03195,0.00000,0.0,http://www.mapmyfitness.com/workout/57005018
1,372819401,2011-08-14,Run,1381,13.025600,5687.0,7.27682,4.16161,,http://www.mapmyfitness.com/workout/372819401
2,76822852,2011-11-09,Run,0,6.804380,2700.0,6.61339,0.00000,0.0,http://www.mapmyfitness.com/workout/76822852
3,305280701,2013-06-17,Run,0,4.967120,3380.0,11.34120,0.00000,0.0,http://www.mapmyfitness.com/workout/305280701
4,309076653,2013-06-19,Run,384,3.330000,1680.0,8.41000,0.00000,0.0,http://www.mapmyfitness.com/workout/309076653
...,...,...,...,...,...,...,...,...,...,...
2259,8234102029,2024-10-19,Run,91,1.014630,1726.0,28.35180,11.46280,4325.0,http://www.mapmyfitness.com/workout/8234102029
2260,8235855694,2024-10-20,Run,91,1.122110,1639.0,24.34400,14.11540,4138.0,http://www.mapmyfitness.com/workout/8235855694
2261,8236842598,2024-10-21,Run,132,1.656390,2364.0,23.78670,12.41610,5975.0,http://www.mapmyfitness.com/workout/8236842598
2262,8238825466,2024-10-22,Run,159,1.922110,2904.0,25.18070,8.70019,7319.0,http://www.mapmyfitness.com/workout/8238825466


# Main metrics

## DEFINE THEM HERE

GroupBy **"activity_type"**  

ROLL UP BY Week, Month:   

- how many workouts each (COUNT) 
- avg distance in miles (AVG distance_mi) 
- avg duration in minutes (AVG duration_sec / 60) 
- avg pace in min/mile (AVG avg_pace)

- TOTAL kcal (SUM kcal_burned)

- LONGEST workout in miles (MAX distance_mi) 
- LONGEST workout in minutes (MAX duration_sec / 60) 
- MAX pace within a workout in min/mile (MAX max_pace)


In [None]:
### Read the CSV export located at input_filepath
print(f'Reading data from {input_filepath}')
raw_df = pd.read_csv(input_filepath)

### Clean data (per the original note: drops unnecessary columns)
cleaned_df = clean_data(raw_df)

# Enrich data (per the original note: for now this only extracts the workout ID)
df = enrich_data(cleaned_df)

## Identify new workout_ids that are not in database

In [None]:
# IDs already stored in the database, taken from the workout_summary rows held in DF.
# Defined here so the cell no longer depends on a name left over from an earlier kernel session.
# NOTE(review): assumes DF['workout_id'] and df['workout_id'] share a dtype — confirm,
# otherwise isin() matches nothing and every row is treated as new.
existing_workout_ids = set(DF['workout_id'])

# Keep only the rows of df whose workout_id is not in the database yet
newDf = df[~df['workout_id'].isin(existing_workout_ids)]
print(newDf.shape[0])
newDf

## Update the database 

In [None]:
# Insert the rows of newDf into workout_summary, report the result, then close the connection.
cursor = None  # bound only once a cursor has been opened, so `finally` knows whether to close it
try:
    # Establish MySQL connection
    if 'connection' not in locals():
        print("Connecting to MySQL database...")
        # Mask the password: cell output is saved with the notebook and would leak the credential
        print({key: ('***' if key == 'password' else value) for key, value in dbConfig.items()})
        connection = mysql.connector.connect(**dbConfig)
    elif not connection.is_connected():
        # A cursor cannot be opened on a dropped connection, so reconnect first
        connection.reconnect()
        if dbConfig.get('database'):
            # The queries below use an unqualified table name, so a default schema must be selected.
            # NOTE(review): a schema chosen earlier via "USE ..." may not survive reconnect — confirm.
            connection.database = dbConfig['database']

    # Always open a fresh cursor: an earlier one may be closed or tied to the old session,
    # and none exists at all when the connection was created just above.
    cursor = connection.cursor()

    # Insert data
    rows_affected = insert_data_NEW(cursor, newDf)

    # Commit changes
    connection.commit()

    print(f"Data import completed. {rows_affected} rows were inserted.")

    # Get the last inserted id
    cursor.execute("SELECT LAST_INSERT_ID()")
    last_id = cursor.fetchone()[0]
    print(f"The last inserted ID was: {last_id}")

    # Get the total number of rows in the table
    cursor.execute("SELECT COUNT(*) FROM workout_summary")
    total_rows = cursor.fetchone()[0]
    print(f"Total rows in the table after insert: {total_rows}")

except Error as e:
    print(f"Error: {e}")
    print(f"Error Code: {e.errno}")
    print(f"SQLSTATE: {e.sqlstate}")
    print(f"Message: {e.msg}")

    # Discard any uncommitted rows so a failed run cannot leave a partial insert pending
    if 'connection' in locals() and connection.is_connected():
        connection.rollback()

finally:
    # `connection` may be unbound if the initial connect itself failed
    if 'connection' in locals() and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()
        print("MySQL connection is closed")
        

## Use connection

In [None]:
# Open a new connection and cursor when the current connection is no longer live
connection_alive = connection.is_connected()
if not connection_alive:
    connection = mysql.connector.connect(**dbConfig)
    cursor = connection.cursor()

# Fetch the column definitions of the workout_summary table; the fetched rows are the cell output
cursor.execute("DESCRIBE workout_summary;")
cursor.fetchall()

In [None]:
# Display newDf again: the rows of df whose workout_id was not in existing_workout_ids
newDf