In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the activity log; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('activity_log.csv')

In [None]:
# Preview the frame exactly as it was read from the CSV, before any cleaning.
df

In [None]:
def clean_numeric(column, frame=None):
    """Coerce one column to a numeric dtype in place.

    Values that cannot be parsed as numbers are replaced with NaN
    (``errors='coerce'``) instead of raising.

    Parameters
    ----------
    column : str
        Name of the column to convert.
    frame : pandas.DataFrame, optional
        Frame to modify. When omitted, the notebook-level ``df`` is used,
        which is what existing single-argument calls rely on.

    Returns
    -------
    None
        The target frame is modified in place.
    """
    # Look up the global lazily so the function always sees the current `df`.
    target = df if frame is None else frame
    target[column] = pd.to_numeric(target[column], errors='coerce')

# Columns that are converted to numbers below; clean_numeric turns any value
# that does not parse into NaN.
numeric_columns = [
    'Distance',
    'Calories',
    'Avg HR',
    'Max HR',
    'Avg Run Cadence',
    'Max Run Cadence',
    'Elev Gain',
    'Elev Loss',
    'Avg Stride Length',
    'Best Lap Time',
    'Number of Laps',
]

# Convert each listed column in turn.
for column in numeric_columns:
    clean_numeric(column)

def pace_to_seconds(pace):
    """Convert a 'minutes:seconds' pace string to a total number of seconds.

    Parameters
    ----------
    pace : str
        Pace text with exactly two colon-separated integer fields, e.g. '5:30'.

    Returns
    -------
    int or float
        ``minutes * 60 + seconds``, or ``np.nan`` when the value is not a
        string or does not consist of exactly two integer fields
        (for example '--' or '5:30.5').
    """
    try:
        minutes, seconds = map(int, pace.split(':'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: the value is not a string (e.g. a missing value read as float NaN).
        # ValueError: wrong number of fields, or a field that is not an integer.
        # Only these are swallowed, so KeyboardInterrupt and real bugs still surface.
        return np.nan
    return minutes * 60 + seconds

# Numeric versions of both pace columns (NaN where the text is not 'M:SS').
df['Avg Pace (sec)'] = df['Avg Pace'].apply(pace_to_seconds)
df['Best Pace (sec)'] = df['Best Pace'].apply(pace_to_seconds)

# Drop rows whose pace text contains a '.' or the '--' placeholder.
# na=True treats a missing pace as invalid too: without it the mask contains
# NaN and the `~` negation raises a TypeError.
invalid_pace_pattern = r'\.|--'
has_invalid_pace = (
    df['Avg Pace'].str.contains(invalid_pace_pattern, regex=True, na=True)
    | df['Best Pace'].str.contains(invalid_pace_pattern, regex=True, na=True)
)

# .copy() makes the result an independent frame, so later column assignments
# on `df` do not trigger SettingWithCopyWarning.
df = df[~has_invalid_pace].copy()

In [None]:
# Inspect the frame after numeric coercion and removal of rows with unusable pace values.
df

In [None]:
print("Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
df.info()

print("\nBasic Statistics:")
print(df.describe())

In [None]:
# List the distinct raw 'Avg Pace' strings to see which formats are present in the frame.
df['Avg Pace'].unique()

In [None]:
# Headline figures across all activities: compute everything first, then report.
total_distance = df['Distance'].sum()   # total distance covered
total_calories = df['Calories'].sum()   # total calories burned
average_hr = df['Avg HR'].mean()        # mean of the per-activity average heart rate
max_hr = df['Max HR'].max()             # highest heart rate in any activity

print(f"\nTotal Distance Covered: {total_distance} km")
print(f"Total Calories Burned: {total_calories}")
print(f"Average Heart Rate: {average_hr:.2f} bpm")
print(f"Maximum Heart Rate Recorded: {max_hr} bpm")




In [None]:
def _clock_to_seconds(text):
    """Convert a colon-separated clock string ('SS', 'M:SS', 'H:MM:SS') to whole seconds."""
    total = 0
    # Each field to the right is worth 1/60 of the one before it.
    for part in text.split(':'):
        total = total * 60 + int(part)
    return total


# average pace
df['Avg Pace (sec)'] = df['Avg Pace'].apply(_clock_to_seconds)
average_pace = df['Avg Pace (sec)'].mean()
# 60 / (seconds per distance unit) gives distance units per minute.
print(f"Average Pace: {60 / average_pace:.2f} km/min")

# best pace (the smallest number of seconds is the fastest)
df['Best Pace (sec)'] = df['Best Pace'].apply(_clock_to_seconds)
best_pace = df['Best Pace (sec)'].min()
print(f"Best Pace: {60 / best_pace :.2f} km/min")

# elevation gain and loss
total_elev_gain = df['Elev Gain'].sum()
total_elev_loss = df['Elev Loss'].sum()
print(f"Total Elevation Gain: {total_elev_gain} m")
print(f"Total Elevation Loss: {total_elev_loss} m")


In [None]:
# activity over time

# Build a new frame instead of assigning a column into `df`: `df` may be a
# filtered slice, and assigning into a slice triggers SettingWithCopyWarning.
df = df.assign(Date=pd.to_datetime(df['Date']))

# 'Time' is never converted earlier in the notebook. If it is still text,
# summing it would concatenate the strings, so parse it to timedeltas for the
# aggregation only (`df` itself keeps the original column).
# NOTE(review): assumes text durations look like "HH:MM:SS"; values that do not
# parse become NaT and are left out of the daily sum — confirm against the data.
daily_source = df
if not pd.api.types.is_numeric_dtype(df['Time']):
    daily_source = df.assign(Time=pd.to_timedelta(df['Time'], errors='coerce'))

# One row per calendar day: totals for volume metrics, mean/max for heart rate.
activity_by_date = daily_source.groupby(daily_source['Date'].dt.date).agg({
    'Distance': 'sum',
    'Calories': 'sum',
    'Time': 'sum',
    'Avg HR': 'mean',
    'Max HR': 'max',
    'Elev Gain': 'sum',
    'Elev Loss': 'sum'
})

print("\nActivity By Date:")
print(activity_by_date)

In [None]:
# Move the 'Date' group keys out of the index into a regular column for plotting.
# The guard keeps the cell idempotent: re-running it must not reset the index
# again and add a stray 'index' column.
if 'Date' not in activity_by_date.columns:
    activity_by_date = activity_by_date.reset_index()

In [None]:
# Daily total distance as a line chart.
# NOTE(review): the summary cell prints total distance in km while this axis
# label says miles — confirm which unit the dataset actually uses.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=activity_by_date, x='Date', y='Distance', ax=ax)
ax.set_title('Distance Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Distance (miles)')
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

In [None]:
# Daily total calories as a line chart.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=activity_by_date, x='Date', y='Calories', ax=ax)
ax.set_title('Calories Burned Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Calories')
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

In [None]:
# Daily mean of the per-activity average heart rate as a line chart.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=activity_by_date, x='Date', y='Avg HR', ax=ax)
ax.set_title('Average Heart Rate Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Average Heart Rate (bpm)')
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

In [None]:
# Daily maximum heart rate, one bar per date.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=activity_by_date, x='Date', y='Max HR', ax=ax)
ax.set_title('Maximum Heart Rate Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Maximum Heart Rate (bpm)')
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

In [None]:
# Daily elevation gain and loss drawn as two lines on shared axes.
# NOTE(review): the summary cell prints elevation totals in m while this axis
# label says feet — confirm which unit the dataset actually uses.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=activity_by_date, x='Date', y='Elev Gain', label='Elev Gain', ax=ax)
sns.lineplot(data=activity_by_date, x='Date', y='Elev Loss', label='Elev Loss', ax=ax)
ax.set_title('Elevation Gain and Loss Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Elevation (feet)')
ax.legend()
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()