In [73]:
# Loading libraries
# Libraries for data wrangling
import pandas as pd

# Libraries for data visualization
import plotly.express as px

In [74]:
# Location of the race-results CSV export.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running the notebook on another machine.
file_path = 'D:/smart_bangladesh_run/race_results_ecdf.csv'

# Load the race results into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,OverallPosition,Name,Gender,Age,ChipTime,ClockTime,AveragePace,ChipTimeSeconds,Speed,Avg speed km/hr,AveragePaceSeconds,AgeGroup,Age Category
0,1,MD IMRAN HASAN,Male,24,0:24:32,0:24:32,00:03:16,1472.0,18.342391,18.37,196.0,20-24,20-24
1,2,TUFAYEL AHMED,Male,27,0:24:56,0:24:57,00:03:19,1496.0,18.048128,18.09,199.0,25-29,25-29
2,3,ASHRAFUL ALAM,Male,24,0:25:28,0:25:30,00:03:23,1528.0,17.670157,17.73,203.0,20-24,20-24
3,4,DWEEP TALUKDER,Male,22,0:25:31,0:25:31,00:03:24,1531.0,17.635532,17.65,204.0,20-24,20-24
4,5,PALASH SHEIKH,Male,23,0:26:42,0:26:42,00:03:33,1602.0,16.853933,16.9,213.0,20-24,20-24


In [75]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,OverallPosition,Age,ChipTimeSeconds,Speed,Avg speed km/hr,AveragePaceSeconds
count,1746.0,1746.0,1746.0,1746.0,1746.0,1746.0
mean,873.5,31.587629,3506.375716,8.084618,8.09331,467.053837
std,504.171102,8.865538,739.101396,1.921273,1.925878,98.544075
min,1.0,3.0,1472.0,4.692388,4.69,196.0
25%,437.25,25.0,2997.25,6.715165,6.72,399.0
50%,873.5,31.0,3478.5,7.76197,7.77,463.5
75%,1309.75,38.0,4020.75,9.008258,9.02,536.0
max,1746.0,65.0,5754.0,18.342391,18.37,767.0


In [76]:
# Convert ChipTime ("H:MM:SS" strings) to whole elapsed seconds.
# ChipTime is a duration rather than a clock time, so it is parsed once as a
# timedelta (instead of three separate datetime parses); floor-dividing by a
# one-second Timedelta gives the number of whole seconds.
df['Chiptime'] = pd.to_timedelta(df['ChipTime']) // pd.Timedelta(seconds=1)

# Keep only what the ECDF (empirical cumulative distribution function) needs:
# the grouping column and the chip time in seconds
ecdf = df[['Gender', 'Chiptime']]

# ECDF of chip time, one marker series per gender, with a marginal histogram
fig = px.ecdf(
    ecdf,
    x="Chiptime",
    color="Gender",
    markers=True,
    lines=False,
    marginal="histogram",
    template="simple_white",
)

# X-axis ticks: one every 100 seconds starting at 0, bounded by the largest
# chip time (+10 s), labelled as HH:MM:SS instead of raw seconds
tick_positions = list(range(0, int(df['Chiptime'].max()) + 10, 100))
tick_labels = pd.to_datetime(tick_positions, unit='s').strftime('%H:%M:%S')
fig.update_xaxes(
    showline=True,
    tickvals=tick_positions,
    ticktext=tick_labels,
    tickangle=45,
)

# Subtitle, positioned in paper coordinates just above the plot area
subtitle_note = dict(
    text="<i>Empirical Cumulative Distribution Function (ECDF)</i>",
    align='center',
    showarrow=False,
    xref='paper',
    yref='paper',
    x=0.52,
    y=1.02,
    xanchor='center',
    yanchor='bottom',
    font=dict(size=14, color='gray'),
)

# Author credit, positioned in paper coordinates below the plot area
author_note = dict(
    text="Author: Abdul Aziz Pervez",
    align='right',
    showarrow=False,
    xref='paper',
    yref='paper',
    x=0.97,
    y=-0.20,
    xanchor='right',
    yanchor='bottom',
    font=dict(size=15),
)

# Figure size, centred title, base font, legend, gridlines and annotations
fig.update_layout(
    height=600,
    width=1200,
    title=dict(
        text="<b>Smart Bangladesh Run 2024</b>",
        font=dict(size=20),
        x=0.5,
        y=0.98,
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Arial, sans-serif", size=14),
    showlegend=True,
    xaxis_showgrid=True,
    yaxis_showgrid=True,
    annotations=[subtitle_note, author_note],
)

# Axis titles for the subplot at row 1, col 1
fig.update_xaxes(title_text='<b>Chip Time</b>', row=1, col=1)
fig.update_yaxes(title_text='<b>Probability</b>', row=1, col=1)

# Render the figure
fig.show()