In [1]:
import os
import os.path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re

In [2]:
# Folder holding the tidied CSV exports (relative path, so it is resolved
# against the kernel's working directory).
datadir = "data/tidy_data"

# Load the coded injury log into a DataFrame.
# NOTE(review): going by the preview of this frame, the file carries a saved
# index that comes back as an "Unnamed: 0" column, and DateOfReturn holds "-"
# in some rows (presumably "no return date recorded" - confirm). Consider
# index_col=0 / na_values="-" if no later cell relies on either.
data = os.path.join(datadir, "coded_injuries.csv")
df_injuries = pd.read_csv(data)

In [3]:
# Preview the first ten rows to check the column names and raw value formats.
df_injuries.head(10)

Unnamed: 0,Code,Organizations,ProblemDate,DateOfReturn,ReportedDate,BodyPart,AffectedArea,Side,InjuryType,BodyPartName,CurrentStatus
0,FB64,Football,8/10/2023,-,8/15/2023,Shoulder,Upper Extremity,Left,Other Shoulder Injury,,Not Cleared
1,FB29,Football,8/11/2023,-,8/15/2023,Thigh,Lower Extremity,Left,Strain,Hamstring (partial tear),Limited Activity
2,FB74,Football,8/12/2023,-,8/15/2023,Elbow,Upper Extremity,Right,Entrapment Neuropathy (javelin elbow),,Limited Activity
3,FB46,Football,8/14/2023,-,8/26/2023,Thigh,Lower Extremity,Left,Strain,Hamstring (partial tear),Limited Activity
4,FB24,Football,8/17/2023,-,8/26/2023,Knee,Lower Extremity,Right,Sprain,Anterior Cruciate Ligament,Not Cleared
5,VB13,Volleyball,8/18/2023,8/18/2023,8/23/2023,Wrist,Upper Extremity,Left,Triangular Fibrocartilage Complex Tear,,Returned To Play As Tolerated
6,WSOC6,Womens Soccer,8/18/2023,-,9/13/2023,Knee,Lower Extremity,Left,Other Knee Injury,,Limited Activity
7,MSCO7,Men's Soccer,8/19/2023,-,8/23/2023,Hip,Lower Extremity,Left,Strain,Psoas major,Returned To Play and Closed
8,FB16,Football,8/20/2023,-,8/26/2023,Shoulder,Upper Extremity,Left,Subluxation,,Returned To Play As Tolerated
9,SB2,Softball,8/20/2023,8/19/2023,8/22/2023,Shoulder,Upper Extremity,Right,Strain,General Rotator Cuff,Returned To Play As Tolerated


In [4]:
# Normalise the separator in the date columns from "/" to "-"
# (e.g. "8/10/2023" -> "8-10-2023"). The values remain plain strings; nothing
# is parsed into datetimes here, and entries without a "/" are left unchanged.
# regex=False requests a literal match explicitly: the default of `regex`
# changed from True to False in pandas 2.0, so spelling it out keeps the
# behaviour identical (and warning-free) across pandas versions.
for date_col in ("ProblemDate", "DateOfReturn", "ReportedDate"):
    df_injuries[date_col] = df_injuries[date_col].str.replace("/", "-", regex=False)

# Rows with no recorded side get an explicit label instead of NaN.
df_injuries["Side"] = df_injuries["Side"].fillna("Does Not Apply")

In [5]:
# Injury records per sport/organization, largest group first.
# "Count" is the number of non-null `Code` entries within each group.
sport_group = (
    df_injuries.groupby("Organizations")["Code"]
    .count()
    .to_frame("Count")
    .sort_values(by="Count", ascending=False)
)
sport_group.head(10)

Unnamed: 0_level_0,Count
Organizations,Unnamed: 1_level_1
Football,29
Men's Swimming & Diving,23
Baseball,20
Women's Tennis,20
Men's Track & Field,20
Softball,18
Women's Fencing,17
Women's Basketball,16
Womens Soccer,12
Men's Soccer,11


In [6]:
# Injury records per body part, largest group first.
# "Count" is the number of non-null `Code` entries within each group.
body_part_group = (
    df_injuries.groupby("BodyPart")["Code"]
    .count()
    .to_frame("Count")
    .sort_values(by="Count", ascending=False)
)
body_part_group.head(10)

Unnamed: 0_level_0,Count
BodyPart,Unnamed: 1_level_1
Knee,44
Shoulder,42
Thigh,40
Ankle,31
Hip,27
Lower Leg,23
Lower Back,11
Brain,9
Elbow,9
Abdomen,8


In [7]:
# Injury records per injury type, largest group first.
# "Count" is the number of non-null `Code` entries within each group.
injury_group = (
    df_injuries.groupby("InjuryType")["Code"]
    .count()
    .to_frame("Count")
    .sort_values(by="Count", ascending=False)
)
injury_group.head(10)

Unnamed: 0_level_0,Count
InjuryType,Unnamed: 1_level_1
Strain,80
Sprain,48
Tightness,10
Subluxation,10
Concussion,9
Other Knee Injury,8
Other,7
Other Hip Injury,6
Knee pain (finding),6
Discomfort (finding),6


In [8]:
# Injury records per current recovery status, largest group first.
# "Count" is the number of non-null `Code` entries within each group.
recovery_group = (
    df_injuries.groupby("CurrentStatus")["Code"]
    .count()
    .to_frame("Count")
    .sort_values(by="Count", ascending=False)
)
recovery_group.head(10)

Unnamed: 0_level_0,Count
CurrentStatus,Unnamed: 1_level_1
Returned To Play As Tolerated,154
Limited Activity,71
Not Cleared,31
Returned To Play and Closed,28
Waiting For Assessment,3
