In [None]:
import re

import pandas as pd

# Load the combined injury records and drop the listed columns.
# 'injury_category' is among them; it is rebuilt from the injury text in a later cell.
unused_columns = ['injury_category', 'name', 'date_of_birth', 'date_injury', 'date_il_retro', 'date_return']
df = pd.read_csv('./Data/combined_injury_records.csv').drop(columns=unused_columns)
df

In [None]:
# Show every record whose injury text mentions "surgery"
# (case-insensitive; rows with missing text are treated as non-matches).
mentions_surgery = df['injury'].str.contains('surgery', case=False, na=False)
df[mentions_surgery]

In [None]:
# Maps an injury-category label to a regular expression that is searched for
# (unanchored, so it matches anywhere) in the injury text.
#
# categorize_injury walks this dict in insertion order and returns the first label
# whose pattern matches, so earlier entries take precedence: any text containing
# "surgery" becomes 'Major Surgery' no matter which other keywords it contains.
injury_category_map = {
    'Major Surgery': r'surgery',
    'Muscle strain': r'strain',
    'Ligament sprain': r'sprain',
    'Tendon injury': r'tendon|impingement|tendinopathy',
    # 'bone bruise' can never be the deciding alternative: 'bone' already matches it.
    'Bone': r'fracture|bone|stress|bone bruise|dislocation',
    'Contusion & Impact': r'contusion|impact',
    'Skin': r'skin|cut|laceration|blister',
    'Illness': r'illness|viral|bacterial|covid|flu',
    'Discomfort & Tightness': r'spasm|tightness|discomfort|fatigue|stiffness|soreness',
    'Neurological': r'concussion|headache|nerve|thoracic outlet',
    # NOTE(review): 'Imflammation' looks like a misspelling of 'Inflammation', but later
    # cells select rows by this exact label, so it has to be renamed everywhere at once.
    'Imflammation': r'itis|inflammation|metatarsalgia',
    'Infection': r'infection|sepsis'
}

In [None]:
def categorize_injury(injury, category_map=None):
    """Return the first injury category whose pattern occurs in ``injury``.

    Parameters
    ----------
    injury : object
        Injury description. It is converted with ``str()`` and lower-cased before
        matching, so non-string values are matched as their string form.
    category_map : dict[str, str], optional
        Mapping of category label -> regular expression. Defaults to the
        notebook-level ``injury_category_map``. Entries are tried in insertion
        order and the first match wins.

    Returns
    -------
    str
        The matching category label, or ``'Other'`` when no pattern matches.
    """
    if category_map is None:
        category_map = injury_category_map

    injury_lower = str(injury).lower()
    for category, pattern in category_map.items():
        # re.search on the plain string replaces building a one-element pandas
        # Series for every pattern of every row; the regex semantics are the same.
        if re.search(pattern, injury_lower, flags=re.IGNORECASE):
            return category
    return 'Other'

df['injury_category'] = df['injury'].apply(categorize_injury)

# Remove the rows whose injury text is exactly 'TBD' or exactly 'Illness'.
# .copy() makes the filtered frame independent of the one it was sliced from, so
# the column assignments in later cells do not raise SettingWithCopyWarning.
df = df.loc[~df['injury'].isin(['TBD', 'Illness'])].copy()

df


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Distribution of days injured for each injury category, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
df.boxplot(column='days_injured', by='injury_category', ax=ax)
ax.set_title('Days Injured by Injury Category')
fig.suptitle('')  # blank out the figure-level title
ax.set_xlabel('Injury Category')
ax.set_ylabel('Days Injured')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Descriptive statistics of days injured, one row per injury category.
days_by_category = df.groupby('injury_category')['days_injured']
days_by_category.describe()

In [None]:
# NOTE(review): this repeats the earlier "Days Injured by Injury Category" box plot;
# df is not modified between the two cells, so the figure is the same.
fig, ax = plt.subplots(figsize=(12, 6))
df.boxplot(column='days_injured', by='injury_category', ax=ax)
ax.set_title('Days Injured by Injury Category')
fig.suptitle('')  # blank out the figure-level title
ax.set_xlabel('Injury Category')
ax.set_ylabel('Days Injured')
ax.tick_params(axis='x', labelrotation=45)
plt.show()



In [None]:
# Group injuries by body part for the visualizations that follow.
#
# Maps a body-part label to a regular expression searched for (unanchored) in the
# lower-cased injury text. extract_body_part tries the entries in insertion order
# and returns the first match, so earlier entries take precedence.
# NOTE(review): no entry covers back/spine terms, so such injuries fall through
# to extract_body_part's 'Other' fallback - confirm that is intended.
body_part_map = {
    'Joints': r'shoulder|rotator cuff|scapula|teres major|thoracic|wrist|carpal|ankle|achilles|elbow|tommy john|ucl|ulnar|knee|acl|meniscus|patellar|patella',
    'Lower': r'leg|glute|gluteal|hamstring|quad|calf|shin|adductor|groin|hip|tibial|thigh',
    # NOTE(review): 'ab' is unanchored and also matches inside longer words
    # (e.g. "labrum", "abrasion") - confirm that is intended.
    'Upper': r'chest|pectoral|rib|ribcage|ab|sports hernia|abdominal|oblique|abdomen|core|intercostal|costochondral',
    'Arm': r'arm|biceps|triceps|forearm|flexor',
    'Hand': r'hand|finger|nail|thumb|hamate|pinky',
    # Fixed: this pattern used to begin with '|', i.e. an empty alternative that
    # matches every string. Every injury not claimed by an earlier entry was
    # therefore labelled 'Foot', and the entries below were unreachable.
    'Foot': r'foot|toe|plantar|heel',
    # NOTE(review): 'col' also matches inside words such as "collision".
    'Internal': r'kidney|lung|append|col|covid|viral|flu|gastr',
    'Head/Neck': r'head|concussion|nasal|nose|facial|face|eye|jaw|neck|cervical',
}

In [None]:
def extract_body_part(injury, part_map=None):
    """Return the first body-part label whose pattern occurs in ``injury``.

    Parameters
    ----------
    injury : object
        Injury description. It is converted with ``str()`` and lower-cased before
        matching, so non-string values are matched as their string form.
    part_map : dict[str, str], optional
        Mapping of body-part label -> regular expression. Defaults to the
        notebook-level ``body_part_map``. Entries are tried in insertion order
        and the first match wins.

    Returns
    -------
    str
        The matching body-part label, or ``'Other'`` when no pattern matches.
    """
    if part_map is None:
        part_map = body_part_map

    injury_lower = str(injury).lower()
    for body_part, sub_text in part_map.items():
        # re.search on the plain string replaces building a one-element pandas
        # Series for every pattern of every row; the regex semantics are the same.
        if re.search(sub_text, injury_lower, flags=re.IGNORECASE):
            return body_part
    return 'Other'

# Label every record with the body part derived from its injury text.
df['injured_part'] = df['injury'].map(extract_body_part)

# Spot-check how the 'Thoracic outlet syndrome' records were labelled.
df[df['injury'].eq('Thoracic outlet syndrome')]


In [None]:
# Remove outliers within each (injury_category, injured_part) group using the
# 1.5 * IQR rule: keep rows with Q1 - 1.5*IQR <= days_injured <= Q3 + 1.5*IQR.
# NOTE: df is rebound to the filtered frame, so re-running this cell trims again.
grouped_days = df.groupby(['injury_category', 'injured_part'])['days_injured']
q1 = grouped_days.transform(lambda x: x.quantile(0.25))
q3 = grouped_days.transform(lambda x: x.quantile(0.75))

# Despite its name this is the allowed margin beyond the quartiles (1.5 * IQR).
outlier = (q3 - q1) * 1.5

# The bounds are inclusive. The previous strict < / > comparison discarded every
# row of a group whose IQR is 0 (a single record, or all records sharing one
# duration), because no value lies strictly inside an interval of zero width.
df = df[df['days_injured'].between(q1 - outlier, q3 + outlier)]

In [None]:
# Box plot of days injured per body part, restricted to the 'Major Surgery' category.
df_major_surgery = df[df['injury_category'].eq('Major Surgery')]

fig, ax = plt.subplots(figsize=(12, 6))
df_major_surgery.boxplot(column='days_injured', by='injured_part', ax=ax)
ax.set_title('Days Injured by Injury Category (Surgery)')
fig.suptitle('')  # blank out the figure-level title
ax.set_xlabel('Injured Part')
ax.set_ylabel('Days Injured')
ax.tick_params(axis='x', labelrotation=45)
ax.set_yticks(np.arange(0, 800, step=100))  # ticks every 100 days: 0, 100, ..., 700
plt.show()

In [None]:
def plot_by_injury_category(injury_category):
    """Show a box plot of days injured per injured part for one injury category.

    Reads the notebook-level ``df`` and keeps the rows whose ``injury_category``
    equals the argument. The figure is displayed; nothing is returned.
    """
    matching_rows = df[df['injury_category'].eq(injury_category)]

    fig, ax = plt.subplots(figsize=(12, 6))
    matching_rows.boxplot(column='days_injured', by='injured_part', ax=ax)
    ax.set_title(f'Days Injured by Injury Category ({injury_category})')
    fig.suptitle('')  # blank out the figure-level title
    ax.set_xlabel('Injured Part')
    ax.set_ylabel('Days Injured')
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()

In [None]:
# One box plot per injury category, in the order the categories are declared in
# injury_category_map. This replaces eleven copy-pasted single-call cells; taking the
# labels from the map keeps them in sync with the values stored in df['injury_category'].
# 'Major Surgery' is skipped because it is plotted separately with fixed y-axis ticks.
for category in injury_category_map:
    if category != 'Major Surgery':
        plot_by_injury_category(category)

# Inspect the individual records behind the 'Infection' plot.
df[df.injury_category == 'Infection']

In [None]:
def plot_by_injury_category_pos(injury_category):
    """Show a box plot of days injured per ``pos`` value for one injury category.

    Reads the notebook-level ``df`` and keeps the rows whose ``injury_category``
    equals the argument. The figure is displayed; nothing is returned.
    """
    matching_rows = df[df['injury_category'].eq(injury_category)]

    fig, ax = plt.subplots(figsize=(12, 6))
    matching_rows.boxplot(column='days_injured', by='pos', ax=ax)
    ax.set_title(f'Days Injured by Injury Category ({injury_category})')
    fig.suptitle('')  # blank out the figure-level title
    ax.set_xlabel('Position')
    ax.set_ylabel('Days Injured')
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()

In [None]:
plot_by_injury_category_pos("Muscle strain")

# Muscle-strain records with pos == 'P', ordered from shortest to longest absence.
is_p_muscle_strain = (df['pos'] == 'P') & (df['injury_category'] == 'Muscle strain')
df[is_p_muscle_strain].sort_values(by='days_injured')



In [None]:
def plot_by_injured_part(injured_part):
    """Show a box plot of days injured per injury category for one injured part.

    Reads the notebook-level ``df`` and keeps the rows whose ``injured_part``
    equals the argument. The figure is displayed; nothing is returned.
    """
    matching_rows = df[df['injured_part'].eq(injured_part)]

    fig, ax = plt.subplots(figsize=(12, 6))
    matching_rows.boxplot(column='days_injured', by='injury_category', ax=ax)
    ax.set_title(f'Days Injured by Injured Part ({injured_part})')
    fig.suptitle('')  # blank out the figure-level title
    ax.set_xlabel('Injury Category')
    ax.set_ylabel('Days Injured')
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()

In [None]:
# The string literal below is never used at run time. It appears to preserve an
# earlier, finer-grained version of body_part_map for reference.
'''
body_part_map = {
    'Shoulder': r'shoulder|rotator cuff|scapula|teres major|thoracic',
    'Wrist': r'wrist|carpal',
    'Back': r'back|lat|\bdisc\b|spine|lumbar',
    'Lower': r'leg|glute|gluteal|hamstring|quad|calf|shin|adductor|groin|hip|tibial|thigh',
    'Ankle': r'ankle|achilles',
    'Foot': r'foot|toe|plantar|heel',
    'Hand': r'hand|finger|nail|thumb|hamate|pinky',
    'Arm': r'arm|biceps|triceps|forearm|flexor',
    'Elbow': r'elbow|tommy john|ucl|ulnar',
    'Upper': r'chest|pectoral|rib|ribcage|ab|sports hernia|abdominal|oblique|abdomen|core|intercostal|costochondral',
    'Knee': r'knee|acl|meniscus|patellar|patella',
    'Head': r'head|concussion|nasal|nose|facial|face|eye|jaw',
    'Neck': r'neck|cervical',
    'Internal': r'kidney|lung|append|col|covid|viral|flu|gastr'
}
'''

# One box plot (days injured per injury category) for each of these body parts.
for part in ('Joints', 'Arm', 'Upper', 'Lower', 'Hand', 'Foot'):
    plot_by_injured_part(part)


In [None]:
# Boolean flag: True when the absence lasted more than 100 days.
# NOTE(review): the following cell assigns df['severity'] again with
# 'Mild' / 'Moderate' / 'Severe' labels, replacing these booleans.
df = df.assign(severity=df['days_injured'] > 100)


In [None]:
# Bucket each absence by its length in days: up to 30, above 30 up to 60, above 60.
days = df['days_injured']

choice_list = ['Mild', 'Moderate', 'Severe']
cond_list = [
    days.le(30),
    days.gt(30) & days.le(60),
    days.gt(60),
]

# Rows that satisfy none of the conditions receive the default label 'Mild'.
df['severity'] = np.select(cond_list, choice_list, default='Mild')
df

In [None]:
# Severity breakdown for the 'Major Surgery' records: overall, then per injured part.
is_major_surgery = df['injury_category'] == 'Major Surgery'
df_major_surgery = df[is_major_surgery]
df_major_surgery_severity = df_major_surgery['severity'].value_counts()

df_major_surgery_group = df_major_surgery.groupby('injured_part')
df_major_surgery_severity_by_part = df_major_surgery_group['severity'].value_counts()

# One bar per (injured_part, severity) combination.
df_major_surgery_severity_by_part.plot.bar()

In [None]:
def plot_severity_by_category(injury_category):
    """Show a bar chart of severity counts per injured part for one injury category.

    Reads the notebook-level ``df``, keeps the rows whose ``injury_category`` equals
    the argument, and counts the ``severity`` values within each ``injured_part``.
    The figure is displayed; nothing is returned.
    """
    df_category = df.loc[df['injury_category'] == injury_category]
    severity_by_part = df_category.groupby('injured_part')['severity'].value_counts()

    # Draw on a fresh Axes: Series.plot() otherwise reuses whichever axes is
    # current, so two calls in the same cell would land on one shared plot.
    fig, ax = plt.subplots()
    severity_by_part.plot(kind='bar', ax=ax)
    # Title and axis labels, in line with the other plotting helpers in this notebook.
    ax.set_title(f'Severity Counts by Injured Part ({injury_category})')
    ax.set_xlabel('Injured Part, Severity')
    ax.set_ylabel('Number of Injuries')
    plt.show()

In [None]:

# One severity chart per injury category, in the order the categories are declared in
# injury_category_map. This replaces ten copy-pasted single-call cells.
# 'Major Surgery' and 'Illness' were not among the original calls, so both are skipped.
skipped_categories = {'Major Surgery', 'Illness'}

for category in injury_category_map:
    if category in skipped_categories:
        continue
    plot_severity_by_category(category)
    # Finish each figure before starting the next one: Series.plot() draws on the
    # current axes when one is open, which would stack the charts on a single plot.
    plt.show()

In [None]:
def severity_counts(injury_category):
    """Count the severity labels within each injured part for one injury category.

    Reads the notebook-level ``df``. Returns the result of
    ``groupby('injured_part').value_counts()`` on the ``injured_part`` and
    ``severity`` columns of the matching rows.
    """
    in_category = df['injury_category'] == injury_category
    part_and_severity = df.loc[in_category, ['injured_part', 'severity']]
    return part_and_severity.groupby('injured_part').value_counts()


severity_counts("Major Surgery")

In [None]:
# Position, severity, injured part and age for the records labelled 'Imflammation'.
inflammation_columns = ['pos', 'severity', 'injured_part', 'age_at_injury']
df_imflammation = df.loc[df['injury_category'] == 'Imflammation', inflammation_columns]

# Count how often each age occurs within every (pos, severity, injured_part) group.
df_imflammation.groupby(['pos', 'severity', 'injured_part']).value_counts()


In [None]:
# Mean age at injury for every (pos, severity, injured_part) combination.
df_imflammation = df_imflammation[['pos', 'severity', 'injured_part', 'age_at_injury']]

group_keys = ['pos', 'severity', 'injured_part']
df_imflammation.groupby(group_keys)['age_at_injury'].mean()


In [None]:
# Maps an onset label to a regular expression searched for in the injury text.
# 'chronic' is tried before 'acute', so text matching both (for example a
# "stress fracture") is labelled 'chronic'.
chronic_acute_map = {
    'chronic': r'surgery|impingement|tendinopathy|\bstress fracture\b|stress|itis|inflammation|metatarsalgia|spasm|tightness|discomfort|fatigue|stiffness|soreness',
    'acute':  r'strain|sprain|fracture|bone bruise|dislocation|contusion|impact|skin|cut|laceration|blister|illness|viral|bacterial|covid|flu|infection|sepsis'
}


def categorize_chronic_acute(injury):
    """Label an injury description as ``'chronic'``, ``'acute'`` or ``'other'``.

    ``injury`` is converted with ``str()`` and lower-cased, then tested against the
    patterns of ``chronic_acute_map`` in insertion order; the first match wins and
    ``'other'`` is returned when neither pattern matches.

    This function was previously also named ``categorize_injury``, which replaced
    the category classifier defined earlier in the notebook: re-running the cell
    that fills ``df['injury_category']`` afterwards would have written
    chronic/acute labels into that column.
    """
    injury_lower = str(injury).lower()
    for label, pattern in chronic_acute_map.items():
        # Plain re.search instead of a one-element pandas Series per pattern per row.
        if re.search(pattern, injury_lower, flags=re.IGNORECASE):
            return label
    return 'other'


df['chronic_acute'] = df['injury'].apply(categorize_chronic_acute)

df


In [None]:
# Histogram of days injured for the records labelled 'chronic'.
fig, ax = plt.subplots()
df.loc[df['chronic_acute'] == 'chronic', 'days_injured'].plot(kind='hist', ax=ax)

# Title and labels are applied after the histogram is drawn (as in the acute cell),
# so they are set on the axes that holds the plot and are the last values written.
ax.set_title('Number of Chronic Injuries by Days Injured')
ax.set_xlabel('Days Injured')
ax.set_ylabel('Frequency')

plt.show()

In [None]:
# Histogram of days injured for the records labelled 'acute'.
fig, ax = plt.subplots()
df.loc[df['chronic_acute'] == 'acute', 'days_injured'].plot(kind='hist', ax=ax)
ax.set_title('Number of Acute Injuries by Days Injured')
ax.set_xlabel('Days Injured')
ax.set_ylabel('Frequency')

# Ending on plt.show() (like the chronic cell) keeps the cell from echoing the
# Text object returned by the last labelling call.
plt.show()

Unnamed: 0,pos,injury,days_injured,age_at_injury,injury_category,injured_part,severity,chronic_acute
0,P,Tommy John surgery,393,31,Major Surgery,Joints,Severe,chronic
1,INF,Shoulder surgery,163,23,Major Surgery,Joints,Severe,chronic
2,INF,Hip surgery,240,34,Major Surgery,Lower,Severe,chronic
4,INF,Sprained thumb (right),27,29,Ligament sprain,Hand,Mild,acute
5,OF,Knee contusion,30,33,Contusion & Impact,Joints,Mild,acute
...,...,...,...,...,...,...,...,...
4453,P,Strained shoulder,3,28,Muscle strain,Joints,Mild,acute
4454,P,Shoulder surgery (torn labrum),384,26,Major Surgery,Joints,Severe,chronic
4455,C,Hip surgery,235,27,Major Surgery,Lower,Severe,chronic
4456,P,Tommy John surgery,192,26,Major Surgery,Joints,Severe,chronic
