In [1]:
# injury_prediction_pipeline/preprocessing.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


def load_and_harmonize_datasets(afl_path, nfl_path, soccer_path):
afl = pd.read_csv(afl_path)
nfl = pd.read_csv(nfl_path)
soccer = pd.read_csv(soccer_path)


afl['sport'] = 'AFL'
nfl['sport'] = 'NFL'
soccer['sport'] = 'Soccer'


common_cols = ['player_id', 'week', 'RPE', 'TotalDistance', 'HighSpeedDistance',
'ACWR', 'FatigueIndex', 'PriorInjuryFlag', 'Injury', 'sport']


afl = afl[common_cols]
nfl = nfl[common_cols]
soccer = soccer[common_cols]


df = pd.concat([afl, nfl, soccer], ignore_index=True)
df.dropna(subset=['Injury'], inplace=True)
return df


def scale_features(df, feature_cols):
scaler = StandardScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])
return df, scaler

IndentationError: expected an indented block after function definition on line 7 (2879617844.py, line 8)

In [2]:
# injury_prediction_pipeline/train_models.py
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score




def train_logistic(X_train, y_train):
model = LogisticRegression(max_iter=1000, class_weight='balanced')
model.fit(X_train, y_train)
return model




def train_rf(X_train, y_train):
model = RandomForestClassifier(n_estimators=100, class_weight='balanced')
model.fit(X_train, y_train)
return model




def train_xgb(X_train, y_train):
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)
return model




def build_lstm(input_shape):
model = Sequential()
model.add(Bidirectional(LSTM(64, return_sequences=False), input_shape=input_shape))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])
return model

IndentationError: expected an indented block after function definition on line 10 (4132269241.py, line 11)

In [3]:
# injury_prediction_pipeline/evaluate.py
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc




def evaluate_model(model, X_test, y_test, model_name="Model"):
y_pred = model.predict(X_test)
if hasattr(model, "predict_proba"):
y_prob = model.predict_proba(X_test)[:, 1]
else:
y_prob = y_pred


print(f"\n{model_name} Report")
print(classification_report(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_prob))


cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d")
plt.title(f"Confusion Matrix - {model_name}")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


fpr, tpr, _ = roc_curve(y_test, y_prob)
plt.plot(fpr, tpr, label=f"{model_name} AUC: {auc(fpr, tpr):.2f}")
plt.plot([0, 1], [0, 1], linestyle='--')
plt.title(f"ROC Curve - {model_name}")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.show()

IndentationError: expected an indented block after function definition on line 9 (3939981724.py, line 10)