In [1]:
# Import dependencies
import pandas as pd
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score, confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [None]:
# Load the raw CSV into a dataframe.
# NOTE(review): file_path is an empty placeholder — pd.read_csv is expected to
# fail on it, so fill in the real CSV location before running this cell.
file_path = ""
nfl_df = pd.read_csv(file_path)

In [None]:
# Columns listed here are removed from the dataframe before modeling.
# The list is currently empty, so no columns are dropped.
bad_columns = []
nfl_df = nfl_df.drop(columns=bad_columns)

In [None]:
# Show how many rows fall under each distinct Position_Type value
nfl_df['Position_Type'].value_counts()

In [None]:
def rf_model(df):
    """Train and evaluate a balanced random forest on a single dataframe.

    Every column except "Drafted" is used as a feature and "Drafted" is the
    target. The rows are split into training and testing sets, the features
    are standardized with a scaler fitted on the training split only, and a
    BalancedRandomForestClassifier is fitted and scored on the test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Drafted" column.
        NOTE(review): all remaining columns are fed to StandardScaler, so
        they are presumably numeric — confirm against the loaded data.

    Returns
    -------
    dict
        'Accuracy'     : balanced accuracy score on the test split
        'cm'           : confusion matrix on the test split
        'class_report' : output of classification_report_imbalanced
        'features'     : (importance, column name) tuples, sorted in
                         descending order
    """
    # Create dataframe for features
    X = df.drop("Drafted", axis=1)

    # Create target variable
    y = df.Drafted.values

    # Separate into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

    # Fit the scaler on the training data only so that no information from
    # the test split leaks into the scaling parameters.
    X_scaler = StandardScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Create and fit the model. The classifier gets its own name so it does
    # not shadow this function inside its own body.
    classifier = BalancedRandomForestClassifier(n_estimators=128, random_state=10)
    classifier.fit(X_train_scaled, y_train)

    # Use the model to predict
    y_pred = classifier.predict(X_test_scaled)

    # Calculate the balanced accuracy score
    accuracy = balanced_accuracy_score(y_test, y_pred)

    # Build the confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # Build the imbalanced classification report
    class_report = classification_report_imbalanced(y_test, y_pred)

    # List the features sorted in descending order by feature importance.
    # The importances come from the fitted classifier above (the original
    # code referenced an undefined name here).
    features = sorted(zip(classifier.feature_importances_, X.columns), reverse=True)

    # Put all useful results into a dictionary and return it
    results = {'Accuracy': accuracy, 'cm': cm, 'class_report': class_report, 'features': features}
    return results

In [None]:
# Create an empty list to hold one results dictionary per position type
results = []

# Go through each position type, in order of first appearance
for position_type in nfl_df['Position_Type'].unique():
    # Make a dataframe of players just in that position.
    # The column name must match 'Position_Type' exactly (capital T);
    # the lowercase spelling is not the column used elsewhere in this notebook.
    df = nfl_df.loc[nfl_df['Position_Type'] == position_type]
    # Make a random forest model for this position and append the results to the results list
    results.append(rf_model(df))