<a href="https://colab.research.google.com/github/aparajitakar/nfl_yards_gained/blob/main/Combined.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import re
import math
import matplotlib.pyplot as plt
import seaborn as sns

# metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [None]:
# Mount Google Drive inside the Colab runtime so the shared project folder is reachable under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Folder on the mounted shared drive that holds the project CSV files.
main_dir = '/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/'
print(main_dir)

# Full paths of the train and test CSV files (despite the `_dir` suffix these are files, not folders).
train_data_dir = main_dir + 'train.csv'
test_data_dir = main_dir + 'test.csv'
print(train_data_dir)
print(test_data_dir)

/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/
/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/train.csv
/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/test.csv


In [None]:
# Load both splits. low_memory=False makes pandas parse each file in one pass rather than in chunks.
train_df = pd.read_csv(train_data_dir, low_memory=False)
test_df = pd.read_csv(test_data_dir, low_memory=False)

## Transform Functions

In [None]:
# Median of the training-set Humidity column; transform_data reads this global to fill missing humidity values.
median_humidity = train_df["Humidity"].median()

In [None]:
def clean_wind_directions(val):
    """Normalise a free-text wind direction to a compass abbreviation.

    Args:
        val: Raw WindDirection entry.

    Returns:
        np.nan when `val` is not a string; the matching abbreviation
        (e.g. 'S', 'NNE') when `val` is one of the known spellings below;
        otherwise `val` unchanged.
    """
    if not isinstance(val, str):
        return np.nan

    # (abbreviation, anchored spellings that should collapse to it); checked in order.
    spellings = (
        ('S', ['^South$', '^s$', '^From S$', '^Southerly$']),
        ('W', ['^West$', '^From W$', '^from W$']),
        ('N', ['^North$']),
        ('E', ['^East$', '^EAST$']),
        ('SW', ['^Southwest$', '^SouthWest$', '^South west$', '^From SW$']),
        ('NW', ['^Northwest$']),
        ('NE', ['^Northeast$', '^From NE$', '^NorthEast$', '^North East$']),
        ('SE', ['^Southeast$']),
        ('SSW', ['^South-Southwest$', '^From SSW$', '^South Southwest$', '^S-SW$']),
        ('WSW', ['^West-Southwest$', '^W-SW$', '^From WSW$']),
        ('WNW', ['^West Northwest$', '^W-NW$']),
        ('NNW', ['^North/Northwest$', '^From NNW$']),
        ('NNE', ['^N-NE$', '^From NNE$']),
        ('ENE', ['^East North East$', '^East NE$']),
        ('ESE', ['^East Southeast$', '^From ESE$']),
        ('SSE', ['^South, Southeast$', '^South Southeast$', '^From SSE$']),
    )

    for abbreviation, variants in spellings:
        if re.match('|'.join(variants), val):
            return abbreviation
    # Already an abbreviation, or a spelling not listed above: leave untouched.
    return val

def clean_turf(val):
    """Collapse the many spellings of a playing surface into 'Natural' or 'Artificial'.

    Args:
        val: Raw Turf entry.

    Returns:
        'Natural' or 'Artificial' when `val` is one of the known spellings;
        otherwise `val` unchanged. Non-string input (NaN, None) is returned
        as-is instead of raising inside `re.match`.
    """
    if not isinstance(val, str):
        return val

    # (label, anchored spellings that should collapse to it); checked in order.
    surfaces = (
        ('Natural', ['^Grass$', '^Natural Grass$', '^Natural grass$', '^Naturall Grass$',
                     '^natural grass$', '^grass$', '^Natural$']),
        ('Artificial', ['^FieldTurf$', '^Field Turf$', '^FieldTurf360$', '^FieldTurf 360$',
                        '^Field turf$', '^UBU Speed Series-S5-M$', '^UBU-Speed Series-S5-M$',
                        '^UBU Sports Speed S5-M$', '^Twenty Four/Seven Turf$', '^Artifical$',
                        '^A-Turf Titan$', '^SISGrass$', '^DD GrassMaster$',
                        '^Twenty-Four/Seven Turf$']),
    )

    for label, variants in surfaces:
        if re.match('|'.join(variants), val):
            return label
    return val

def new_orientation(angle, play_direction):
    """Mirror an angle (in degrees) when the play direction flag is falsy.

    Args:
        angle: Angle in degrees.
        play_direction: Flag compared against 0; a value equal to 0 (including
            False) means the angle must be mirrored.

    Returns:
        `angle` unchanged when `play_direction` is not 0; otherwise
        `360.0 - angle`, with a result of exactly 360.0 reported as 0.0.
    """
    if play_direction != 0:
        return angle

    mirrored = 360.0 - angle
    return 0.0 if mirrored == 360.0 else mirrored

def StadiumType(val):
    """Collapse known spellings of the stadium type into 'outdoor' or 'indoor'.

    Args:
        val: Raw StadiumType entry.

    Returns:
        'outdoor' or 'indoor' when `val` is one of the known spellings;
        otherwise `val` unchanged (for example roof descriptions, which the
        caller handles separately). Non-string input (NaN, None) is returned
        as-is instead of raising inside `re.match`.
    """
    if not isinstance(val, str):
        return val

    # (label, anchored spellings that should collapse to it); checked in order.
    stadium_kinds = (
        ('outdoor', ['^Outdoor$', '^Open$', '^Cloudy$', '^Bowl$', '^Outdoors$', '^OUTDOOR$',
                     '^Oudoor$', '^Outddors$', '^Outside$', '^Ourdoor$', '^Outdor$',
                     '^Heinz Field$']),
        ('indoor', ['^Indoors$', '^Indoor$']),
    )

    for label, variants in stadium_kinds:
        if re.match('|'.join(variants), val):
            return label
    return val

def _parse_wind_speed(raw):
    """Turn one raw WindSpeed entry into a float, or NaN when it holds no leading number."""
    if pd.isna(raw):
        return np.nan
    text = str(raw).lower().replace('mph', '').strip()
    if text == 'calm':
        return 0.0
    # A range such as '11-17' is represented by its midpoint.
    span = re.match(r'(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)', text)
    if span:
        return (float(span.group(1)) + float(span.group(2))) / 2
    # Otherwise keep the leading number, e.g. '12, gusts to 20' -> 12.
    leading = re.match(r'\d+(?:\.\d+)?', text)
    return float(leading.group()) if leading else np.nan


def WindSpeed(X):
    """Add a numeric 'WindSpeed_dc' column to `X`, derived from its 'WindSpeed' column.

    Each entry is parsed case-insensitively: an 'mph' suffix is ignored,
    'calm' becomes 0, a range 'a-b' becomes its midpoint, and anything
    without a leading number (missing values, direction text such as 'SSW')
    becomes NaN. The NaNs are then filled with the most frequent parsed
    speed. If no entry could be parsed, the column stays all-NaN.

    Args:
        X: DataFrame with a 'WindSpeed' column. It is modified in place.

    Returns:
        None.
    """
    speeds = X['WindSpeed'].apply(_parse_wind_speed)

    # Series.mode() returns a Series; fillna needs the scalar, otherwise it
    # aligns on the index and fills almost nothing.
    most_common = speeds.mode()
    if not most_common.empty:
        speeds = speeds.fillna(most_common.iloc[0])

    X['WindSpeed_dc'] = speeds

def strtosecs(txt):
    """Convert a colon-separated clock string 'a:b:c' into a single number.

    The result is `a * 60 + b + c / 60`, using the first three integer fields.
    NOTE(review): presumably the fields are minutes, seconds and a sub-second
    part — confirm against the GameClock format.

    Args:
        txt: String with at least three ':'-separated integer fields.

    Returns:
        The combined value as a float.
    """
    fields = txt.split(':')
    minutes, seconds, sixtieths = (int(fields[k]) for k in range(3))
    return minutes * 60 + seconds + sixtieths / 60

def getPositionCount(listVals, position):
    """Return the count attached to `position` in a list of personnel entries.

    Args:
        listVals: Strings such as '1 RB' or ' 3 WR'.
        position: Position code to look for (substring match).

    Returns:
        The first run of digits, as an int, from the first entry that
        contains `position`; 0 when no entry contains it.
    """
    matching = next((entry for entry in listVals if position in entry), None)
    if matching is None:
        return 0
    return int(re.findall(r'\d+', matching)[0])

def fix_weather(x):
    """Reduce a free-text weather description to a coarse category.

    The value is stringified and lower-cased, a few misspellings and
    qualifiers are normalised, and the first matching keyword group below
    decides the category.

    Args:
        x: Raw GameWeather entry (any type; missing values stringify to 'nan').

    Returns:
        One of 'rainy', 'snow', 'cloudy', 'sunny', 'indoor', 'cold', 'clear',
        or the normalised text itself when no keyword matches.
    """
    text = str(x).lower().replace("&", "and")

    for misspelling in ("coudy", "clouidy"):
        text = text.replace(misspelling, "cloudy")

    # Strip qualifiers, and treat a stringified missing value as an indoor game.
    for fragment, substitute in (("mostly ", ""), ("party ", ""), ("partly ", ""), ("nan", "indoor")):
        text = text.replace(fragment, substitute)

    # Order matters: the first group with any keyword present wins.
    categories = (
        (("rain", "shower"), "rainy"),
        (("snow",), "snow"),
        (("cloud", "overcast"), "cloudy"),
        (("sunny",), "sunny"),
        (("indoor", "controlled", "t: 51"), "indoor"),
        (("cold", "cool"), "cold"),
        (("fair", "clear"), "clear"),
    )
    for keywords, label in categories:
        if any(keyword in text for keyword in keywords):
            return label

    return text

In [None]:
def _wind_speed_to_number(raw):
    """Turn one raw WindSpeed entry into a float, or NaN when it holds no leading number."""
    if pd.isna(raw):
        return np.nan
    text = str(raw).lower().replace('mph', '').strip()
    if text == 'calm':
        return 0.0
    # A range such as '11-17' is represented by its midpoint.
    span = re.match(r'(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)', text)
    if span:
        return (float(span.group(1)) + float(span.group(2))) / 2
    leading = re.match(r'\d+(?:\.\d+)?', text)
    return float(leading.group()) if leading else np.nan


def transform_data(og_data):
    """Return a cleaned, feature-engineered copy of the raw per-player tracking data.

    The input frame is not modified. Columns are assigned with `data[col] = ...`
    so each one takes the dtype of its new values.

    Steps, in order:
      * normalise WindDirection and Turf, flag the rusher row (IsRusher);
      * rescale S and A for the 2017 season;
      * turn PlayDirection into a boolean (True == 'right') and mirror X,
        Orientation and Dir for plays that are not moving right;
      * split DefensePersonnel into DP_* columns and convert PlayerHeight
        ('feet-inches') to centimetres;
      * normalise StadiumType (missing -> 'outdoor', closed roof -> 'indoor',
        open roof -> 'outdoor');
      * parse WindSpeed into numeric WindSpeed_dc, filling gaps with the most
        frequent value;
      * convert GameClock with `strtosecs`, bin Yards into YardBins;
      * fill Humidity gaps with the module-level `median_humidity`;
      * split OffensePersonnel into OP_* counts, derive Age, coarsen GameWeather.

    Args:
        og_data: Raw DataFrame as read from the project CSV.

    Returns:
        A new DataFrame with the columns above added or rewritten.
    """
    data = og_data.copy()  # work on a copy so the caller's frame is untouched

    data['WindDirection'] = data['WindDirection'].apply(clean_wind_directions)
    data['Turf'] = data['Turf'].apply(clean_turf)
    data['IsRusher'] = data['NflId'] == data['NflIdRusher']

    # NOTE(review): presumably these factors make 2017 speed/acceleration comparable
    # with the other seasons — confirm where the constants come from.
    season_2017 = data['Season'] == 2017
    data.loc[season_2017, 'S'] = data.loc[season_2017, 'S'] * 1.1320096503100632
    data.loc[season_2017, 'A'] = data.loc[season_2017, 'A'] * 1.1210484653841495

    # Express every play as if it moved to the right.
    data['PlayDirection'] = data['PlayDirection'].apply(lambda x: x.strip() == 'right')
    moving_right = data['PlayDirection']
    data['X'] = data['X'].where(moving_right, 120 - data['X'])
    data['Orientation'] = [new_orientation(angle, flag)
                           for angle, flag in zip(data['Orientation'], moving_right)]
    data['Dir'] = [new_orientation(angle, flag)
                   for angle, flag in zip(data['Dir'], moving_right)]

    # DefensePersonnel is read positionally: tokens 0, 2 and 4 are taken as the DL, LB and DB counts.
    data["DP_DL"] = data["DefensePersonnel"].apply(lambda x: int(x.split()[0]))
    data["DP_LB"] = data["DefensePersonnel"].apply(lambda x: int(x.split()[2]))
    data["DP_DB"] = data["DefensePersonnel"].apply(lambda x: int(x.split()[4]))
    data["DP_RB"] = data["DefensePersonnel"].apply(lambda x: 1 if "RB" in x else 0)
    data["DP_OL"] = data["DefensePersonnel"].apply(lambda x: 1 if "OL" in x else 0)
    data["Height_in_cms"] = data["PlayerHeight"].apply(
        lambda x: (int(x.split("-")[0]) * 12 + int(x.split("-")[1])) * 2.54)

    stadium = data['StadiumType'].fillna('outdoor').apply(StadiumType).str.lower()
    # Roof descriptions fall through StadiumType: a closed roof is treated as indoor,
    # an open roof as outdoor.
    stadium = stadium.apply(lambda x: "indoor" if "closed" in x else x)
    stadium = stadium.apply(lambda x: "outdoor" if "open" in x else x)
    data["StadiumType"] = stadium

    data["WindSpeed"] = data["WindSpeed"].astype(str)
    wind_speed = data["WindSpeed"].apply(_wind_speed_to_number)
    # Series.mode() returns a Series; fillna needs the scalar, otherwise it aligns
    # on the index and fills almost nothing.
    usual_speed = wind_speed.mode()
    if not usual_speed.empty:
        wind_speed = wind_speed.fillna(usual_speed.iloc[0])
    data['WindSpeed_dc'] = wind_speed

    data["GameClock"] = data["GameClock"].apply(strtosecs)

    # Target bins: everything up to -1, (-1, 0], 2-yard steps up to 10, then 15, 20 and beyond.
    bin_width = 2
    yard_edges = [-np.inf, -1, *range(0, 11, bin_width), 15, 20, np.inf]
    data["YardBins"] = pd.cut(data["Yards"], yard_edges)

    data["HumidityNoNull"] = data["Humidity"].fillna(median_humidity)

    data['FieldPosition'] = data['FieldPosition'].fillna('MFL')

    # One count column per position group listed in OffensePersonnel (0 when absent).
    for position in ('RB', 'OL', 'QB', 'TE', 'WR', 'DL', 'LB', 'DB'):
        data["OP_" + position] = data["OffensePersonnel"].apply(
            lambda groups, position=position: getPositionCount(groups.split(','), position))

    data['PlayerBirthYear'] = data['PlayerBirthDate'].apply(lambda x: int(x.split("/")[-1]))
    data['Age'] = data["Season"] - data['PlayerBirthYear']
    data['GameWeather'] = data["GameWeather"].apply(fix_weather)

    return data

In [None]:
# Build the engineered feature frames; transform_data works on a copy, so train_df / test_df stay untouched.
train_data = transform_data(train_df)
test_data = transform_data(test_df)

In [None]:
# Remove identifiers, raw fields that transform_data already re-encoded
# (PlayerHeight, WindSpeed, Humidity, PlayerBirthDate/Year) and the raw Yards
# target, which lives on as YardBins.
drop_cols = ['Unnamed: 0', 'unique_idx', 'NflId', 'NflIdRusher', 'JerseyNumber', 'Yards',
             'PlayerHeight', 'PlayerBirthDate', 'PlayerCollegeName', 'WindSpeed', 'Humidity', 'PlayerBirthYear', 'TimeHandoff', 'TimeSnap']
train_data = train_data.drop(columns=drop_cols)
test_data = test_data.drop(columns=drop_cols)

## Models

In [None]:
# # CLUSTERED Data -- uncomment if we want to use it instead of the regular dataset
# train_data = pd.read_csv('/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/merged/clustered_data_train.csv', low_memory=False)
# test_data = pd.read_csv('/content/gdrive/Shareddrives/CSCI 5523 Project/dev/csv/merged/clustered_data_test.csv', low_memory=False)

## Oversampling and Undersampling

In [None]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
def sample(X_train, y_train, sampling_type='over', random_state=None):
    """Rebalance the training set so every YardBins class has the same number of rows.

    Args:
        X_train: Feature matrix.
        y_train: DataFrame with a 'YardBins' column holding the class labels.
        sampling_type: 'over' randomly duplicates rows until every class
            matches the largest one; any other value randomly removes rows
            until every class matches the smallest one.
        random_state: Seed forwarded to the resampler so the draw can be
            reproduced. The default None keeps the unseeded behaviour.

    Returns:
        The `(X_resampled, y_resampled)` pair returned by `fit_resample`.
    """
    class_sizes = y_train.YardBins.value_counts()

    if sampling_type == 'over':
        resampler_cls, rows_per_class = RandomOverSampler, class_sizes.max()
    else:
        resampler_cls, rows_per_class = RandomUnderSampler, class_sizes.min()

    strategy = {label: rows_per_class for label in y_train.YardBins.unique()}
    resampler = resampler_cls(sampling_strategy=strategy, random_state=random_state)
    return resampler.fit_resample(X_train, y_train)

## Random Forests

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Train: keep complete rows only, split off the binned target, one-hot encode the remaining columns
X_train = train_data.dropna()
y_train = X_train[["YardBins"]].astype(str)
X_train = pd.get_dummies(X_train.drop(columns=["GameId", "PlayId", "YardBins", "DisplayName"]))

# Test: same treatment
X_test = test_data.dropna()
y_test = X_test[["YardBins"]].astype('str')
X_test = pd.get_dummies(X_test.drop(columns=["GameId", "PlayId", "YardBins", "DisplayName"]))

# Align the test columns with the training columns: categories seen only in test
# are dropped, categories missing from test become all-zero columns.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# uncomment if we want to use over/undersampling
# X_train, y_train = sample(X_train, y_train, 'under')
display(y_train.value_counts())

YardBins    
(0.0, 2.0]      98887
(2.0, 4.0]      88152
(4.0, 6.0]      52140
(-inf, -1.0]    43184
(-1.0, 0.0]     38344
(6.0, 8.0]      27158
(10.0, 15.0]    20856
(8.0, 10.0]     17072
(20.0, inf]      9196
(15.0, 20.0]     8337
dtype: int64

In [None]:
# Random forest with default hyper-parameters; the seed is fixed so a rerun grows the same forest.
model = RandomForestClassifier(random_state=1)

# Fit on the training split. y_train is a one-column DataFrame, so flatten it
# to the 1-D target scikit-learn expects.
model.fit(X_train, y_train.values.ravel())

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))

  """


              precision    recall  f1-score   support

 (-1.0, 0.0]       0.11      0.07      0.08     16410
(-inf, -1.0]       0.13      0.09      0.10     18260
  (0.0, 2.0]       0.26      0.40      0.31     42590
(10.0, 15.0]       0.06      0.03      0.04      8558
(15.0, 20.0]       0.02      0.01      0.01      3454
  (2.0, 4.0]       0.24      0.32      0.27     38717
 (20.0, inf]       0.03      0.01      0.02      4025
  (4.0, 6.0]       0.15      0.12      0.13     21757
  (6.0, 8.0]       0.06      0.03      0.04     11725
 (8.0, 10.0]       0.07      0.03      0.04      7546

    accuracy                           0.21    173042
   macro avg       0.11      0.11      0.11    173042
weighted avg       0.17      0.21      0.18    173042



## Decision Tree

In [None]:
from sklearn import tree

In [None]:
# Train: complete rows only; this model additionally leaves out GameClock and Dis
X_train = train_data.dropna()
y_train = X_train[["YardBins"]].astype('str')
X_train = pd.get_dummies(
    X_train.drop(columns=["GameId", "PlayId", "YardBins", "DisplayName", "GameClock", "Dis"]))

# Test: same treatment
X_test = test_data.dropna()
y_test = X_test[["YardBins"]].astype('str')
X_test = pd.get_dummies(
    X_test.drop(columns=["GameId", "PlayId", "YardBins", "DisplayName", "GameClock", "Dis"]))

# Align the test columns with the training columns (extra test-only dummies are dropped).
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# Balance the classes by undersampling every bin down to the rarest one.
X_train, y_train = sample(X_train, y_train, 'under')

# Decision tree
max_depth = 200
model = tree.DecisionTreeClassifier(max_depth=max_depth, random_state=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

 (-1.0, 0.0]       0.12      0.19      0.15     16410
(-inf, -1.0]       0.12      0.17      0.14     18260
  (0.0, 5.0]       0.56      0.35      0.43     94617
(10.0, 15.0]       0.06      0.10      0.07      8558
(15.0, 20.0]       0.02      0.05      0.03      3454
 (20.0, inf]       0.02      0.04      0.03      4025
 (5.0, 10.0]       0.16      0.20      0.18     27718

    accuracy                           0.26    173042
   macro avg       0.15      0.15      0.15    173042
weighted avg       0.36      0.26      0.30    173042



## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic Regression
# Trained on whatever X_train / y_train the previous cells left in memory.
# NOTE(review): the recorded run stopped at the max_iter limit; scaling the
# features first may be needed for convergence — confirm.

logit = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter = 800)
# y_train is a one-column DataFrame; flatten it to the 1-D target scikit-learn expects.
logit.fit(X_train, y_train.values.ravel())
y_pred = logit.predict(X_test)
print(accuracy_score(y_test, y_pred))

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.13750418973428416


## Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Naive Bayes
# Trained on whatever X_train / y_train the previous cells left in memory.

Nb = GaussianNB()
# y_train is a one-column DataFrame; flatten it to the 1-D target scikit-learn expects.
Nb.fit(X_train, y_train.values.ravel())
pred = Nb.predict(X_test)
print(accuracy_score(y_test, pred))

  y = column_or_1d(y, warn=True)


0.0384762080882098


In [None]:
# Show how many training rows fall in each yard bin for the y_train currently in memory.
display(y_train.value_counts())

YardBins    
(9.0, 15.0]     8337
(6.0, 9.0]      8337
(3.0, 6.0]      8337
(20.0, inf]     8337
(15.0, 20.0]    8337
(0.0, 3.0]      8337
(-inf, -1.0]    8337
(-1.0, 0.0]     8337
dtype: int64

## SVM

In [None]:
from sklearn import svm

In [None]:
# RBF-kernel support vector classifier (other kernels: 'linear', 'poly', 'sigmoid', 'precomputed').
clf = svm.SVC(kernel='rbf',gamma='scale')
# y_train is a one-column DataFrame; flatten it to the 1-D target scikit-learn expects.
clf.fit(X_train, y_train.values.ravel())

train_accuracy = clf.score(X_train, y_train.values.ravel())
print('Train Accuracy %.3f'%(train_accuracy))

y_pred = clf.predict(X_test)
# The targets are the string bin labels, so let the report infer them;
# integer labels 0-9 would match none of them.
print(classification_report(y_test, y_pred))

  y = column_or_1d(y, warn=True)


## XGBoost

For XGBoost, we one-hot encode all categorical features and combine the 22 player rows of each play into a single row.

In [None]:
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

In [None]:
# Take the target from the rusher's row of each play.
# NOTE(review): assumes exactly one rusher row per play, in play order — confirm.
rusher_X_train = train_data.loc[train_data['IsRusher']]
rusher_X_test = test_data.loc[test_data['IsRusher']]
y_train, y_test = rusher_X_train['YardBins'], rusher_X_test['YardBins']

In [None]:
# The feature frames must not contain the target bins.
X_train = train_data.drop(columns=["YardBins"])
X_test = test_data.drop(columns=["YardBins"])

In [None]:
def combine_rows(X):
    """Collapse every 22 consecutive player rows into one wide row per play.

    Columns fall into three groups:
      * unused columns (identifiers) are left out;
      * per-player columns (`unique_columns`) are spread into 22 columns each,
        named `<column><k>` for k = 0..21 in row order within the play;
      * every other column is taken from the first row of the play.

    Rows are addressed by position, so the index of `X` does not need to be
    0..n-1.

    Args:
        X: Numeric/boolean DataFrame whose rows are grouped in blocks of 22
           per play and which contains every column in `unique_columns`.

    Returns:
        A float DataFrame with one row per play.

    Raises:
        ValueError: If the number of rows is not a multiple of 22.
    """
    players_per_play = 22
    unused_columns = ["GameId", "PlayId", "Team", "index", "Unnamed: 0"]
    unique_columns = ["X", "Y", "S", "A", "Dis", "Orientation", "Dir", "PlayerWeight", "Height_in_cms"]

    n_plays, leftover = divmod(X.shape[0], players_per_play)
    if leftover:
        raise ValueError(
            "combine_rows expects %d rows per play, got %d rows in total"
            % (players_per_play, X.shape[0]))

    excluded = set(unique_columns) | set(unused_columns)
    shared_cols = [c for c in X.columns if c not in excluded]
    player_cols = [c + str(k) for c in unique_columns for k in range(players_per_play)]

    # Play-level values: first row of each block of 22.
    shared_block = X[shared_cols].iloc[::players_per_play].to_numpy(dtype=float)

    # Player-level values: (rows, cols) -> (play, player, col) -> (play, col, player),
    # then flatten so each column's 22 players sit next to each other.
    player_block = (
        X[unique_columns]
        .to_numpy(dtype=float)
        .reshape(n_plays, players_per_play, len(unique_columns))
        .transpose(0, 2, 1)
        .reshape(n_plays, len(unique_columns) * players_per_play)
    )

    return pd.DataFrame(data=np.hstack([shared_block, player_block]),
                        columns=shared_cols + player_cols)

In [None]:
# One-hot encode the categorical columns, then force the test frame onto the
# training columns: test-only dummies are dropped, missing ones become zeros.
X_train = pd.get_dummies(X_train)
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

In [None]:
# Renumber the rows 0..n-1 so index labels and row positions coincide before the rows are grouped into plays.
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)

In [None]:
# Collapse every 22 consecutive player rows into one wide row per play.
combined_X_train = combine_rows(X_train)
combined_X_test = combine_rows(X_test)

In [None]:
# The labels were sliced out of the rusher rows and kept that index; renumber them 0..n-1
# and convert the bins to their string form, which the label mapping keys on.
y_train = y_train.reset_index(drop=True).astype(str)
y_test = y_test.reset_index(drop=True).astype(str)

In [None]:
# Encode the bin labels as integers 0-9 in ascending yard order, as XGBoost needs numeric class ids.
mapping = dict(zip(
    ['(-inf, -1.0]', '(-1.0, 0.0]', '(0.0, 2.0]', '(2.0, 4.0]', '(4.0, 6.0]',
     '(6.0, 8.0]', '(8.0, 10.0]', '(10.0, 15.0]', '(15.0, 20.0]', '(20.0, inf]'],
    range(10),
))
y_train_enc = np.array(y_train.replace(mapping))
y_test_enc = np.array(y_test.replace(mapping))

In [None]:
# Number of stratified folds used for cross-validation (no shuffling is requested here).
kfold = 5
skf = StratifiedKFold(n_splits=kfold)

In [None]:
# Booster configuration passed to xgb.train.
params = {
    'booster': 'dart',  # tree booster with dropout (see rate_drop / skip_drop below)
    'max_depth': 10,
    'objective': 'multi:softmax',  # multiclass objective; requires num_class
    'num_class': 10,  # one class per entry of the label mapping
    'eval_metric': 'mlogloss',  # multiclass log loss, reported for every watchlist entry
    'nthread': 25,
    'verbosity': 1,
    'learning_rate': 0.1,
    'min_child_weight': 2,
    'rate_drop': 0.3,
    'skip_drop': 0.3
}

In [None]:
# The held-out test matrix is the same for every fold, so build it once.
d_test = xgb.DMatrix(combined_X_test.values)

for i, (train_index, test_index) in enumerate(skf.split(combined_X_train, y_train_enc)):
    print('[Fold %d/%d]' % (i + 1, kfold))
    X_train1, X_valid = combined_X_train.values[train_index], combined_X_train.values[test_index]
    y_train1, y_valid = y_train_enc[train_index], y_train_enc[test_index]
    # Convert this fold's split into XGBoost format
    d_train = xgb.DMatrix(X_train1, y_train1)
    d_valid = xgb.DMatrix(X_valid, y_valid)
    watchlist = [(d_train, 'train'), (d_valid, 'valid')]

    # Train the model. We pass in a max of 500 rounds (with early stopping after 50)
    mdl = xgb.train(params, d_train, 500, watchlist, early_stopping_rounds=50, verbose_eval=100)

    print('[Fold %d/%d Prediciton:]' % (i + 1, kfold))
    # Score the model of this fold on the held-out test data, using only the
    # trees up to the best early-stopping round.
    # NOTE(review): ntree_limit / best_ntree_limit belong to older XGBoost releases;
    # newer ones use iteration_range — confirm against the installed version.
    p_test = mdl.predict(d_test, ntree_limit=mdl.best_ntree_limit)
    print(classification_report(y_test_enc, p_test))

[Fold 1/5]
[0]	train-mlogloss:2.21711	valid-mlogloss:2.2699
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[100]	train-mlogloss:1.36816	valid-mlogloss:2.05772
Stopping. Best iteration:
[139]	train-mlogloss:1.15438	valid-mlogloss:2.01942

[Fold 1/5 Prediciton:]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.19      0.05      0.08       991
           1       0.22      0.02      0.04       860
           2       0.26      0.58      0.36      2297
           3       0.24      0.41      0.31      2089
           4       0.14      0.04      0.07      1172
           5       0.09      0.00      0.00       633
           6       0.33      0.00      0.00       409
           7       0.25      0.00      0.01       456
           8       0.00      0.00      0.00       188
           9       0.00      0.00      0.00       208

    accuracy                           0.25      9303
   macro avg       0.17      0.11      0.09      9303
weighted avg       0.21      0.25      0.18      9303

[Fold 2/5]
[0]	train-mlogloss:2.21824	valid-mlogloss:2.27222
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[100]	train-mlogloss:1.3721

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.16      0.06      0.09       991
           1       0.23      0.02      0.04       860
           2       0.26      0.55      0.35      2297
           3       0.24      0.42      0.30      2089
           4       0.15      0.04      0.07      1172
           5       0.29      0.00      0.01       633
           6       0.00      0.00      0.00       409
           7       0.27      0.01      0.01       456
           8       0.00      0.00      0.00       188
           9       0.00      0.00      0.00       208

    accuracy                           0.24      9303
   macro avg       0.16      0.11      0.09      9303
weighted avg       0.21      0.24      0.18      9303

[Fold 3/5]
[0]	train-mlogloss:2.22047	valid-mlogloss:2.27069
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
[100]	train-mlogloss:1.3643

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.22      0.05      0.08       991
           1       0.21      0.03      0.05       860
           2       0.25      0.56      0.35      2297
           3       0.25      0.41      0.31      2089
           4       0.14      0.05      0.07      1172
           5       0.00      0.00      0.00       633
           6       0.17      0.00      0.00       409
           7       0.11      0.00      0.01       456
           8       0.00      0.00      0.00       188
           9       0.00      0.00      0.00       208

    accuracy                           0.24      9303
   macro avg       0.13      0.11      0.09      9303
weighted avg       0.19      0.24      0.18      9303

[Fold 5/5]
[0]	train-mlogloss:2.21158	valid-mlogloss:2.26887
Multiple eval metrics have been passed: 'valid-mlogloss' will be used for early stopping.

Will train until valid-mlogloss hasn't improved in 50 rounds.
