In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [4]:
#load the data
file_path = Path('SanClemente_MachineLearningDataSet/eunjais_clemente_2020.csv')
sc2020_df = pd.read_csv(file_path)

# testing dataset will be 
###  file_path = Path('SanClemente_MachineLearningDataSet/sanclemente_2021.csv')
sc2020_df.head()

Unnamed: 0.1,Unnamed: 0,#YY,MM,DD,hh,mm,WSP,GST,WVHT,DPD,APD,MWD,PRES,ATMP,DEWP,Wave_Size,Wind_Speed
0,4,2020,1,1,0,40,3.7,5.1,2.19,16.0,9.15,263,1016.4,16.2,11.3,Ideal (3-9 ft),Moderate (3-19 mph)
1,10,2020,1,1,1,40,2.0,3.8,2.07,13.79,8.97,267,1016.7,16.0,11.7,Ideal (3-9 ft),Ideal(<3 mph)
2,16,2020,1,1,2,40,4.9,6.4,1.97,11.43,8.99,263,1017.0,16.3,10.9,Small (<2 ft),Moderate (3-19 mph)
3,22,2020,1,1,3,40,5.3,6.6,2.11,14.81,9.08,267,1017.4,15.9,12.1,Ideal (3-9 ft),Moderate (3-19 mph)
4,28,2020,1,1,4,40,5.9,7.7,2.01,14.81,9.04,262,1017.9,16.2,11.7,Ideal (3-9 ft),Moderate (3-19 mph)


In [5]:
file_path = Path('SanClemente_MachineLearningDataSet/eunjais_clemente_2021.csv')
sc2021_df = pd.read_csv(file_path)
sc2021_df.head()

Unnamed: 0.1,Unnamed: 0,#YY,MM,DD,hh,mm,WSP,GST,WVHT,DPD,APD,MWD,PRES,ATMP,DEWP,Wave_Size,Wind_Speed
0,10,2021,1,1,1,40,9.0,11.6,2.98,9.09,6.8,285,1013.9,14.5,10.4,Ideal (3-9 ft),Moderate (3-19 mph)
1,16,2021,1,1,2,40,9.3,11.8,2.94,17.39,6.92,293,1014.1,14.6,10.7,Ideal (3-9 ft),Moderate (3-19 mph)
2,28,2021,1,1,4,40,8.2,10.5,2.79,9.09,6.97,285,1014.2,14.8,9.8,Ideal (3-9 ft),Moderate (3-19 mph)
3,34,2021,1,1,5,40,7.5,9.8,2.65,16.0,7.09,286,1014.6,14.9,9.6,Ideal (3-9 ft),Moderate (3-19 mph)
4,40,2021,1,1,6,40,8.0,10.2,2.5,16.0,7.21,288,1014.8,15.2,9.2,Ideal (3-9 ft),Moderate (3-19 mph)


In [6]:
sc_df = pd.concat([sc2020_df,sc2021_df])
sc_df= sc_df.rename(columns={'#YY':'YY'})
sc_df['YY'] = sc_df['YY'].astype(str)
sc_df.tail()

Unnamed: 0.1,Unnamed: 0,YY,MM,DD,hh,mm,WSP,GST,WVHT,DPD,APD,MWD,PRES,ATMP,DEWP,Wave_Size,Wind_Speed
8539,51325,2021,12,31,19,40,9.1,11.0,1.95,11.43,5.93,292,1008.0,14.1,11.1,Small (<2 ft),Moderate (3-19 mph)
8540,51331,2021,12,31,20,40,9.4,11.3,2.04,12.12,5.75,279,1007.8,13.9,10.5,Ideal (3-9 ft),Moderate (3-19 mph)
8541,51337,2021,12,31,21,40,9.4,12.1,2.2,6.25,5.92,284,1007.6,13.9,10.2,Ideal (3-9 ft),Moderate (3-19 mph)
8542,51343,2021,12,31,22,40,10.1,12.5,2.43,6.67,6.08,287,1007.3,13.7,9.8,Ideal (3-9 ft),Moderate (3-19 mph)
8543,51349,2021,12,31,23,40,11.5,14.9,2.74,7.69,6.18,276,1007.6,13.7,9.3,Ideal (3-9 ft),Moderate (3-19 mph)


In [7]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(sc_df.dtypes)

Unnamed: 0      int64
YY             object
MM              int64
DD              int64
hh              int64
mm              int64
WSP           float64
GST           float64
WVHT          float64
DPD           float64
APD           float64
MWD             int64
PRES          float64
ATMP          float64
DEWP          float64
Wave_Size      object
Wind_Speed     object
dtype: object


In [8]:
sc_df = sc_df.dropna(axis='columns', how='all')
sc_df = sc_df.dropna()

# convert interest rate to numerical
sc_df['WVHT'] = sc_df['WVHT'].astype('float') 

# Convert the target column values to surfing_ideal and surfing__unideal vased on wave size
x = { "Ideal (3-9 ft)": 'surfing_ideal'}   
sc_df = sc_df.replace(x)
x = dict.fromkeys(["Small (<2 ft)", "Large (10-12 ft)"], 'surfing_unideal')    
sc_df = sc_df.replace(x)

# Convert the target column values to surfing_ideal and surfing__unideal vased on wind speed
x = { "Ideal(<3 mph)": 'surfing_ideal', "Moderate (3-19 mph)":'surfing_ideal'}   
sc_df = sc_df.replace(x)
x = dict.fromkeys(["Dangerous (>20 mph)"], 'surfing_unideal')    
sc_df = sc_df.replace(x)


sc_df.reset_index(inplace=True, drop=True)
sc_df.head(5)

Unnamed: 0.1,Unnamed: 0,YY,MM,DD,hh,mm,WSP,GST,WVHT,DPD,APD,MWD,PRES,ATMP,DEWP,Wave_Size,Wind_Speed
0,4,2020,1,1,0,40,3.7,5.1,2.19,16.0,9.15,263,1016.4,16.2,11.3,surfing_ideal,surfing_ideal
1,10,2020,1,1,1,40,2.0,3.8,2.07,13.79,8.97,267,1016.7,16.0,11.7,surfing_ideal,surfing_ideal
2,16,2020,1,1,2,40,4.9,6.4,1.97,11.43,8.99,263,1017.0,16.3,10.9,surfing_unideal,surfing_ideal
3,22,2020,1,1,3,40,5.3,6.6,2.11,14.81,9.08,267,1017.4,15.9,12.1,surfing_ideal,surfing_ideal
4,28,2020,1,1,4,40,5.9,7.7,2.01,14.81,9.04,262,1017.9,16.2,11.7,surfing_ideal,surfing_ideal


In [119]:
#2020 set for training
#2021 set for testing

# try and make a separate column for both csvs that separates WVHT into 
# different swell height / surfable wave category
# so that it can be used for the surfing ideal vs unideal categorization in ML

## also maybe drop all the 99 / 999 columns and merge the date time

In [35]:
df_bin_encode = pd.get_dummies(sc_df, columns=["MM",
                                                   "DD",
                                                   "hh",
                                                   "mm",
                                                   "WSP",
                                            "GST",
                                            "DPD",
                                            "MWD",
                                            "PRES",
                                            "ATMP",
                                            "DEWP",
                                                  "Wind_Speed"])

YY_num = {
    '2020' : 1,
    '2021' : 2
}

# Months' and Loan Status condition encoded using the dictionary values
df_bin_encode["years_num"] = df_bin_encode["YY"].apply(lambda x: YY_num[x])
# Drop the issue_d columns
df_bin_encode = df_bin_encode.drop(["YY"], axis=1)

df_bin_encode.head()

Unnamed: 0.1,Unnamed: 0,WVHT,APD,Wave_Size,MM_1,MM_2,MM_3,MM_4,MM_5,MM_6,...,DEWP_21.5,DEWP_21.6,DEWP_21.7,DEWP_21.8,DEWP_21.9,DEWP_22.0,DEWP_22.1,DEWP_999.0,Wind_Speed_surfing_ideal,years_num
0,4,2.19,9.15,surfing_ideal,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
1,10,2.07,8.97,surfing_ideal,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
2,16,1.97,8.99,surfing_unideal,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
3,22,2.11,9.08,surfing_ideal,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
4,28,2.01,9.04,surfing_ideal,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [38]:
# Create our features
X = df_bin_encode.drop(columns=["Wave_Size"])

# Create our target
y = pd.DataFrame(df_bin_encode["Wave_Size"])

In [39]:

X.describe()

Unnamed: 0.1,Unnamed: 0,WVHT,APD,MM_1,MM_2,MM_3,MM_4,MM_5,MM_6,MM_7,...,DEWP_21.5,DEWP_21.6,DEWP_21.7,DEWP_21.8,DEWP_21.9,DEWP_22.0,DEWP_22.1,DEWP_999.0,Wind_Speed_surfing_ideal,years_num
count,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,...,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0,17129.0
mean,25891.090723,1.472187,7.170257,0.086286,0.079514,0.073501,0.083601,0.086753,0.083776,0.081616,...,0.000409,0.000409,0.000642,0.000292,0.000292,0.000117,0.000117,0.000409,1.0,1.497752
std,14954.049177,0.504612,1.388752,0.280795,0.270548,0.260965,0.276796,0.281482,0.27706,0.273787,...,0.020212,0.020212,0.025334,0.017083,0.017083,0.010805,0.010805,0.020212,0.0,0.50001
min,4.0,0.52,4.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
25%,12958.0,1.13,6.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
50%,25843.0,1.37,7.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
75%,38851.0,1.69,7.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
max,52139.0,5.64,15.07,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0


In [40]:
y.head(10)

Unnamed: 0,Wave_Size
0,surfing_ideal
1,surfing_ideal
2,surfing_unideal
3,surfing_ideal
4,surfing_ideal
5,surfing_unideal
6,surfing_unideal
7,surfing_unideal
8,surfing_unideal
9,surfing_unideal


In [41]:

# Check the size of our target values
y['Wave_Size'].value_counts()

surfing_unideal    14959
surfing_ideal       2170
Name: Wave_Size, dtype: int64

In [42]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
Counter(y_train)

Counter({'Wave_Size': 1})

In [43]:
X_train

Unnamed: 0.1,Unnamed: 0,WVHT,APD,MM_1,MM_2,MM_3,MM_4,MM_5,MM_6,MM_7,...,DEWP_21.5,DEWP_21.6,DEWP_21.7,DEWP_21.8,DEWP_21.9,DEWP_22.0,DEWP_22.1,DEWP_999.0,Wind_Speed_surfing_ideal,years_num
14476,35379,1.38,4.82,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
438,2644,0.83,6.44,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
8725,753,2.06,9.81,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
9936,8047,1.32,6.36,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
8500,51527,1.89,8.27,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,16188,2.11,6.79,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,2
11964,20268,1.85,5.78,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,2
5390,32803,1.01,7.30,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
860,5181,1.22,6.52,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [44]:
X_test

Unnamed: 0.1,Unnamed: 0,WVHT,APD,MM_1,MM_2,MM_3,MM_4,MM_5,MM_6,MM_7,...,DEWP_21.5,DEWP_21.6,DEWP_21.7,DEWP_21.8,DEWP_21.9,DEWP_22.0,DEWP_22.1,DEWP_999.0,Wind_Speed_surfing_ideal,years_num
11542,17736,1.20,7.73,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,2
14313,34395,1.38,4.59,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
123,754,2.15,7.83,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
14229,33879,0.82,7.30,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
16092,45115,0.94,7.09,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15452,41251,1.69,6.07,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
9100,3015,1.34,7.90,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
965,5811,1.63,6.90,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
444,2680,0.88,7.10,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [45]:
# Resample the training data with the BalancedRandomForestClassifier
# Create a random forest classifier
from imblearn.ensemble import BalancedRandomForestClassifier
brf_model = BalancedRandomForestClassifier(n_estimators=100, random_state=42)

# Fitting the model
brf_model = brf_model.fit(X_train, y_train)

# Making predictions using the testing data
predictions = brf_model.predict(X_test)
predictions

array(['surfing_unideal', 'surfing_unideal', 'surfing_ideal', ...,
       'surfing_unideal', 'surfing_unideal', 'surfing_ideal'],
      dtype=object)

In [46]:
y_train

Unnamed: 0,Wave_Size
14476,surfing_unideal
438,surfing_unideal
8725,surfing_ideal
9936,surfing_unideal
8500,surfing_unideal
...,...
11284,surfing_ideal
11964,surfing_unideal
5390,surfing_unideal
860,surfing_unideal


In [47]:
y_test

Unnamed: 0,Wave_Size
11542,surfing_unideal
14313,surfing_unideal
123,surfing_ideal
14229,surfing_unideal
16092,surfing_unideal
...,...
15452,surfing_unideal
9100,surfing_unideal
965,surfing_unideal
444,surfing_unideal


In [48]:
# Calculate the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

y_pred = brf_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.9996956786366402

In [49]:
# Display the confusion matrix
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,724,0
Actual 1,3,4926


In [50]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                       pre       rec       spe        f1       geo       iba       sup

  surfing_ideal       1.00      1.00      1.00      1.00      1.00      1.00       724
surfing_unideal       1.00      1.00      1.00      1.00      1.00      1.00      4929

    avg / total       1.00      1.00      1.00      1.00      1.00      1.00      5653



In [51]:
# List the features sorted in descending order by feature importance
# Calculate feature importance in the Random Forest model
importances = brf_model.feature_importances_
sorted(zip(brf_model.feature_importances_, X.columns), reverse=True)

[(0.4766339539503239, 'WVHT'),
 (0.05429664020069681, 'Unnamed: 0'),
 (0.042205856998581495, 'APD'),
 (0.023606906993233617, 'MM_1'),
 (0.015499586218680592, 'MM_7'),
 (0.010506790381688466, 'MM_8'),
 (0.008865391939545254, 'DPD_8.33'),
 (0.006769035507602138, 'MM_9'),
 (0.0061183147887358125, 'MM_12'),
 (0.005996993384896737, 'years_num'),
 (0.004993598820932537, 'DPD_10.0'),
 (0.004274776980034296, 'DPD_9.09'),
 (0.004031801532830713, 'MM_6'),
 (0.0031764571744248298, 'MM_5'),
 (0.003039805432760481, 'DD_3'),
 (0.0026260776000900975, 'DD_8'),
 (0.0023821867442170774, 'MM_11'),
 (0.0022356403993423497, 'DPD_7.69'),
 (0.0022074732270469023, 'DPD_14.81'),
 (0.0021410032039271567, 'DD_9'),
 (0.0021395173553006982, 'MM_4'),
 (0.0020446915353501127, 'DD_18'),
 (0.0019390962035243195, 'MM_3'),
 (0.0018956893654795517, 'DPD_12.9'),
 (0.0018721580227110271, 'DD_5'),
 (0.0018575917795058269, 'DD_21'),
 (0.001800072638756214, 'DD_7'),
 (0.00174672437020018, 'DPD_13.79'),
 (0.001739186739853088,

In [53]:
# Easy Ensemble AdaBoost

# Train the EasyEnsembleClassifier
from imblearn.ensemble import EasyEnsembleClassifier
eec_model = EasyEnsembleClassifier(n_estimators=100, random_state=1)

# Fitting the model
eec_model = eec_model.fit(X_train, y_train)

# Making predictions using the testing data
predictions = eec_model.predict(X_test)

In [None]:
# Calculated the balanced accuracy score
y_pred = eec_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

In [None]:
# Display the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

In [None]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

In [54]:
## Naive Random Oversampling

# Resample the training data with the RandomOversampler
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

Counter(y_resampled)

Counter({'Wave_Size': 1})

In [55]:
# Train the Logistic Regression model using the resampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', random_state=1)
model.fit(X_resampled, y_resampled)

LogisticRegression(random_state=1)

In [56]:
# Calculated the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score
y_pred = model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.9690249330549046

In [57]:
# Display the confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[ 705,   19],
       [ 176, 4753]], dtype=int64)

In [58]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                       pre       rec       spe        f1       geo       iba       sup

  surfing_ideal       0.80      0.97      0.96      0.88      0.97      0.94       724
surfing_unideal       1.00      0.96      0.97      0.98      0.97      0.94      4929

    avg / total       0.97      0.97      0.97      0.97      0.97      0.94      5653



In [59]:
## SMOTE OVERSAMPLING
# Resample the training data with SMOTE
from imblearn.over_sampling import SMOTE

X_resampled, y_resampled = SMOTE(random_state=1, sampling_strategy='auto').fit_resample(X_train, y_train)
Counter(y_resampled)

Counter({'Wave_Size': 1})

In [60]:
# Train the Logistic Regression model using the resampled data
model = LogisticRegression(solver='lbfgs', random_state=1)
model.fit(X_resampled, y_resampled)

LogisticRegression(random_state=1)

In [61]:
# Calculated the balanced accuracy score
y_pred = model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.9309360039634635

In [62]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 645,   79],
       [ 143, 4786]], dtype=int64)

In [63]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                       pre       rec       spe        f1       geo       iba       sup

  surfing_ideal       0.82      0.89      0.97      0.85      0.93      0.86       724
surfing_unideal       0.98      0.97      0.89      0.98      0.93      0.87      4929

    avg / total       0.96      0.96      0.90      0.96      0.93      0.87      5653



In [67]:
## UNDERSAMPLING

# Resample the data using the ClusterCentroids resampler
# Warning: This is a large dataset, and this step may take some time to complete
from imblearn.under_sampling import ClusterCentroids

cc = ClusterCentroids(random_state=1)
X_resampled, y_resampled = cc.fit_resample(X_train, y_train)
Counter(y_resampled)

Counter({'Wave_Size': 1})

In [68]:
# Train the Logistic Regression model using the resampled data
model = LogisticRegression(solver='lbfgs', random_state=42)
model.fit(X_resampled, y_resampled)

LogisticRegression(random_state=42)

In [69]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 645,   79],
       [ 143, 4786]], dtype=int64)

In [70]:

# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                       pre       rec       spe        f1       geo       iba       sup

  surfing_ideal       0.82      0.89      0.97      0.85      0.93      0.86       724
surfing_unideal       0.98      0.97      0.89      0.98      0.93      0.87      4929

    avg / total       0.96      0.96      0.90      0.96      0.93      0.87      5653



In [71]:
# Resample the training data with SMOTEENN
# Warning: This is a large dataset, and this step may take some time to complete
from imblearn.combine import SMOTEENN

smote_enn = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote_enn.fit_resample(X, y)

Counter(y_resampled)

Counter({'Wave_Size': 1})

In [72]:
# Train the Logistic Regression model using the resampled data
model = LogisticRegression(solver='lbfgs')
model.fit(X_resampled, y_resampled)

LogisticRegression()

In [73]:
# Calculated the balanced accuracy score
y_pred = model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.6160768828973635

In [74]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[ 535,  189],
       [2498, 2431]], dtype=int64)

In [75]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                       pre       rec       spe        f1       geo       iba       sup

  surfing_ideal       0.18      0.74      0.49      0.28      0.60      0.37       724
surfing_unideal       0.93      0.49      0.74      0.64      0.60      0.36      4929

    avg / total       0.83      0.52      0.71      0.60      0.60      0.36      5653

