### Prepare imports

In [None]:
import pandas as pd
from datetime import datetime
import numpy as np
import warnings
import random
#import requests
import zipfile
import os
import glob 
#import re
import matplotlib.pyplot as plt
import fastparquet
import openpyxl
import logging

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from sklearn.model_selection import train_test_split, cross_val_score, KFold, ParameterSampler
from sklearn.metrics import mean_squared_error, r2_score

#from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVR
from sklearn.pipeline import Pipeline

### Read parquet as dataframe

In [None]:
df_MLready = pd.read_parquet('df_MLready.parquet')

In [None]:
df_MLready = pd.read_parquet('df_MLready_1_1000.parquet')
df_MLready = df_MLready.sample(frac=0.01, random_state=14)

# print shape
print(df_MLready.shape)

df_MLready.to_parquet("df_MLready_1_100000.parquet", index=False)

(344, 33)


## ML: LinearRegression, DecisionTreeRegressor, XGBRegressor, KNNRegressor, SVRegressor

### Define features and target

In [3]:
# Features and target
features = ['Month', 'DayofWeek', 'Hour', 'Minute']
X = df_MLready[features]
y = df_MLready['Delta_seconds']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=14)

# Define 5-fold cross-validation
cv = KFold(n_splits=5, shuffle=True, random_state=14)

### Create empty df to fill with data for visualizations

In [4]:
# Create df_pred for visualizations

df_pred = pd.DataFrame(index=range(len(y_test)))
df_pred['y_test'] = y_test.reset_index(drop=True)

In [7]:
print(df_pred.shape)
print(df_pred.columns)

(69, 3)
Index(['y_test', 'y_pred_lin', 'errors_lin'], dtype='object')


### Linear Regression

In [None]:
model = LinearRegression()

# Fit model
model.fit(X_train, y_train)

y_pred_lin = model.predict(X_test)
df_pred['y_pred_lin'] = pd.Series(y_pred_lin, index=df_pred.index)

errors_lin = np.abs(y_test.reset_index(drop=True) - y_pred_lin)
df_pred['errors_lin'] = errors_lin


r2_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='r2')
print("Cross-validated R² scores:", r2_scores)
print(f"Average Cross-validated R²: {r2_scores.mean():.4f}")

rmse = np.sqrt(mean_squared_error(y_test, y_pred_lin))
print(f"Test RMSE (seconds): {rmse:.2f}")

coefficients = pd.Series(model.coef_, index=features)
print("Intercept:", model.intercept_)
print("Coefficients:")
print(coefficients)

thresholds_lin = {
    "Within 1 minute": errors_lin <= 60,
    "Within 3 minutes": (errors_lin > 60) & (errors_lin <= 180),
    "Within 5 minutes": (errors_lin > 180) & (errors_lin <= 300),
    "More than 5 minutes": errors_lin > 300
}

summary_lin = {"Category": [], "Count": [], "Percentage": []}
total = len(errors_lin)

for category, condition in thresholds_lin.items():
    count = condition.sum()
    percentage = round(100 * count / total, 3)
    summary_lin["Category"].append(category)
    summary_lin["Count"].append(count)
    summary_lin["Percentage"].append(percentage)

summary_lin_df = pd.DataFrame(summary_lin)
print("\nPrediction Accuracy Summary:")
print(summary_lin_df)


'''
Cross-validated R² scores: [0.00018276 0.00023669 0.0002489 0.00024995 0.0002358 ]
Average Cross-validated R²: 0.0002
Test RMSE (seconds): 245.98

Intercept: 198.63417195001517
Coefficients:
Month       -0.315826
DayofWeek    0.678032
Hour         0.616489
Minute      -0.039650
dtype: float64

NOT cumulative percentages:
              Category        Count  Percentage
0      Within 1 minute    2,474,125       35.938
1     Within 3 minutes    3,651,561       53.041
2     Within 5 minutes    312,989         4.546
3  More than 5 minutes    445,733         6.475


Prediction Accuracy Summary:
              Category    Count  Percentage
0      Within 1 minute  2474125      35.938
1     Within 3 minutes  6125686      88.979
2     Within 5 minutes  6438675      93.525
3  More than 5 minutes   445733      100.000
'''

Cross-validated R² scores: [-0.01339896 -0.12206528 -0.02744299 -0.08878668  0.04079326]
Average Cross-validated R²: -0.0422
Test RMSE (seconds): 205.70
Intercept: 190.19566366946182
Coefficients:
Month       -13.446466
DayofWeek     0.286927
Hour          5.417770
Minute       -0.835314
dtype: float64

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute     19      27.536
1     Within 3 minutes     36      52.174
2     Within 5 minutes      8      11.594
3  More than 5 minutes      6       8.696


'\nCross-validated R² scores: [0.00018276 0.00023669 0.0002489  0.00024995 0.0002358 ]\nAverage Cross-validated R²: 0.0002\nTest RMSE (seconds): 245.98\nIntercept: 198.63417195001517\nCoefficients:\nMonth       -0.315826\nDayofWeek    0.678032\nHour         0.616489\nMinute      -0.039650\ndtype: float64\n\nPrediction Accuracy Summary:\n              Category    Count  Percentage\n0      Within 1 minute  2474125      35.938\n1     Within 3 minutes  6125686      88.979\n2     Within 5 minutes  6438675      93.525\n3  More than 5 minutes   445733       6.475\n'

In [None]:
print(df_pred.shape)
print(df_pred.columns)
print(df_pred.tail())

(69, 3)
Index(['y_test', 'y_pred_lin', 'errors_lin'], dtype='object')
    y_test  y_pred_lin  errors_lin
64   244.0  207.832034   36.167966
65   272.0  171.171719  100.828281
66    38.0  218.532045  180.532045
67    96.0  224.654119  128.654119
68   133.0  150.503922   17.503922


### Decision Tree Regression

In [None]:
# Hyperparameter space
param_distributions = {
    'max_depth': [3, 5, 10, 15, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5, 10],
    'max_features': [None, 'sqrt', 'log2']
}

# Randomized search
grid_search = RandomizedSearchCV(
    estimator=DecisionTreeRegressor(random_state=14),
    param_distributions=param_distributions,
    scoring='r2',
    cv=5,
    n_iter=20,
    random_state=14,
    n_jobs=-1,
    verbose=0
)

# Fit model
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
y_pred_tree = best_model.predict(X_test)
df_pred['y_pred_tree'] = pd.Series(y_pred_tree, index=df_pred.index)

errors_tree = np.abs(y_test.reset_index(drop=True) - y_pred_tree)
df_pred['errors_tree'] = errors_tree


# Evaluate
r2 = r2_score(y_test, y_pred_tree)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_tree))
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.2f} seconds")


thresholds_tree = {
    "Within 1 minute": errors_tree <= 60,
    "Within 3 minutes": (errors_tree > 60) & (errors_tree <= 180),
    "Within 5 minutes": (errors_tree > 180) & (errors_tree <= 300),
    "More than 5 minutes": errors_tree > 300
}

summary_tree = {"Category": [], "Count": [], "Percentage": []}
total = len(errors_tree)

for category, condition in thresholds_tree.items():
    count = condition.sum()
    percentage = round(100 * count / total, 3)
    summary_tree["Category"].append(category)
    summary_tree["Count"].append(count)
    summary_tree["Percentage"].append(percentage)

summary_tree_df = pd.DataFrame(summary_tree)
print("\nPrediction Accuracy Summary:")
print(summary_tree_df)

'''
Best Parameters: {'min_samples_split': 5, 'min_samples_leaf': 10, 'max_features': None, 'max_depth': 15}
Test R²: 0.0063
Test RMSE: 245.24 seconds

Prediction Accuracy Summary:
              Category    Count  Percentage
0      Within 1 minute  2500086      36.315
1     Within 3 minutes  3571670      51.881
2     Within 5 minutes   369862       5.372
3  More than 5 minutes   442790       6.432

Cumulative %:
              Category    Count  Percentage  Cumulative Percentage
0      Within 1 minute  2500086      36.315                 36.315
1     Within 3 minutes  3571670      51.881                 88.196
2     Within 5 minutes   369862       5.372                 93.568
3  More than 5 minutes   442790       6.432                100.000
'''

Best Parameters: {'min_samples_split': 5, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 5}
Test R²: -0.1494
Test RMSE: 219.86 seconds

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute     18      26.087
1     Within 3 minutes     36      52.174
2     Within 5 minutes      6       8.696
3  More than 5 minutes      9      13.043


"\nBest Parameters: {'min_samples_split': 5, 'min_samples_leaf': 10, 'max_features': None, 'max_depth': 15}\nTest R²: 0.0063\nTest RMSE: 245.24 seconds\n\nPrediction Accuracy Summary:\n              Category    Count  Percentage\n0      Within 1 minute  2500086      36.315\n1     Within 3 minutes  3571670      51.881\n2     Within 5 minutes   369862       5.372\n3  More than 5 minutes   442790       6.432\n"

In [None]:
print(df_pred.shape)
print(df_pred.columns)
print(df_pred.tail())

(69, 5)
Index(['y_test', 'y_pred_lin', 'errors_lin', 'y_pred_tree', 'errors_tree'], dtype='object')
    y_test  y_pred_lin  errors_lin  y_pred_tree  errors_tree
64   244.0  207.832034   36.167966   150.000000    94.000000
65   272.0  171.171719  100.828281   128.700000   143.300000
66    38.0  218.532045  180.532045   167.450000   129.450000
67    96.0  224.654119  128.654119   101.466667     5.466667
68   133.0  150.503922   17.503922   258.052632   125.052632


### XGB Regression

In [None]:
# Hyperparameter space
param_distributions = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7, 10],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Randomized search
grid_search = RandomizedSearchCV(
    estimator=XGBRegressor(random_state=14, verbosity=0),
    param_distributions=param_distributions,
    scoring='r2',
    cv=cv,
    n_iter=20,
    random_state=14,
    n_jobs=-1,
    verbose=0
)

# Fit model
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_


y_pred_xgb = best_model.predict(X_test)
#y_test = y_test.reset_index(drop=True)
df_pred['y_pred_xgb'] = pd.Series(y_pred_xgb, index=df_pred.index)

errors_xgb = np.abs(y_test.reset_index(drop=True) - y_pred_xgb)
df_pred['errors_xgb'] = errors_xgb


# Evaluate
r2 = r2_score(y_test, y_pred_xgb)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.2f} seconds")



thresholds_xgb = {
    "Within 1 minute": errors_xgb <= 60,
    "Within 3 minutes": (errors_xgb > 60) & (errors_xgb <= 180),
    "Within 5 minutes": (errors_xgb > 180) & (errors_xgb <= 300),
    "More than 5 minutes": errors_xgb > 300
}

summary_xgb = {"Category": [], "Count": [], "Percentage": []}

total = len(errors_xgb)
for category, condition in thresholds_xgb.items():
    count = condition.sum()
    percentage = round(100 * count / total, 3)
    summary_xgb["Category"].append(category)
    summary_xgb["Count"].append(count)
    summary_xgb["Percentage"].append(percentage)

summary_xgb_df = pd.DataFrame(summary_xgb)
print("\nPrediction Accuracy Summary:")
print(summary_xgb_df)

'''
Best Parameters: {'subsample': 1.0, 'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.2, 'colsample_bytree': 1.0}
Test R²: 0.0067
Test RMSE: 245.19 seconds

Prediction Accuracy Summary:
              Category    Count  Percentage
0      Within 1 minute  2500358      36.319
1     Within 3 minutes  3575028      51.929
2     Within 5 minutes   366263       5.320
3  More than 5 minutes   442759       6.431


Cumulative percentages:
              Category    Count  Percentage  Cumulative Percentage
0      Within 1 minute  2500358      36.319                 36.319
1     Within 3 minutes  3575028      51.929                 88.248
2     Within 5 minutes   366263       5.320                 93.568
3  More than 5 minutes   442759       6.431                100.000
'''

Best Parameters: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.01, 'colsample_bytree': 0.6}
Test R²: -0.0696
Test RMSE: 212.08 seconds

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute     19      27.536
1     Within 3 minutes     37      53.623
2     Within 5 minutes      7      10.145
3  More than 5 minutes      6       8.696


"\nBest Parameters: {'subsample': 1.0, 'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.2, 'colsample_bytree': 1.0}\nTest R²: 0.0067\nTest RMSE: 245.19 seconds\n\nPrediction Accuracy Summary:\n              Category    Count  Percentage\n0      Within 1 minute  2500358      36.319\n1     Within 3 minutes  3575028      51.929\n2     Within 5 minutes   366263       5.320\n3  More than 5 minutes   442759       6.431\n"

In [None]:
print(df_pred.shape)
print(df_pred.columns)
print(df_pred.tail())

df_pred.to_parquet("df_pred_3models.parquet", index=False)

(69, 7)
Index(['y_test', 'y_pred_lin', 'errors_lin', 'y_pred_tree', 'errors_tree',
       'y_pred_xgb', 'errors_xgb'],
      dtype='object')
    y_test  y_pred_lin  errors_lin  y_pred_tree  errors_tree  y_pred_xgb  \
64   244.0  207.832034   36.167966   150.000000    94.000000  190.137695   
65   272.0  171.171719  100.828281   128.700000   143.300000  181.375107   
66    38.0  218.532045  180.532045   167.450000   129.450000  208.519058   
67    96.0  224.654119  128.654119   101.466667     5.466667  231.732605   
68   133.0  150.503922   17.503922   258.052632   125.052632  177.077744   

    errors_xgb  
64   53.862305  
65   90.624893  
66  170.519058  
67  135.732605  
68   44.077744  


### Support Vector Regression

In [18]:
# Pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR())
])

# Hyperparameter space
param_distributions = {
    'svr__C': [0.1, 1, 10, 100],
    'svr__epsilon': [0.1, 0.5, 1, 5],
    'svr__kernel': ['linear', 'rbf', 'poly'],
    'svr__gamma': ['scale', 'auto']
}

# Randomized search
grid_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_distributions,
    scoring='r2',
    cv=cv,
    n_iter=20,
    random_state=14,
    n_jobs=-1,
    verbose=0
)

# Fit model
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_


y_pred_svr = best_model.predict(X_test)
# y_test = y_test.reset_index(drop=True)
y_pred_svr = pd.Series(y_pred_svr, index=df_pred.index)
df_pred['y_pred_svr'] = pd.Series(y_pred_svr)

errors_svr = np.abs(y_test.reset_index(drop=True) - y_pred_svr)
df_pred['errors_svr'] = errors_svr



# Evaluate
r2 = r2_score(y_test, y_pred_svr)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_svr))
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.2f} seconds")


thresholds_svr = {
    "Within 1 minute": errors_svr <= 60,
    "Within 3 minutes": (errors_svr > 60) & (errors_svr <= 180),
    "Within 5 minutes": (errors_svr > 180) & (errors_svr <= 300),
    "More than 5 minutes": errors_svr > 300
}

summary_svr = {"Category": [], "Count": [], "Percentage": []}
total = len(errors_svr)

for category, condition in thresholds_svr.items():
    count = condition.sum()
    percentage = round(100 * count / total, 3)
    summary_svr["Category"].append(category)
    summary_svr["Count"].append(count)
    summary_svr["Percentage"].append(percentage)

summary_svr_df = pd.DataFrame(summary_svr)
print("\nPrediction Accuracy Summary:")
print(summary_svr_df)

'''
Best Parameters: {'svr__kernel': 'poly', 'svr__gamma': 'auto', 'svr__epsilon': 5, 'svr__C': 100}
Test R²: -0.0520
Test RMSE: 250.33 seconds

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute   3664      53.217
1     Within 3 minutes   2395      34.786
2     Within 5 minutes    312       4.532
3  More than 5 minutes    514       7.466
'''

Best Parameters: {'svr__kernel': 'poly', 'svr__gamma': 'scale', 'svr__epsilon': 1, 'svr__C': 100}
Test R²: -0.1078
Test RMSE: 215.84 seconds

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute     31      44.928
1     Within 3 minutes     29      42.029
2     Within 5 minutes      2       2.899
3  More than 5 minutes      7      10.145


"\nBest Parameters: {'svr__kernel': 'poly', 'svr__gamma': 'auto', 'svr__epsilon': 5, 'svr__C': 100}\nTest R²: -0.0520\nTest RMSE: 250.33 seconds\n\nPrediction Accuracy Summary:\n              Category  Count  Percentage\n0      Within 1 minute   3664      53.217\n1     Within 3 minutes   2395      34.786\n2     Within 5 minutes    312       4.532\n3  More than 5 minutes    514       7.466\n"

In [19]:
print(df_pred.shape)
print(df_pred.columns)
print(df_pred.tail())

df_MLready.to_parquet("df_pred_4models.parquet", index=False)

(69, 11)
Index(['y_test', 'y_pred_lin', 'errors_lin', 'y_pred_tree', 'errors_tree',
       'y_pred_xgb', 'errors_xgb', 'y_pred_knn', 'errors_knn', 'y_pred_svr',
       'errors_svr'],
      dtype='object')
    y_test  y_pred_lin  errors_lin  y_pred_tree  errors_tree  y_pred_xgb  \
64   244.0  207.832034   36.167966   150.000000    94.000000  190.137695   
65   272.0  171.171719  100.828281   128.700000   143.300000  181.375107   
66    38.0  218.532045  180.532045   167.450000   129.450000  208.519058   
67    96.0  224.654119  128.654119   101.466667     5.466667  231.732605   
68   133.0  150.503922   17.503922   258.052632   125.052632  177.077744   

    errors_xgb  y_pred_knn  errors_knn  y_pred_svr  errors_svr  
64   53.862305  168.842105   75.157895  165.730876   78.269124  
65   90.624893  185.526316   86.473684  135.187399  136.812601  
66  170.519058  216.000000  178.000000  142.628096  104.628096  
67  135.732605  180.368421   84.368421  115.564419   19.564419  
68   44.07774

### K-Nearest Neighbors

In [20]:
# Pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor())
])

# Hyperparameter space 
param_distributions = {
    'knn__n_neighbors': list(range(3, 21)),
    'knn__weights': ['uniform', 'distance'],
    'knn__p': [1, 2]  # 1: Manhattan, 2: Euclidean
}

# Randomized search
grid_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_distributions,
    scoring='r2',
    cv=cv,
    n_iter=20,
    random_state=14,
    n_jobs=-1,
    verbose=0
)

# Fit model
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_


y_pred_knn = best_model.predict(X_test)
# y_test = y_test.reset_index(drop=True)
df_pred['y_pred_knn'] = pd.Series(y_pred_knn, index=df_pred.index)

errors_knn = np.abs(y_test.reset_index(drop=True) - y_pred_knn)
df_pred['errors_knn'] = errors_knn


# Evaluate
r2 = r2_score(y_test, y_pred_knn)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_knn))
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.2f} seconds")


thresholds_knn = {
    "Within 1 minute": errors_knn <= 60,
    "Within 3 minutes": (errors_knn > 60) & (errors_knn <= 180),
    "Within 5 minutes": (errors_knn > 180) & (errors_knn <= 300),
    "More than 5 minutes": errors_knn > 300
}

summary_knn = {"Category": [], "Count": [], "Percentage": []}
total = len(errors_knn)

for category, condition in thresholds_knn.items():
    count = condition.sum()
    percentage = round(100 * count / total, 3)
    summary_knn["Category"].append(category)
    summary_knn["Count"].append(count)
    summary_knn["Percentage"].append(percentage)

summary_knn_df = pd.DataFrame(summary_knn)
print("\nPrediction Accuracy Summary:")
print(summary_knn_df)

'''
Best Parameters: {'weights': 'distance', 'p': 2, 'n_neighbors': 20}
Test R²: -0.1814
Test RMSE: 267.39 seconds

Prediction Accuracy Summary:
              Category    Count  Percentage
0      Within 1 minute  2568632      37.311
1     Within 3 minutes  3167834      46.015
2     Within 5 minutes   644848       9.367
3  More than 5 minutes   503094       7.308
'''

Best Parameters: {'knn__weights': 'uniform', 'knn__p': 1, 'knn__n_neighbors': 19}
Test R²: -0.1388
Test RMSE: 218.84 seconds

Prediction Accuracy Summary:
              Category  Count  Percentage
0      Within 1 minute     19      27.536
1     Within 3 minutes     36      52.174
2     Within 5 minutes      6       8.696
3  More than 5 minutes      8      11.594


"\nBest Parameters: {'weights': 'distance', 'p': 2, 'n_neighbors': 20}\nTest R²: -0.1814\nTest RMSE: 267.39 seconds\n\nPrediction Accuracy Summary:\n              Category    Count  Percentage\n0      Within 1 minute  2568632      37.311\n1     Within 3 minutes  3167834      46.015\n2     Within 5 minutes   644848       9.367\n3  More than 5 minutes   503094       7.308\n"

In [22]:
print(df_pred.shape)
print(df_pred.columns)
print(df_pred.tail())

df_MLready.to_parquet("df_pred_5models.parquet", index=False)

(69, 11)
Index(['y_test', 'y_pred_lin', 'errors_lin', 'y_pred_tree', 'errors_tree',
       'y_pred_xgb', 'errors_xgb', 'y_pred_knn', 'errors_knn', 'y_pred_svr',
       'errors_svr'],
      dtype='object')
    y_test  y_pred_lin  errors_lin  y_pred_tree  errors_tree  y_pred_xgb  \
64   244.0  207.832034   36.167966   150.000000    94.000000  190.137695   
65   272.0  171.171719  100.828281   128.700000   143.300000  181.375107   
66    38.0  218.532045  180.532045   167.450000   129.450000  208.519058   
67    96.0  224.654119  128.654119   101.466667     5.466667  231.732605   
68   133.0  150.503922   17.503922   258.052632   125.052632  177.077744   

    errors_xgb  y_pred_knn  errors_knn  y_pred_svr  errors_svr  
64   53.862305  168.842105   75.157895  165.730876   78.269124  
65   90.624893  185.526316   86.473684  135.187399  136.812601  
66  170.519058  216.000000  178.000000  142.628096  104.628096  
67  135.732605  180.368421   84.368421  115.564419   19.564419  
68   44.07774