In [0]:
import pyspark
from pyspark.sql.types import StringType, BooleanType, IntegerType
import pyspark.sql.functions as F

import airporttime
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

In [0]:
from pyspark.sql import SQLContext
from pyspark.mllib.stat import Statistics
from pyspark.sql.functions import udf
from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler,StandardScaler
from pyspark.ml.feature import Bucketizer
from pyspark.ml import Pipeline
from sklearn.metrics import confusion_matrix

In [0]:
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.classification import RandomForestClassifier
from sparkdl.xgboost import XgboostRegressor, XgboostClassifier

#### Configure access to Azure Blob Storage, which holds the large datasets for quick access

In [0]:
# Azure Blob Storage and Databricks secret settings used throughout the notebook.

# Name of the container created in https://portal.azure.com
blob_container = "w261-scrr"

# Name of the Storage account created in https://portal.azure.com
storage_account = "midsw261rv"

# Name of the secret scope created on the local computer with the Databricks CLI
secret_scope = "w261scrr"

# Name of the secret key created on the local computer with the Databricks CLI
secret_key = "w261scrrkey"

# wasbs:// URL of the container; the parquet paths read and written below are built on it
blob_url = f"wasbs://{blob_container}@{storage_account}.blob.core.windows.net"

mount_path = "/mnt/mids-w261"

In [0]:
# Register the secret stored under (secret_scope, secret_key) as the value of the
# "fs.azure.sas.<container>.<account>" Spark setting for the container configured above.
sas_conf_key = f"fs.azure.sas.{blob_container}.{storage_account}.blob.core.windows.net"
spark.conf.set(sas_conf_key, dbutils.secrets.get(scope=secret_scope, key=secret_key))

In [0]:
%run "../libs/weather_aggregation"

In [0]:
%run "../libs/time_based_features"

In [0]:
%run "../libs/transform"

In [0]:
%run "../libs/model_helper_functions"

#### Import joined data

In [0]:
# Parquet locations of the joined training and test data inside the blob container
train_join_path = f"{blob_url}/join_full_0329"
test_join_path = f"{blob_url}/test_full_join_0404"

df_train = spark.read.parquet(train_join_path)
df_test = spark.read.parquet(test_join_path)

In [0]:
# Row counts of the joined train/test frames and the share of rows used for training.
# The counts are taken from df_train / df_test (read in the previous cell).
# df_train_main / df_test_main are only created in the "Run the model on test data"
# section, so using them here fails with a NameError when the notebook is run
# top to bottom on a fresh kernel.
trainCount = df_train.count()
testCount = df_test.count()
trainSize = trainCount / (trainCount + testCount)
print(f"Train count: {trainCount:,}")
print(f"Test count: {testCount:,}")
print(f"Train size: {trainSize*100:,.2f}%")

### Cross Validation

In [0]:
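# # Transform the data and save it - run this once
# # NOTE(review): this cell writes cv_train_agg_0404_split*/cv_val_agg_0404_split*, while the
# # loading cell below reads cv_train_0402_split*/cv_val_0402_split* - confirm which snapshot is current.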

# trainsplits, valsplits = Split4year5Fold(df_train_main)

# for i, val_train in enumerate(trainsplits):
  
# #   df_train_split = aggregate_weather_reports(val_train)
# #   df_val_split = aggregate_weather_reports(valsplits[i])
  
# #   df_train_split = get_transformed_df(df_train_split)
# #   df_val_split = get_transformed_df(df_val_split)
  
# #   df_train_split = add_previous_flight_delay_indicator(df_train_split)
# #   df_val_split = add_previous_flight_delay_indicator(df_val_split)
  
#   val_train.write.parquet(f"{blob_url}/cv_train_agg_0404_split"+str(i))
#   valsplits[i].write.parquet(f"{blob_url}/cv_val_agg_0404_split"+str(i))
  
  
  

In [0]:
# This would be part of main flow

# Load the five pre-saved cross-validation folds; position k in both lists belongs
# to fold k (train part and validation part respectively).
df_train_split = [
  spark.read.parquet(f"{blob_url}/cv_train_0402_split{fold}") for fold in range(5)
]
df_val_split = [
  spark.read.parquet(f"{blob_url}/cv_val_0402_split{fold}") for fold in range(5)
]



In [0]:
def preprocess(df):
  """Encode and clean a DataFrame for the cross-validation models.

  Steps, in order:
    1. Target-mean-encode a fixed set of columns against DEP_DEL15 (via the
       target_mean_encoding helper) and drop the original columns.
    2. Fill nulls: the fill_nulls_with_zero_custom helper for most weather columns,
       -99 for TMP_Value_mean.
    3. Cast weather_condition to string (via cast_features_to_strings).
    4. Index every remaining string column with a frequency-descending
       StringIndexer (output column "<name>_Index", unseen values kept) and drop
       the original string columns.

  Args:
    df: Spark DataFrame containing DEP_DEL15 and the columns referenced below.

  Returns:
    The transformed Spark DataFrame.

  NOTE(review): the target encoding and the indexers are fitted on `df` itself, so
  calling this separately on train and validation frames fits them independently
  on each - confirm this is intended.
  """

  # Optional step (disabled): read holidays and add holiday features
  #   df1 = spark.read.parquet(f"{blob_url}/holidays")
  #   df = add_holidays(df,df1)

  ### Perform target mean encoding and drop original features
  target_encoded_cols = ['ORIGIN', 'DEST', 'DEP_TIME_BLK', 'TAIL_NUM', 'DAY_OF_MONTH', 'DEST_CITY_MARKET_ID']
  df = target_mean_encoding(df, col=target_encoded_cols, target='DEP_DEL15')
  df = df.drop(*target_encoded_cols)

  # Handle nulls
  fillWithZero = ['VIS_Horizontal_median', 'CIG_CeilingHeightDim_median', 'AL_SnowAccumDuration',
                  'AJ1_SnowDepth', 'WND_Speed_mean', 'SLP_Value_mean', 'weather_condition']
  fillWithOther = ['TMP_Value_mean']
  df = fill_nulls_with_zero_custom(df, fillWithZero)
  df = df.fillna(-99, fillWithOther)

  # Make sure weather condition is a string
  df = cast_features_to_strings(df, ['weather_condition'])

  # Frequency encode (descending) every remaining string column
  string_cols = [name for name, dtype in df.dtypes if dtype == 'string']
  indexers = [
    StringIndexer(inputCol=name, outputCol=name + '_Index',
                  handleInvalid="keep", stringOrderType="frequencyDesc")
    for name in string_cols
  ]
  df = Pipeline(stages=indexers).fit(df).transform(df)
  df = df.drop(*string_cols)

  # Optional step (disabled): add interactions between most relevant terms
  #   top=['SLP_Value_mean', 'DEW_Value_median', 'DISTANCE', 'DEP_TIME_BLK_mean_encoding',
  #        'TMP_Value_mean', 'ORIGIN_mean_encoding', 'TAIL_NUM_mean_encoding', 'OP_UNIQUE_CARRIER_Index',
  #        'DAY_OF_MONTH_mean_encoding', 'MONTH_Index']
  #   df, new_cols =add_interactions(df, top)

  # Optional step (disabled): add balancing ratio feature
  #   df = add_balancing_ratio(df)

  return df

In [0]:
# Columns used for training. Keeping them in one list makes it explicit which
# feature set produced each recorded score.
#
# Earlier column set (flights + weather + time based attribute), kept for reference:
# selected_cols = ['DEP_DEL15', 'OP_UNIQUE_CARRIER', 'DAY_OF_WEEK', 'DISTANCE', 'DISTANCE_GROUP', 'MONTH', 'ORIGIN', 'DEST', \
#                   'CIG_CeilingHeightDim_median', 'VIS_Horizontal_median', 'AA_RainDepth','AA_RainDuration', 'AL_SnowAccumDuration_mean', \
#                   'AL_SnowAccumDepth', 'AJ1_SnowDepth_mean', 'AJ1_SnowEqWaterDepth','WND_Speed_mean', 'TMP_Value_mean', 'SLP_Value_mean', \
#                   'PREV_DEP_DEL15','weather_condition']
selected_cols = [
  'DEP_DEL15',
  'OP_UNIQUE_CARRIER',
  'DAY_OF_WEEK',
  'DISTANCE',
  'DISTANCE_GROUP',
  'MONTH',
  'ORIGIN',
  'DEST',
  'CIG_CeilingHeightDim_median',
  'WND_DirectionAngle_median',
  'VIS_Horizontal_median',
  'AA_RainDepth',
  'AA_RainDuration',
  'AL_SnowAccumDuration',
  'AL_SnowAccumDepth',
  'AJ1_SnowDepth',
  'AJ1_SnowEqWaterDepth',
  'WND_Speed_mean',
  'TMP_Value_mean',
  'SLP_Value_mean',
  'PREV_DEP_DEL15',
  'weather_condition',
  'DEW_Value_median',
  'DEST_CITY_MARKET_ID',
  'VIS_Variability',
  'DAY_OF_MONTH',
  'WND_Type',
  'TAIL_NUM',
  'DEP_TIME_BLK',
]

# Try the preprocessing on the first cross-validation training split
df_temp = preprocess(df_train_split[0].select(*selected_cols))

# Split the preprocessed columns by dtype: string columns are categorical, everything
# else is numeric; the label is then taken out of the numeric list.
labelCol = ['DEP_DEL15']
categoricalColumns = []
numericCols = []
for col_name, col_type in df_temp.dtypes:
  if col_type == 'string':
    categoricalColumns.append(col_name)
  else:
    numericCols.append(col_name)
numericCols.remove(labelCol[0])

In [0]:
# This section can be removed at the end
# TEST: fit the XGB feature pipeline on the preprocessed sample and inspect the result.

# Every preprocessed column except the label is a model input
featureCols = [name for name in df_temp.columns if name != 'DEP_DEL15']
pipeline = getXGBPipeline(featureCols)

print("Getting pipeline ready")

pipelineModel = pipeline.fit(df_temp)

# The transformed frame gets its own name instead of overwriting df_temp. Overwriting
# made the cell non-re-runnable: on a second run the columns added by the pipeline
# would have been picked up as input features.
df_temp_ml = pipelineModel.transform(df_temp)
display(df_temp_ml)

DEP_DEL15,DAY_OF_WEEK,DISTANCE,CIG_CeilingHeightDim_median,WND_DirectionAngle_median,VIS_Horizontal_median,AA_RainDepth,AA_RainDuration,AL_SnowAccumDuration,AL_SnowAccumDepth,AJ1_SnowDepth,AJ1_SnowEqWaterDepth,WND_Speed_mean,TMP_Value_mean,SLP_Value_mean,PREV_DEP_DEL15,DEW_Value_median,ORIGIN_mean_encoding,DEST_mean_encoding,DEP_TIME_BLK_mean_encoding,TAIL_NUM_mean_encoding,DAY_OF_MONTH_mean_encoding,DEST_CITY_MARKET_ID_mean_encoding,OP_UNIQUE_CARRIER_Index,DISTANCE_GROUP_Index,MONTH_Index,weather_condition_Index,VIS_Variability_Index,WND_Type_Index,weather_condition_vec
0.0,4,992.0,1263.92,226.93,10298.53,2.39,3.19,0.0,0,0.0,0.0,9.859999656677246,217.0,10205.1904296875,0.0,210.73,0.2089608241092899,0.2022562904926924,0.2208415212114206,0.1582213029989658,0.206857385722451,0.2022562904926924,8.0,2.0,7.0,0.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(0), values -> List(1.0))"
1.0,4,920.0,22000.0,238.22,16078.99,0.0,1.0,0.0,0,0.0,0.0,77.12999725341797,3.5799999237060547,10230.6796875,0.0,-126.21,0.1932316258770857,0.2041274635071824,0.2597576691461759,0.1582213029989658,0.206857385722451,0.2041274635071824,8.0,2.0,7.0,39.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(39), values -> List(1.0))"
0.0,5,507.0,5439.91,38.71,16083.32,0.0,1.0,0.0,0,0.0,0.0,25.11000061035156,187.94000244140625,10231.7998046875,0.0,167.59,0.1943438256658595,0.1364126327266419,0.0671195003580237,0.1582213029989658,0.2155547447304162,0.1364126327266419,8.0,1.0,7.0,39.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(39), values -> List(1.0))"
0.0,6,541.0,1324.45,162.0,13512.06,12.53,4.32,0.0,0,0.0,0.0,11.779999732971191,83.98999786376953,10275.919921875,0.0,70.61,0.1771598071931776,0.2292914007500493,0.090916868867663,0.1582213029989658,0.2172226423549841,0.2390534999511862,8.0,1.0,7.0,1.0,0.0,1.0,"Map(vectorType -> sparse, length -> 40, indices -> List(1), values -> List(1.0))"
0.0,6,541.0,14816.61,59.37,16089.95,0.0,6.44,0.0,0,0.0,0.0,25.479999542236328,12.25,10323.2099609375,0.0,-67.39,0.211280756434029,0.1364126327266419,0.1529513156423876,0.1582213029989658,0.2172226423549841,0.1364126327266419,8.0,1.0,7.0,39.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(39), values -> List(1.0))"
1.0,6,1475.0,362.0,63.1,13925.49,20.22,1.53,0.0,0,0.0,0.0,10.9399995803833,89.52999877929688,10272.009765625,0.0,69.71,0.1771598071931776,0.2244701836022457,0.1962679955703211,0.1582213029989658,0.2172226423549841,0.2244701836022457,8.0,6.0,7.0,39.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(39), values -> List(1.0))"
1.0,6,1475.0,212.53,44.5,8580.53,0.0,0.0,0.0,0,0.0,0.0,36.2549991607666,193.70499801635745,10168.4150390625,1.0,84.14,0.1991600465516369,0.1364126327266419,0.2829381351399536,0.1582213029989658,0.2172226423549841,0.1364126327266419,8.0,6.0,7.0,39.0,2.0,3.0,"Map(vectorType -> sparse, length -> 40, indices -> List(39), values -> List(1.0))"
1.0,6,930.0,212.53,44.5,8580.53,2.61,1.93,0.0,0,0.0,0.0,24.0,107.0999984741211,10247.169921875,1.0,84.14,0.1771598071931776,0.1543094247565943,0.2585247604755858,0.1582213029989658,0.2172226423549841,0.1543094247565943,8.0,2.0,7.0,0.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(0), values -> List(1.0))"
0.0,7,930.0,765.76,341.23,4780.21,0.0,1.05,0.0,0,8.0,778.22,91.72000122070312,-65.44999694824219,10157.3798828125,1.0,-98.17,0.151653351328994,0.1364126327266419,0.077717967859809,0.1582213029989658,0.2074070374703918,0.1364126327266419,8.0,2.0,7.0,2.0,0.0,0.0,"Map(vectorType -> sparse, length -> 40, indices -> List(2), values -> List(1.0))"
0.0,7,361.0,61.0,350.0,1205.38,20.27,5.57,0.0,0,0.0,0.0,8.239999771118164,96.01000213623048,10204.01953125,0.0,85.01,0.1771598071931776,0.2029393824973133,0.1341159292205422,0.1582213029989658,0.2074070374703918,0.1988880998534953,8.0,0.0,7.0,0.0,0.0,1.0,"Map(vectorType -> sparse, length -> 40, indices -> List(0), values -> List(1.0))"


In [0]:
# High-gain features identified during experimentation; these are the inputs that
# were tested in the reduced model.
highGainFeatures = [
  'SLP_Value_mean',
  'DEW_Value_median',
  'DISTANCE',
  'DEP_TIME_BLK_mean_encoding',
  'TMP_Value_mean',
  'ORIGIN_mean_encoding',
  'TAIL_NUM_mean_encoding',
  'OP_UNIQUE_CARRIER_Index',
  'DAY_OF_MONTH_mean_encoding',
  'MONTH_Index',
]

print(highGainFeatures)

# Append the target so the list can be used directly in a column selection
highGainFeatures = [*highGainFeatures, 'DEP_DEL15']

In [0]:
### TRAINING AND TESTING LOOP

### During experimentation, replicas with random search and three splits were used
### Current loop works with best parameters found
repeat = 1

# Set to True to skip the folds returned by cv_to_skip (used during experimentation
# to reduce run time). With False every fold is trained and evaluated.
skipFolds = False

best_model, best_params, bestScore = None, None, 0.0
for replica in range(repeat):
  print("############################")
  print(f"Replica: {replica}")

  #   ### Only for random search###
  #   set_params = get_parameters()
  #   print(f"Parameters: {set_params}")

  # Folds that are skipped when skipFolds is enabled
  cvSkip = cv_to_skip(5)

  # One (precision, recall, F-beta) row per evaluated fold. Re-created for each
  # replica so the averages printed below describe this replica only; previously
  # the array kept growing across replicas and mixed their results.
  metricsArray = np.empty((0, 3))

  # Log models: one [model, parameters, metrics collected so far] entry per fold
  model_history = []

  for i, cv_train in enumerate(df_train_split):

    if skipFolds and i in cvSkip:
      continue

    ### Full Model ###
    cv_train = cv_train.select(*selected_cols)
    cv_val = df_val_split[i].select(*selected_cols)
    print("Preprocessing")
    # NOTE(review): preprocess() is applied to the validation fold on its own, so its
    # target mean encoding and string indexers are fitted on validation data rather
    # than reused from the training fold - confirm this does not leak the label.
    cv_train = preprocess(cv_train)
    cv_val = preprocess(cv_val)

    #     ### Reduced Model ###
    #     cv_train = cv_train.select(*highGainFeatures)
    #     cv_val = cv_val.select(*highGainFeatures)

    # Create vector of features: every preprocessed column except the label
    featureCols = cv_train.columns
    featureCols.remove('DEP_DEL15')
    pipeline = getXGBPipeline(featureCols)

    print("Getting pipeline ready")

    pipelineModel = pipeline.fit(cv_train)

    val_ml_train = pipelineModel.transform(cv_train)
    val_ml_test = pipelineModel.transform(cv_val)

    vectorCols = ['features', 'DEP_DEL15']
    # The log message now matches the resampling that is actually applied
    print("Oversampling")
    #     # Undersampling
    #     train = undersampling(val_ml_train.select(vectorCols))
    # Oversampling (training fold only; the validation fold is left untouched)
    train = oversampling(val_ml_train.select(vectorCols))
    test = val_ml_test.select(vectorCols)

    print("Validation Set {:d}".format(i+1))
    #   print("Training Dataset Count: " + str(train.count()))
    #   print("Test Dataset Count: " + str(test.count()))

    print("Training")

    ### ONLY FOR RANDOM SEARCH ###
    #     pred,mod = execXGBModelClass(train, test, set_params)

    ## ONLY FOR DEFAULT MODEL ###
    pred, mod = execXGBModelClass_default(train, test)

    #     ### ONLY FOR CUSTOM MODEL ###
    # NOTE(review): these values are stored in model_history below even though the
    # default model above is the one being trained - confirm they describe it.
    parameters = [3, 100, 0.1, 0, 0, 1, 1]
    #     print(f"Parameters: {parameters}")
    #     pred,mod = execXGBModelClass_custom(train, test, parameters)
    #     ##########

    print("Testing")

    precision, recall, fmeasure = getMetrics(pred)

    print("Precision is {:.3f}".format(precision))
    print("Recall is {:.3f}".format(recall))
    print("F beta(0.5) score is {:.3f}".format(fmeasure))

    newrow = np.array([precision, recall, fmeasure])

    metricsArray = np.append(metricsArray, [newrow], axis=0)
    model_history.append([mod, parameters, metricsArray])
    #     model_history.append([mod, set_params, metricsArray])

  # Nothing to average if every fold was skipped
  if metricsArray.shape[0] == 0:
    print("No folds were evaluated in this replica")
    continue

  avgArray = np.mean(metricsArray, axis=0)

  print("############################")
  print("Average of Cross validation")
  print("Average Precision is {:.3f}".format(avgArray[0]))
  print("Average Recall is {:.3f}".format(avgArray[1]))
  print("Average F beta(0.5) score is {:.3f}".format(avgArray[2]))

  # Keep the fold history of the replica with the best average F-beta score
  avgF = avgArray[2]
  if avgF > bestScore:
    best_model, bestScore = model_history, avgF
    print("++++++++++++++++++++++++++++")
    print(f"New best score: F beta(0.5) = {avgF:.3f}")
    print("++++++++++++++++++++++++++++")


  

In [0]:
# Count the columns of the last transformed training fold, label excluded
features = [name for name in val_ml_train.columns if name != 'DEP_DEL15']
len(features)

In [0]:
# Attach the feature names to the booster of the first model stored in best_model
# (best_model holds the per-fold [model, parameters, metrics] entries of the best replica)
booster = best_model[0][0].get_booster()
booster.feature_names = features

#### High-importance features
Here we selected the top three models and extracted their most relevant features. These features were used in the reduced version of the model.

In [0]:
# Build a feature/importance table from the booster's f-scores, highest first
fscore_by_feature = best_model[0][0].get_booster().get_fscore()
xgb_fea_imp = pd.DataFrame(
  list(fscore_by_feature.items()),
  columns=['feature', 'importance'],
)
xgb_fea_imp = xgb_fea_imp.sort_values('importance', ascending=False)
print('', xgb_fea_imp)


In [0]:
# Top features of the three best XGB models, one list per model
a = [
  'DEP_TIME_BLK_mean_encoding',
  'MONTH_Index',
  'TAIL_NUM_mean_encoding',
  'OP_UNIQUE_CARRIER_Index',
  'ORIGIN_mean_encoding',
  'TMP_Value_mean',
]
b = [
  'DEP_TIME_BLK_mean_encoding',
  'OP_UNIQUE_CARRIER_Index',
  'MONTH_Index',
  'ORIGIN_mean_encoding',
  'DISTANCE',
  'TAIL_NUM_mean_encoding',
]
c = [
  'DAY_OF_MONTH_mean_encoding',
  'ORIGIN_mean_encoding',
  'MONTH_Index',
  'DEW_Value_median',
  'SLP_Value_mean',
  'DISTANCE',
]

In [0]:
# Merge the three top-feature lists into one collection without duplicates
set_feats = set(a)
for other_top in (b, c):
  set_feats.update(other_top)
list(set_feats)

#### Run the model on test data

In [0]:
# Transform the training & test data and save it - run this once
  
# df_train_upd = aggregate_weather_reports(df_train)
# df_test_upd = aggregate_weather_reports(df_test)
  
# df_train_upd = get_transformed_df(df_train_upd)
# df_test_upd = get_transformed_df(df_test_upd)
  
# df_train_upd = add_previous_flight_delay_indicator(df_train_upd)
# df_test_upd = add_previous_flight_delay_indicator(df_test_upd)
  
# df_train_upd.write.parquet(f"{blob_url}/train_agg_0404")
# df_test_upd.write.parquet(f"{blob_url}/test_agg_0404")

In [0]:
# Read the dataframes for inference - this will be part of main loop
train_agg_path = f"{blob_url}/train_agg_0404"
test_agg_path = f"{blob_url}/test_agg_0404"

df_train_main = spark.read.parquet(train_agg_path)
df_test_main = spark.read.parquet(test_agg_path)

In [0]:
def preprocess_dos(df):
  """Encode and clean a DataFrame for the final train/test run.

  Same stages as the cross-validation preprocessing, except that TAIL_NUM and
  OP_CARRIER_FL_NUM are kept in their original form (neither dropped nor indexed)
  and the snow columns carry the "_mean" suffix.

  Steps, in order:
    1. Target-mean-encode a fixed set of columns against DEP_DEL15 (via the
       target_mean_encoding helper) and drop the originals, except TAIL_NUM.
    2. Fill nulls: the fill_nulls_with_zero_custom helper for most weather columns,
       -99 for TMP_Value_mean.
    3. Cast weather_condition to string (via cast_features_to_strings).
    4. Index the remaining string columns (TAIL_NUM and OP_CARRIER_FL_NUM excluded)
       with a frequency-descending StringIndexer (output column "<name>_Index",
       unseen values kept) and drop the indexed originals.

  Args:
    df: Spark DataFrame containing DEP_DEL15 and the columns referenced below.

  Returns:
    The transformed Spark DataFrame.

  NOTE(review): the target encoding and the indexers are fitted on `df` itself, so
  calling this separately on the train and test frames fits them independently on
  each - confirm this is intended.
  """
  # Columns that pass through untouched
  passthrough_cols = ('TAIL_NUM', 'OP_CARRIER_FL_NUM')

  ### Perform target mean encoding and drop original features (TAIL_NUM is kept)
  target_encoded_cols = ['ORIGIN', 'DEST', 'DEP_TIME_BLK', 'TAIL_NUM', 'DAY_OF_MONTH', 'DEST_CITY_MARKET_ID']
  df = target_mean_encoding(df, col=target_encoded_cols, target='DEP_DEL15')
  df = df.drop(*[name for name in target_encoded_cols if name != 'TAIL_NUM'])

  # Handle nulls
  fillWithZero = ['VIS_Horizontal_median', 'CIG_CeilingHeightDim_median', 'AL_SnowAccumDuration_mean',
                  'AJ1_SnowDepth_mean', 'WND_Speed_mean', 'SLP_Value_mean', 'weather_condition']
  fillWithOther = ['TMP_Value_mean']
  df = fill_nulls_with_zero_custom(df, fillWithZero)
  df = df.fillna(-99, fillWithOther)

  # Make sure weather condition is a string
  df = cast_features_to_strings(df, ['weather_condition'])

  # Frequency encode (descending) the remaining string columns. Filtering (rather
  # than list.remove) avoids a ValueError when a pass-through column is absent or
  # is not string-typed.
  string_cols = [
    name for name, dtype in df.dtypes
    if dtype == 'string' and name not in passthrough_cols
  ]
  indexers = [
    StringIndexer(inputCol=name, outputCol=name + '_Index',
                  handleInvalid="keep", stringOrderType="frequencyDesc")
    for name in string_cols
  ]
  df = Pipeline(stages=indexers).fit(df).transform(df)
  df = df.drop(*string_cols)

  return df

In [0]:
# Columns selected for the final train/test run. Besides the model inputs and the
# label, the list carries TAIL_NUM, OP_CARRIER_FL_NUM and TIMESTAMP_UTC.
selected_cols = [
  'DEP_DEL15',
  'OP_UNIQUE_CARRIER',
  'DAY_OF_WEEK',
  'DISTANCE',
  'DISTANCE_GROUP',
  'MONTH',
  'ORIGIN',
  'DEST',
  'CIG_CeilingHeightDim_median',
  'WND_DirectionAngle_median',
  'VIS_Horizontal_median',
  'AA_RainDepth',
  'AA_RainDuration',
  'AL_SnowAccumDuration_mean',
  'AL_SnowAccumDepth',
  'AJ1_SnowDepth_mean',
  'AJ1_SnowEqWaterDepth',
  'WND_Speed_mean',
  'TMP_Value_mean',
  'SLP_Value_mean',
  'PREV_DEP_DEL15',
  'weather_condition',
  'DEW_Value_median',
  'DEST_CITY_MARKET_ID',
  'VIS_Variability',
  'DAY_OF_MONTH',
  'WND_Type',
  'TAIL_NUM',
  'DEP_TIME_BLK',
  'OP_CARRIER_FL_NUM',
  'TIMESTAMP_UTC',
]



In [0]:
# Final run: train on the full training data and score the held-out test data.
# Each stage gets its own name instead of overwriting df_train_main / df_test_main,
# so the cell can be re-run without first re-reading the parquet files.
train_selected = df_train_main.select(*selected_cols)
test_selected = df_test_main.select(*selected_cols)

# NOTE(review): preprocess_dos is applied to the test frame on its own, so its target
# mean encoding and string indexers are fitted on test data rather than reused from
# the training data - confirm this does not leak the label.
train_encoded = preprocess_dos(train_selected)
test_encoded = preprocess_dos(test_selected)

# Undersampling of the training data (this line was previously labelled "oversampling").
# NOTE(review): the cross-validation loop uses oversampling - confirm which one is
# intended for the final model.
train_balanced = undersampling(train_encoded)

# Create vector of features: everything except the label and the pass-through columns
nonFeatureCols = {'TAIL_NUM', 'OP_CARRIER_FL_NUM', 'TIMESTAMP_UTC', 'DEP_DEL15'}
featureCols = [name for name in train_balanced.columns if name not in nonFeatureCols]
pipeline = getXGBPipeline(featureCols)

print("Getting pipeline ready")

pipelineModel = pipeline.fit(train_balanced)

ml_train = pipelineModel.transform(train_balanced)
ml_test = pipelineModel.transform(test_encoded)

# Keep the preprocessed columns next to the assembled feature vector
vectorCols = train_balanced.columns + ['features']

train_all = ml_train.select(vectorCols)
test_all = ml_test.select(vectorCols)

print("############################")

pred, mod = execXGBModelClass_default(train_all, test_all)

precision, recall, fmeasure = getMetrics(pred)

print("Final test scores")
print("Precision is {:.3f}".format(precision))
print("Recall is {:.3f}".format(recall))
print("F beta(0.5) score is {:.3f}".format(fmeasure))

In [0]:
# Persist the test-set predictions to blob storage.
# NOTE(review): no save mode is set, so with Spark's default mode this is expected to
# fail if the path already exists - confirm before re-running.
predictions_path = f"{blob_url}/xgb_test_0410b"
pred.write.parquet(predictions_path)

### Analyze errors

In [0]:
%run "../libs/error_analysis"

In [0]:
# Summarise the test-set predictions with analyze_errors_xgb (presumably provided by
# the ../libs/error_analysis notebook run in the previous cell - confirm). The tables
# rendered below report averages of selected columns per PRED_GROUP (TP/TN/FN/FP).
analyze_errors_xgb(pred)

PRED_GROUP,avg(DEP_TIME_BLK_mean_encoding),avg(MONTH_Index),avg(TAIL_NUM_mean_encoding),avg(OP_UNIQUE_CARRIER_Index),avg(ORIGIN_mean_encoding)
TP,0.229395851028525,4.844605621256034,0.2035595718986701,4.308230406219554,0.1973651550655299
TN,0.1685439335854144,5.56571770681387,0.1785231578406277,4.410201826067689,0.1814568669426705
FN,0.1827918191820176,5.536112021888815,0.1861161873713905,4.523368043385681,0.1856127796746141
FP,0.2222126152650344,4.733665943569402,0.2037693499996322,4.160859767724042,0.1980891867719618


PRED_GROUP,avg(PREV_DEP_DEL15)
TP,0.6135478503714211
TN,0.036747174397118
FN,0.0310185760964193
FP,0.2361924323555927
