In [1]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local").appName("Final_IDS").getOrCreate()
sc = spark.sparkContext
spark

In [2]:
from functools import reduce
from pyspark.sql import DataFrame
from pyspark.sql import functions as F
from pyspark.ml.feature import StringIndexer, VectorAssembler, StandardScaler
from pyspark.ml.stat import Correlation
from pyspark.ml import Pipeline
from pyspark.sql.types import DoubleType
import pyarrow as pa
from pyspark.sql.types import DataType
from pyspark.sql.functions import col
from pyspark.sql.functions import split
from pyspark.sql.dataframe import *
from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator, RegressionEvaluator
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator, TrainValidationSplit
from pyspark.ml.classification import RandomForestClassifier, LogisticRegression

In [3]:
df_col = spark.read.csv('NUSW-NB15_features.csv', header="true", inferSchema=False)
#resolving the columns containing features to a list in the order they appear to use as headers for the dataframe
distinct_names = df_col.select(col("`No.`").cast("int"), "Name").distinct()
ordered_dataset_names = [row.Name for row in distinct_names.orderBy("`No.`").collect()]

In [4]:
dfs = []
csv_files = ["C:/users/python/UNSW-NB15_1.csv", "C:/users/python/UNSW-NB15_2.csv", "C:/users/python/UNSW-NB15_3.csv", "C:/users/python/UNSW-NB15_4.csv"]

for csv_file in csv_files:
    df = spark.read.option('header', False).csv(csv_file)
    renamed_df = df.toDF(*ordered_dataset_names)
    dfs.append(renamed_df)

def union_all(*dfss):
    return reduce(DataFrame.unionAll, dfss)


dataframe = union_all(*dfs)

In [5]:
dataframe = dataframe.na.fill('normal', ['attack_cat'])
dataframe = dataframe.na.fill('0', ['ct_flw_http_mthd'])
dataframe = dataframe.na.fill('0', ['is_ftp_login'])
dataframe = dataframe.withColumn("is_ftp_login", F.when(F.col("is_ftp_login") > 1, 1).otherwise(F.col("is_ftp_login")))
dataframe = dataframe.withColumn("service", F.when(F.col("service") == "-", None).otherwise(F.col("service")))
dataframe = dataframe.fillna("unknown", ["service"])
dataframe = dataframe.fillna("0", ["sport"])
dataframe = dataframe.fillna("0", ["dsport"])

In [6]:
#resolving the ip address columns from string to numeric type while preserving the value
dataframe = dataframe.withColumn("srcip_int",split(col("srcip"),"\.")[0]*16777216 +split(col("srcip"),"\.")[1]*65536+ split(col("srcip"),"\.")[2]*256 + split(col("srcip"),"\.")[3])
dataframe = dataframe.withColumn("dstip_int",split(col("dstip"),"\.")[0]*16777216 +split(col("dstip"),"\.")[1]*65536+ split(col("dstip"),"\.")[2]*256 + split(col("dstip"),"\.")[3])

In [7]:
columns_to_convert = ["dur", "dload", "sload", "sjit", "djit", "Sintpkt", "Dintpkt", "tcprtt", "synack", "ackdat"]
for col_name in columns_to_convert:
    dataframe = dataframe.withColumn(col_name, dataframe[col_name].cast("double"))

In [8]:
#specifying the columns to exclude from the transformation of dataframe from string type to double type
#'srcip','dstip' will be dropped so no need to transform that, 'srcip_int' and 'dstip_int' have been transformed to int type, the rest are nominal datatypes that will be transformed later user stringindexer
columns_to_exclude = ["srcip", "dstip", "srcip_int","dstip_int","proto", "state", "service","attack_cat", "dur", "dload", "sload", "sjit", "djit", "Sintpkt" , "Dintpkt","tcprtt", "synack", "ackdat"]

In [9]:
#performing stringindexing of the nominal datatypes
for col_name, col_type in dataframe.dtypes:
        if col_name not in columns_to_exclude:
            dataframe = dataframe.withColumn(col_name, dataframe[col_name].cast("int"))
columns_to_convert = ["proto", "state", "service", "attack_cat"]
indexers = [StringIndexer(inputCol="proto", outputCol="proto_index", handleInvalid="skip"), StringIndexer(inputCol="state", outputCol="state_index", handleInvalid="skip"),StringIndexer(inputCol="service", outputCol="service_index",  handleInvalid="skip"),StringIndexer(inputCol="attack_cat", outputCol="attackcat_index",  handleInvalid="skip")]
pipeline = Pipeline(stages=indexers)
dataframe_r = pipeline.fit(dataframe).transform(dataframe)

In [10]:
#columns to drop after transformation, ct_ftp_cmd was dropped due to having about half null values
cols = ("srcip", "dstip", "proto","state","service", "attack_cat", "ct_ftp_cmd")

In [11]:
dataframe_d = dataframe_r.drop(*cols)

In [12]:
num_rows = dataframe.count()
num_columns = len(dataframe.columns)

print("Number of rows:", num_rows)
print("Number of columns:", num_columns)

Number of rows: 2540047
Number of columns: 51


In [13]:
training, test = dataframe_d.randomSplit([0.8, 0.2], seed=123)

In [14]:
#feature selection on training dataset, first transform columns to vector form  
#then find highly correlated columns
corr_columns = training.columns
vec_assembler = VectorAssembler(inputCols=corr_columns, outputCol="features", handleInvalid="skip")
corr_df_vector = vec_assembler.transform(training).select("features")
correlation_matrix = Correlation.corr(corr_df_vector, "features")
correlations = correlation_matrix.head()[0].toArray()
for i in range(len(corr_columns)):
    for j in range(i + 1, len(corr_columns)):
        col1 = corr_columns[i]
        col2 = corr_columns[j]
        correlation_value = correlations[i, j]
        if correlation_value > 0.9:
            print(f"Correlation between {col1} and {col2}: {correlation_value:.2f}")

Correlation between sbytes and sloss: 0.95
Correlation between dbytes and dloss: 0.99
Correlation between dbytes and Dpkts: 0.97
Correlation between sttl and ct_state_ttl: 0.93
Correlation between sttl and Label: 0.95
Correlation between dloss and Dpkts: 0.99
Correlation between swin and dwin: 0.99
Correlation between stcpb and dtcpb: 0.96
Correlation between Stime and Ltime: 1.00
Correlation between tcprtt and synack: 0.92
Correlation between tcprtt and ackdat: 0.92
Correlation between ct_srv_src and ct_srv_dst: 0.98
Correlation between ct_srv_src and ct_dst_src_ltm: 0.97
Correlation between ct_srv_dst and ct_dst_src_ltm: 0.98
Correlation between ct_dst_ltm and ct_src_ ltm: 0.96
Correlation between ct_dst_ltm and ct_src_dport_ltm: 0.98
Correlation between ct_src_ ltm and ct_src_dport_ltm: 0.97
Correlation between ct_src_dport_ltm and ct_dst_sport_ltm: 0.92
Correlation between ct_src_dport_ltm and ct_dst_src_ltm: 0.90


In [15]:
#dropping one of the columns with a correlation higher than 0.95
to_drop = set()
for i in range(len(corr_columns)):
    for j in range(i + 1, len(corr_columns)):
        col1 = corr_columns[i]
        col2 = corr_columns[j]
        correlation_value = correlations[i, j]
        if correlation_value > 0.95:
            # Add one of the correlated features to the list to_drop
            to_drop.add(col2)

print(to_drop)

{'ct_srv_dst', 'ct_src_ ltm', 'ct_src_dport_ltm', 'ct_dst_src_ltm', 'dtcpb', 'dwin', 'Ltime', 'dloss', 'Dpkts'}


In [16]:
#highly correlated features, attackcat_index is for multiclassification which will be used later, srcip, dstip,sport, and dsport will ideally not be available in training data
columns_to_drop = ('ct_srv_dst', 'Ltime', 'ct_dst_src_ltm', 'ct_src_ltm', 'ct_src_dport_ltm', 'dloss', 'Dpkts', 'dwin', 'dtcpb', 'attackcat_index', 'srcip', 'dstip', 'sport', 'dsport', 'srcip_int','dstip_int')

In [17]:
training_d = training.drop(*map(str, columns_to_drop))

In [18]:
#standardizing the dataset and excluding the label
columns = training_d.columns
column_to_exclude = 'Label'

In [19]:
feature_columns = [col for col in columns if col != column_to_exclude]
assembler = VectorAssembler(inputCols=feature_columns, outputCol='features', handleInvalid='skip')
temptraining = assembler.transform(training_d)
scaler = StandardScaler(inputCol='features', outputCol='scaled_features')
training = scaler.fit(temptraining).transform(temptraining)

In [20]:
test_d = test.drop(*map(str, columns_to_drop))
feature_columns = [col for col in columns if col != column_to_exclude]
temptest = assembler.transform(test_d)
test = scaler.fit(temptest).transform(temptest)

In [21]:
training = training.withColumnRenamed('Label', 'label')
test = test.withColumnRenamed('Label', 'label')

In [22]:
lr = LogisticRegression(labelCol='label', featuresCol='scaled_features')
lr_param_grid = ParamGridBuilder() \
    .addGrid(lr.regParam, [0.01, 0.1, 0.5]) \
    .addGrid(lr.fitIntercept, [False, True]) \
    .addGrid(lr.elasticNetParam, [0.0, 0.5, 1.0]) \
    .build()
tvs = TrainValidationSplit(estimator=lr,
                           estimatorParamMaps=lr_param_grid,
                           evaluator=BinaryClassificationEvaluator(),
                           # 80% of the data will be used for training, 20% for validation.
                           trainRatio=0.8)
model = tvs.fit(training)
best_model = model.bestModel
predictions = best_model.transform(test)
true_positives = predictions.filter("label = 1 AND prediction = 1").count()
false_positives = predictions.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("F1 Score:", f1_score)
print("False Alarm Rate:", false_alarm_rate)


F1 Score: 0.9470005726026475
False Alarm Rate: 0.013871319955792865


In [23]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions)
print("AUC Score:", auc_score)


AUC Score: 0.9966634557088376


In [24]:
rf = RandomForestClassifier(labelCol='label', featuresCol='scaled_features')
rf_param_grid = ParamGridBuilder() \
    .addGrid(rf.numTrees, [10, 50, 100]) \
    .addGrid(rf.maxDepth, [5, 10, 15]) \
    .build()

tvs_rf = TrainValidationSplit(estimator=rf,
                              estimatorParamMaps=rf_param_grid,
                              evaluator=BinaryClassificationEvaluator(),
                              trainRatio=0.8)

model_rf = tvs_rf.fit(training)
best_model_rf = model_rf.bestModel
predictions_rf = best_model_rf.transform(test)
true_positives = predictions_rf.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_rf.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_rf.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_rf.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("rf_F1 Score:", f1_score)
print("rf_False Alarm Rate:", false_alarm_rate)

rf_F1 Score: 0.9792076928978798
rf_False Alarm Rate: 0.0050365507782852426


In [25]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions_rf)
print("rf_AUC Score:", auc_score)

rf_AUC Score: 0.9997484738247867


In [26]:
from pyspark.ml.classification import LinearSVC

In [27]:
svm = LinearSVC(labelCol='label', featuresCol='scaled_features')
svm_param_grid = ParamGridBuilder() \
    .addGrid(svm.maxIter, [10, 100]) \
    .addGrid(svm.regParam, [0.01, 0.1, 0.5]) \
    .build()

tvs_svm = TrainValidationSplit(estimator=svm,
                               estimatorParamMaps=svm_param_grid,
                               evaluator=BinaryClassificationEvaluator(),
                               trainRatio=0.8)
model_svm = tvs_svm.fit(training)
best_model_svm = model_svm.bestModel
predictions_svm = best_model_svm.transform(test)
true_positives = predictions_svm.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_svm.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_svm.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_svm.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("svm_F1 Score:", f1_score)
print("svm_False Alarm Rate:", false_alarm_rate)

svm_F1 Score: 0.9409347714432461
svm_False Alarm Rate: 0.015089679231967617


In [28]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions_svm)
print("svm_AUC Score:", auc_score)

svm_AUC Score: 0.9962330248401873


In [29]:
from pyspark.ml.classification import DecisionTreeClassifier


In [30]:
dt = DecisionTreeClassifier(labelCol='label', featuresCol='scaled_features')
dt_param_grid = ParamGridBuilder() \
    .addGrid(dt.maxDepth, [5, 10, 15]) \
    .build()

tvs_dt = TrainValidationSplit(estimator=dt,
                              estimatorParamMaps=dt_param_grid,
                              evaluator=BinaryClassificationEvaluator(),
                              trainRatio=0.8)
model_dt = tvs_dt.fit(training)
best_model_dt = model_dt.bestModel
predictions_dt = best_model_dt.transform(test)
true_positives = predictions_dt.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_dt.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_dt.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_dt.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("dt_F1 Score:", f1_score)
print("dt_False Alarm Rate:", false_alarm_rate)

dt_F1 Score: 0.9751789405353696
dt_False Alarm Rate: 0.005439341686528808


In [31]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions_dt)
print("dt_AUC Score:", auc_score)

dt_AUC Score: 0.9987333531687295


In [32]:
from pyspark.ml.classification import GBTClassifier


In [33]:
gbt = GBTClassifier(labelCol='label', featuresCol='scaled_features')
gbt_param_grid = ParamGridBuilder() \
    .addGrid(gbt.maxDepth, [5, 10]) \
    .addGrid(gbt.maxIter, [20, 50]) \
    .build()

tvs_gbt = TrainValidationSplit(estimator=gbt,
                               estimatorParamMaps=gbt_param_grid,
                               evaluator=BinaryClassificationEvaluator(),
                               trainRatio=0.8)

model_gbt = tvs_gbt.fit(training)
best_model_gbt = model_gbt.bestModel
predictions_gbt = best_model_gbt.transform(test)
true_positives = predictions_gbt.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_gbt.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_gbt.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_gbt.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("gbt_F1 Score:", f1_score)
print("gbt_False Alarm Rate:", false_alarm_rate)

gbt_F1 Score: 0.97691886527735
gbt_False Alarm Rate: 0.004783558141702508


In [34]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions_gbt)
print("gbt_AUC Score:", auc_score)

gbt_AUC Score: 0.9995923329515508


In [35]:
exclude_columns = ['features', 'scaled_features', 'label']
feature_names = [col for col in training.columns if col not in exclude_columns]

In [36]:
decision_tree_model = best_model_dt
feature_importances = decision_tree_model.featureImportances
feature_importance_dict = {}
for feature_name, importance in zip(feature_names, feature_importances):
    feature_importance_dict[feature_name] = importance

sorted_feature_importance = sorted(feature_importance_dict.items(), key=lambda x: x[1], reverse=True)
print("Feature Importances:")
for feature_name, importance in sorted_feature_importance:
    print(f"{feature_name}: {importance}")

Feature Importances:
ct_state_ttl: 0.9242375702149693
smeansz: 0.025926133409351515
sbytes: 0.023579427043580164
ct_srv_src: 0.009015875079260985
dmeansz: 0.004070826247022661
proto_index: 0.003224146417374436
sttl: 0.0020769488557484997
dttl: 0.001728203694154997
Spkts: 0.001166171033622853
dbytes: 0.0007900842083220633
Dintpkt: 0.0006339058617094622
sloss: 0.0005365312225968248
service_index: 0.0005068647661865191
Stime: 0.00048276453879922567
djit: 0.0004291331478325324
ackdat: 0.0004054697498145416
sjit: 0.000388074781368542
ct_src_ ltm: 0.00023932742296264365
dur: 0.00012750631093002
ct_dst_ltm: 0.0001227081698673326
res_bdy_len: 0.00011757859249464134
sload: 4.7925049199221146e-05
trans_depth: 4.219527382872446e-05
stcpb: 3.365870132567805e-05
ct_dst_sport_ltm: 3.0344821362307025e-05
swin: 2.2317715778035818e-05
dload: 7.66941901848868e-06
Sintpkt: 5.861616369824903e-06
ct_flw_http_mthd: 4.122571991452658e-06
is_ftp_login: 6.540631562102726e-07
tcprtt: 0.0
synack: 0.0
is_sm_ips_p

In [37]:
#dropping columns less than 0.0001
columns_to_drop = ('state_index','dload', 'dur','is_sm_ips_ports', 'sload','res_bdy_len','is_ftp_login','synack','ct_dst_ltm'
                   'tcprtt' ,'swin', 'sjit','Sintpkt', 'ct_flw_http_mthd', 'ct_src_ ltm','stcpb' ,'ct_dst_sport_ltm', 'trans_depth')

In [38]:
training_sf = training_d.drop(*map(str, columns_to_drop))

In [39]:
#standardizing the dataset and excluding the label
columns = training_sf.columns
column_to_exclude = 'Label'
feature_columns = [col for col in columns if col != column_to_exclude]
assembler = VectorAssembler(inputCols=feature_columns, outputCol='features', handleInvalid='skip')
temptraining = assembler.transform(training_sf)
scaler = StandardScaler(inputCol='features', outputCol='scaled_features')
training = scaler.fit(temptraining).transform(temptraining)

In [40]:
test_sf = test_d.drop(*map(str, columns_to_drop))
temptest = assembler.transform(test_sf)
test = scaler.fit(temptest).transform(temptest)

In [41]:
training = training.withColumnRenamed('Label', 'label')
test = test.withColumnRenamed('Label', 'label')

In [43]:
best_model_params = best_model_rf.extractParamMap()
for param_name, param_value in best_model_params.items():
    print(f"{param_name.name}: {param_value}")


bootstrap: True
cacheNodeIds: False
checkpointInterval: 10
featureSubsetStrategy: auto
featuresCol: scaled_features
impurity: gini
labelCol: label
leafCol: 
maxBins: 32
maxDepth: 15
maxMemoryInMB: 256
minInfoGain: 0.0
minInstancesPerNode: 1
minWeightFractionPerNode: 0.0
numTrees: 50
predictionCol: prediction
probabilityCol: probability
rawPredictionCol: rawPrediction
seed: -3555532632180229690
subsamplingRate: 1.0


In [42]:
model_rf = tvs_rf.fit(training)
best_model_rf = model_rf.bestModel
predictions_rf = best_model_rf.transform(test)
true_positives = predictions_rf.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_rf.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_rf.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_rf.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("rf_F1 Score:", f1_score)
print("rf_False Alarm Rate:", false_alarm_rate)

rf_F1 Score: 0.9690197991436115
rf_False Alarm Rate: 0.005325158786962139


In [43]:
evaluator_auc = BinaryClassificationEvaluator(labelCol='label', rawPredictionCol='rawPrediction')
auc_score = evaluator_auc.evaluate(predictions_rf)
print("rf_AUC Score:", auc_score)

rf_AUC Score: 0.9996176878808688


In [44]:
model_dt = tvs_dt.fit(training)
best_model_dt = model_dt.bestModel
predictions_dt = best_model_dt.transform(test)
true_positives = predictions_dt.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_dt.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_dt.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_dt.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("dt_F1 Score:", f1_score)
print("dt_False Alarm Rate:", false_alarm_rate)

dt_F1 Score: 0.9641101661855348
dt_False Alarm Rate: 0.004899776280311317


In [45]:
auc_score = evaluator_auc.evaluate(predictions_dt)
print("dt_AUC Score:", auc_score)

dt_AUC Score: 0.9982006363350612


In [46]:
model_gbt = tvs_gbt.fit(training)
best_model_gbt = model_gbt.bestModel
predictions_gbt = best_model_gbt.transform(test)
true_positives = predictions_gbt.filter("label = 1 AND prediction = 1").count()
false_positives = predictions_gbt.filter("label = 0 AND prediction = 1").count()
true_negatives = predictions_gbt.filter("label = 0 AND prediction = 0").count()
false_negatives = predictions_gbt.filter("label = 1 AND prediction = 0").count()
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1_score = 2 * (precision * recall) / (precision + recall)

false_alarm_rate = false_positives / (false_positives + true_negatives)
print("gbt_F1 Score:", f1_score)
print("gbt_False Alarm Rate:", false_alarm_rate)

gbt_F1 Score: 0.9671824973319104
gbt_False Alarm Rate: 0.0035988710483315554


In [47]:
auc_score = evaluator_auc.evaluate(predictions_gbt)
print("gbt_AUC Score:", auc_score)

gbt_AUC Score: 0.9994018130885248


In [50]:
training_sf.show(5)

+------+------+----+----+-----+-----+-------+-------+----+----------+-------+------+------+------------+----------+----------+-----+-----------+-------------+
|sbytes|dbytes|sttl|dttl|sloss|Spkts|smeansz|dmeansz|djit|     Stime|Dintpkt|tcprtt|ackdat|ct_state_ttl|ct_srv_src|ct_dst_ltm|Label|proto_index|service_index|
+------+------+----+----+-----+-----+-------+-------+----+----------+-------+------+------+------------+----------+----------+-----+-----------+-------------+
|   147|     0|  64|   0|    0|    1|    147|      0| 0.0|1421929277|    0.0|   0.0|   0.0|           0|         2|         2|    0|        6.0|          0.0|
|   200|     0| 254|   0|    0|    2|    100|      0| 0.0|1421930680|    0.0|   0.0|   0.0|           2|        12|         4|    0|      134.0|          0.0|
|  2800|     0|  64|   0|    0|   20|    140|      0| 0.0|1421931785|    0.0|   0.0|   0.0|           0|         2|         2|    0|        6.0|          0.0|
|   147|     0|  64|   0|    0|    1|    147| 