# (1) Multi-class Decision Tree (DT) Classification Model of AR, CG and Others (Downlink & Uplink)

# 1- Read the Datasets

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
from sklearn import tree
import graphviz




# Load datasets
### Enter your Dataset file address with csv format
ar_data = pd.read_csv(r'AR.csv')
cg_data = pd.read_csv(r'CG.csv')
others_data = pd.read_csv(r'others.csv')


# Set the features
features = ['IPI', 'FS', 'IFI']

# Keep only the feature columns and label each dataset.
# `.assign` returns a new DataFrame, so the label column is never written into a
# column subset of the loaded frame (the pattern that raises SettingWithCopyWarning).
ar_data = ar_data[features].assign(Class='AR')
cg_data = cg_data[features].assign(Class='CG')
others_data = others_data[features].assign(Class='Others')


# Show the number of Samples in training Dataset
print(f'AR samples are {ar_data.shape[0]}\nCG samples are {cg_data.shape[0]}\nother samples are {others_data.shape[0]}\n###########')


# Combine and shuffle the samples (Dataset of Combined Samples of AR & CG & other APPs (DS_CSACO))
# The shuffle is seeded (same seed as the train/test split and the test-set shuffle)
# so that the row order, and therefore the later seeded split, is reproducible.
DS_CSACO = (
    pd.concat([ar_data, cg_data, others_data])
    .sample(frac=1, random_state=30)
    .reset_index(drop=True)
)

# Per-class sample counts of the combined dataset
print(f'The #No of samples belong to AR is ' + str(DS_CSACO[DS_CSACO['Class']=='AR'].shape[0]))
print(f'The #No of samples belong to CG is ' + str(DS_CSACO[DS_CSACO['Class']=='CG'].shape[0]))
print(f'The #No of samples belong to Other Apps is ' + str(DS_CSACO[DS_CSACO['Class']=='Others'].shape[0]))

# 2- Train the AR and CG Decision Tree Model

In [None]:
# Split the data
##The dataset is DS_CSACO
X = DS_CSACO[features]
y = DS_CSACO['Class']

# Class weight: the inverse of each class's sample count, so a class with fewer
# samples gets a proportionally larger per-sample weight.
# The counts are taken over the whole of DS_CSACO, i.e. before the train/test split.
class_weights = {
    label: 1 / int((y == label).sum())
    for label in ('AR', 'CG', 'Others')
}

# Train the multi-class model (Testsize = 10%, Classweight, and Criterion = 'Entropy')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=30)

# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, equally good splits can be picked differently
# from run to run and the fitted tree is not reproducible.
cls_model = DecisionTreeClassifier(
    class_weight=class_weights,
    criterion="entropy",
    random_state=30,
)
cls_model.fit(X_train, y_train)


# 3- Evaluate the Model Before Optimization

In [None]:
# Evaluate the AR / CG / Others model
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on a labelled sample set.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_test : feature matrix passed to ``model.predict``.
    y_test : true labels, compared against the predictions.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, conf_matrix)`` where precision,
        recall and F1 are computed with ``average='weighted'`` and
        ``conf_matrix`` is the result of ``confusion_matrix(y_test, y_pred)``.
    """
    predicted = model.predict(X_test)
    # The three averaged metrics share the same call signature.
    weighted_scores = [
        metric(y_test, predicted, average='weighted')
        for metric in (precision_score, recall_score, f1_score)
    ]
    return (
        accuracy_score(y_test, predicted),
        *weighted_scores,
        confusion_matrix(y_test, predicted),
    )

# Score cls_model on the split created by train_test_split
# (Accuracy, Precision, Recall, F1-score + Confusion Matrix)
accuracy, precision, recall, f1, conf = evaluate_model(cls_model, X_test, y_test)


# Print evaluation metrics
print(
    f'AR Model Metrics:accuracy is {accuracy} |\n'
    f'Precision is {precision} |\n'
    f'recall is {recall} |\n'
    f'f-score is {f1}',
    '\nIt is confusion matrix:\n',
    conf,
)

# Print the features used in this training phase
print('Features are', features)

AR Model Metrics:accuracy is 0.9876390605686032 |
Precision is 0.9879819609605839 |
recall is 0.9876390605686032 |
f-score is 0.9876448168568386 
It is confusion matrix:
 [[273   0   1]
 [  0 255   0]
 [  0   9 271]]
Features are ['IPI', 'FS', 'IFI']


In [None]:
# Cross Validation K-fold with K=10
# make_scorer, cross_validate and cross_val_score are not imported by any earlier
# cell, so they are imported here to keep the notebook runnable from a fresh kernel.
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate, cross_val_score

# Macro-averaged scorers: every class counts equally, regardless of its size
scoring = {'precision': make_scorer(precision_score, average='macro'),
           'recall': make_scorer(recall_score, average='macro'),
           'f1_score': make_scorer(f1_score, average='macro')}


# cross_validate fits a fresh clone of cls_model on each of the 10 folds of (X, y)
cv_results = cross_validate(cls_model, X, y, cv=10, scoring=scoring)

# Mean of each metric over the 10 folds
print("Precision:", cv_results['test_precision'].mean())
print("Recall:", cv_results['test_recall'].mean())
print("F1 Score:", cv_results['test_f1_score'].mean())

print("**************************************************************")
# Second 10-fold run with the estimator's default scorer (accuracy for classifiers)
dt_cv_scores = cross_val_score(cls_model, X, y, cv=10)
print("Decision Tree - CV Scores (Accuracy):", dt_cv_scores)
print("Decision Tree - Average CV Score (Accuracy Mean):", dt_cv_scores.mean())
print('Features are',features)

# 4- Draw the Decision Tree

In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Draw the fitted cls_model tree on an explicit 20x10 inch figure
fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    cls_model,
    feature_names=features,
    class_names=['AR', 'CG', 'Others'],  # listed in the sorted order of the class labels
    filled=True,
    ax=ax,
)
ax.set_title("AR-CG Classification  Decision Tree")
plt.show()


# 5- Improve the Model Using the Grid Search Technique

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# DS_CSACO dataset with 'IPI','FS','IFI' features and Class
X = DS_CSACO.drop('Class', axis=1)
y = DS_CSACO['Class']


# Define the parameter grid (Tables.I of the paper)
param_grid = {
    'max_depth': [None, 10, 20, 30,40],
    'min_samples_split': [2, 5, 10,20],
    'min_samples_leaf': [1, 2, 4,6],
    'criterion': ['entropy','gini']

}


# Initialize GridSearch with cross-validation.
# GridSearchCV works on clones of cls_model, so cls_model itself is left as fitted earlier.
grid_search = GridSearchCV(estimator=cls_model, param_grid=param_grid, cv=10, scoring='accuracy')

# Fit the grid search to the data (every parameter combination, 10 folds each)
grid_search.fit(X, y)

# Print the best parameters and their mean cross-validated accuracy.
# best_score_ is measured on held-out folds, so it is the generalization estimate
# to quote for the tuned model.
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validated accuracy: ", grid_search.best_score_)

# With the default refit=True, best_estimator_ has already been refit on all of
# (X, y) using the best parameters; no further training is needed.
cls_model_optimized = grid_search.best_estimator_

# Report on the training data itself. These predictions are made on the same
# samples the estimator was fit on, so the numbers are optimistic and only show
# how well the tree fits its training set.
print("Report on the training data (same samples the model was fit on):")
predictions = cls_model_optimized.predict(X)
print(classification_report(y, predictions))
print("Confusion Matrix:\n", confusion_matrix(y, predictions))

# 6- **Test the trained model and the optimized model: (6-1) Load the Test Dataset & (6-2) Test the model**

# 6-1- Load the test dataset

In [None]:
# Load the test dataset (Mentioned in Table (IV) of the Paper)
# NOTE(review): 'AR-Test.csv' uses a hyphen while the other two files use an
# underscore - confirm the file name matches the file on disk.
AR_Test = pd.read_csv(r'AR-Test.csv')
CG_Test = pd.read_csv(r'CG_Test.csv')
Other_Test = pd.read_csv(r'Other_Test.csv')
# ** Notice: Test the Model with direction considering (DL or UL)

# Set the features of the test dataset and label them.
# `.assign` builds a new DataFrame, so the label is not written into a column
# subset of the loaded frame (the pattern that raises SettingWithCopyWarning).
AR_Test = AR_Test[features].assign(Class='AR')
CG_Test = CG_Test[features].assign(Class='CG')
Other_Test = Other_Test[features].assign(Class='Others')
# ** Notice: Pay attention to the feature variable name to match the features of the test dataset

# Combine the Test Dataset and shuffle it with a fixed seed
DT_Test =  pd.concat([AR_Test,CG_Test,Other_Test],sort=False).sample(frac=1, random_state=30).reset_index(drop=True)


# From here on X_test / y_test refer to this separately loaded test set and
# replace the 10% split produced earlier by train_test_split.
X_test = DT_Test[features]
y_test = DT_Test['Class']

# Print the number of samples of each class in the Test Dataset
# (row counts, matching how the training-set counts are reported)
print('No# of AR samples',DT_Test[DT_Test['Class']=='AR'].shape[0],
      '\nNo# of CG samples-->',DT_Test[DT_Test['Class']=='CG'].shape[0],
      '\nNo# of Other Apps samples-->',DT_Test[DT_Test['Class']=='Others'].shape[0])

# 6-2- **Test and evaluate the Model**
> *(6-2-1) cls_model (simple model without hyperparameter tuning) (6-2-2) cls_model_optimized (optimized model with hyperparameter tuning by grid search)*









6-2-1- **Test cls_model**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np


# Predict the test samples with the baseline model (cls_model)
predictions = cls_model.predict(X_test)

# Accuracy, the three 'weighted'-average metrics, and the confusion matrix
accuracy = accuracy_score(y_test, predictions)
precision, recall, f1 = (
    score_fn(y_test, predictions, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, predictions)

# Print the metrics
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)
print("Confusion Matrix:\n", conf_matrix)

# Binarized form of the true labels, as would be needed for ROC curves.
# No ROC curve is computed in this cell; these two values are only prepared here.
y_test_bin = label_binarize(y_test, classes=np.unique(y_test))
n_classes = y_test_bin.shape[1]

# Print the features used for classification
print('Features are', features)

6-2-2- **Test cls_model_optimized**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np

# Predict the test samples with the grid-search-tuned model (cls_model_optimized)
predictions = cls_model_optimized.predict(X_test)

# Accuracy, the three 'weighted'-average metrics, and the confusion matrix
accuracy = accuracy_score(y_test, predictions)
precision, recall, f1 = (
    score_fn(y_test, predictions, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, predictions)

# Print the metrics
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)
print("Confusion Matrix:\n", conf_matrix)

# Binarized form of the true labels, as would be needed for ROC curves.
# No ROC curve is computed in this cell; these two values are only prepared here.
y_test_bin = label_binarize(y_test, classes=np.unique(y_test))
n_classes = y_test_bin.shape[1]

# Print the features used for classification
print('Features are', features)