# **(2) Multi-Class Random Forest (RF) Classification Model of AR, CG and Others (Uplink/Downlink)**

# 1- **Read the Datasets**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
from sklearn import tree
import graphviz

# Load datasets
### Enter your Dataset file address with csv format
ar_data = pd.read_csv(r'AR.csv')
cg_data = pd.read_csv(r'CG.csv')
others_data = pd.read_csv(r'others.csv')

# Set the features
features = ['IPI', 'FS', 'IFI']

# Keep only the feature columns. The explicit .copy() guarantees that the
# 'Class' column added below is written to an independent frame.
ar_data = ar_data[features].copy()
cg_data = cg_data[features].copy()
others_data = others_data[features].copy()

# Label the datasets
ar_data['Class'] = 'AR'
cg_data['Class'] = 'CG'
others_data['Class'] = 'Others'

# Show the number of samples in the training dataset
print(f'AR samples are {ar_data.shape[0]}\nCG samples are {cg_data.shape[0]}\nother samples are {others_data.shape[0]}\n###########')

# Combine and shuffle the samples (Dataset of Combined Samples of AR & CG & other APPs (DS_CSACO)).
# The shuffle is seeded so that the row order is reproducible between runs.
DS_CSACO = pd.concat([ar_data, cg_data, others_data]).sample(frac=1, random_state=30).reset_index(drop=True)

# Alias for code that refers to the combined dataset as `combined_data`.
combined_data = DS_CSACO

# Per-class sample counts of the combined dataset
class_counts = DS_CSACO['Class'].value_counts()
print(f"The #No of samples belong to AR is {class_counts.get('AR', 0)}")
print(f"The #No of samples belong to CG is {class_counts.get('CG', 0)}")
print(f"The #No of samples belong to Other Apps is {class_counts.get('Others', 0)}")


# 2- **Train the RF model (AR, CG and Others)**

In [None]:
# Train the multi-class Random Forest (RF) model
# Feature matrix and labels of the combined, shuffled dataset
X = DS_CSACO[features]
y = DS_CSACO['Class']

# Class weight: inverse of the class frequency, so that every class carries
# the same total weight regardless of how many samples it has.
class_weights = {label: 1 / count for label, count in y.value_counts().items()}

# Hold out 10% of the samples for evaluation and fit a 100-tree forest on the rest.
# random_state is fixed for both the split and the forest.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=30)
rf_model = RandomForestClassifier(n_estimators=100, class_weight=class_weights, random_state=30)
rf_model.fit(X_train, y_train)

# 3- **Evaluate the Model**

In [None]:
# Evaluate a fitted classifier on a held-out set
def evaluate_model(model, X_test, y_test):
    """Score `model` on (X_test, y_test).

    Returns a 5-tuple: accuracy, precision, recall, F1 and the confusion
    matrix. Precision, recall and F1 are computed with average='weighted'.
    """
    predicted = model.predict(X_test)
    weighted_scores = tuple(
        scorer(y_test, predicted, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    )
    return (
        accuracy_score(y_test, predicted),
        *weighted_scores,
        confusion_matrix(y_test, predicted),
    )

# Score the trained forest on the 10% hold-out split
metrics = evaluate_model(rf_model, X_test, y_test)
accuracy, precision, recall, f1, conf = metrics

# Report the scalar metrics followed by the confusion matrix
print(
    f'AR Model Metrics:accuracy is {accuracy} |\nPrecision is {precision} |\nrecall is {recall} |\nf-score is {f1}',
    '\nIt is confusion matrix:\n',
    conf,
)

#print(y_test['Class'][0])

In [None]:
# Cross Validation Model Test
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
# Metrics collected in a single 10-fold cross-validation run. Accuracy is
# requested here as well, so no second cross-validation pass is needed.
scoring = {'accuracy': 'accuracy',
           'precision': make_scorer(precision_score, average='macro'),
           'recall': make_scorer(recall_score, average='macro'),
           'f1_score': make_scorer(f1_score, average='macro')}

cv_results = cross_validate(rf_model, X, y, cv=10, scoring=scoring)

# Macro-averaged scores, averaged over the folds
print("Precision:", cv_results['test_precision'].mean())
print("Recall:", cv_results['test_recall'].mean())
print("F1 Score:", cv_results['test_f1_score'].mean())

print("**************************************************************")
# Per-fold accuracy of the random forest (name kept for compatibility)
dt_cv_scores = cv_results['test_accuracy']
print("Random Forest - CV Scores (Accuracy):", dt_cv_scores)
print("Random Forest - Average CV Score (Accuracy Mean):", dt_cv_scores.mean())

# Confusion matrix from the hold-out evaluation and the features in use
print(conf)
print(features)

# 4- **Improved Model**

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# Feature matrix and labels of the combined training dataset
X = DS_CSACO[features]
y = DS_CSACO['Class']

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300, 400],  # Number of trees in the forest
    # 'auto' is no longer accepted by scikit-learn (it was the same as 'sqrt'
    # for classifiers); None means every feature is considered at each split.
    'max_features': ['sqrt', None],
    'max_depth': [10, 20, 30, 40, None],   # Maximum number of levels in tree
    'min_samples_split': [2, 5, 10],   # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2, 4],     # Minimum number of samples required at each leaf node
    'bootstrap': [True, False]         # Method of selecting samples for training each tree
}

# Initialize GridSearch with 3-fold cross-validation, using all CPU cores
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)

# Fit the grid search to the data
grid_search.fit(X, y)

# Print the best parameters
print("Best parameters found: ", grid_search.best_params_)

# Model refitted on X, y with the best parameter combination
rf_model_optimized = grid_search.best_estimator_

# Evaluate the model.
# NOTE: these predictions are made on the same data the search was fitted on,
# so this report is optimistic; section 5 evaluates on a separate test set.
predictions = rf_model_optimized.predict(X)
print(classification_report(y, predictions))
confusion = confusion_matrix(y, predictions)
print("Confusion Matrix:\n", confusion)

# 5- **Test the RF model** *((5-1) load the test dataset, (5-2) test the model)*

### 5-1- **Load the test dataset**

In [None]:
# Read the test captures (Mentioned in Table (IV) of the Paper), keep only the
# model features and tag every row with its class label.
# ** Notice: Test the Model with direction considering (DL or UL)
# ** Notice: the names in `features` must match the column names of the test files
AR_Test = pd.read_csv(r'AR-Test.csv')[features].assign(Class='AR')
CG_Test = pd.read_csv(r'CG_Test.csv')[features].assign(Class='CG')
Other_Test = pd.read_csv(r'Other_Test.csv')[features].assign(Class='Others')

# Stack the three labelled frames, then shuffle the rows with a fixed seed
stacked_test = pd.concat([AR_Test, CG_Test, Other_Test], sort=False)
DT_Test = stacked_test.sample(frac=1, random_state=30).reset_index(drop=True)

# Inputs and expected labels for the test cells
X_test = DT_Test[features]
y_test = DT_Test['Class']

# Print the shape of each class subset of the test dataset
print('No# of AR samples', DT_Test.loc[y_test == 'AR'].shape,
      '\nNo# of CG samples-->', DT_Test.loc[y_test == 'CG'].shape,
      '\nNo# of Other Apps samples-->', DT_Test.loc[y_test == 'Others'].shape)


### 5-2- **Test rf_model** (without Hyperparameter Tuning)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np

# Predict the test-set labels with the baseline (untuned) forest
predictions = rf_model.predict(X_test)

# Accuracy, plus precision / recall / F1 computed with average='weighted'
accuracy = accuracy_score(y_test, predictions)
precision, recall, f1 = [
    metric(y_test, predictions, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
]
conf_matrix = confusion_matrix(y_test, predictions)

# Print the metrics, one per line
for title, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
    ("Confusion Matrix:\n", conf_matrix),
):
    print(title, value)

# Print the features used for training
print(features)

### 5-3- **Test rf_model_optimized** (with Hyperparameter Tuning)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np

# Predict the test-set labels with the grid-search-tuned forest
predictions = rf_model_optimized.predict(X_test)

# Confusion matrix first, then the scalar scores (average='weighted')
conf_matrix = confusion_matrix(y_test, predictions)
f1 = f1_score(y_test, predictions, average='weighted')
recall = recall_score(y_test, predictions, average='weighted')
precision = precision_score(y_test, predictions, average='weighted')
accuracy = accuracy_score(y_test, predictions)

# Print the metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"Confusion Matrix:\n {conf_matrix}")

# Print the features used for training
print(features)