In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import time
import datetime
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Parameter windowing
input_width = 60*4
label_width = 60
shift = label_width
total_window_size = input_width + shift
OUT_STEPS = label_width
# Definisikan irisan untuk input dan label
input_slice = slice(0, input_width)
label_start = total_window_size - label_width
labels_slice = slice(label_start, None)
train_df = None
output_selected=['RRU.PrbUsedDl']
train_name_cells=['S1/B2/C1']
test_name_cells=['S7/B2/C1']

In [2]:
def  import_data(file_path="D:\\KULIAH\\teep\\AI\\dataset\\08_01_2024\\CellReports.csv"):
    
    df_begin=pd.read_csv(file_path)
    df_1=df_begin.copy()
    convert_time=pd.to_datetime(df_1['timestamp'], unit='ms',origin='unix')
    df_1.insert(df_1.columns.get_loc('timestamp') + 1, 'datetime_column', convert_time)
    df_1.insert(df_1.columns.get_loc('datetime_column') + 2, 'hour', df_1['datetime_column'].dt.hour+df_1['datetime_column'].dt.minute/60)
    df_1.set_index('datetime_column', inplace=True)
    df_1.drop(columns=['timestamp'], inplace=True)
    
    # Find columns where correlation with 'RRU.PrbUsedDl'is greater than 0.5
    columns_with_high_corr = ['RRU.PrbUsedDl','RRC.ConnMean', 'DRB.UEThpDl']
    seleted_columns = ['Viavi.Cell.Name']+ columns_with_high_corr 
    df_2= df_1[seleted_columns].copy()
    cell_name= test_name_cells+train_name_cells
    df= df_2[df_2['Viavi.Cell.Name'].isin(cell_name)].copy()
    
    return df, cell_name

In [3]:
def standardize_data(data, train_df, isoutput=True, column_output=output_selected):
    if isoutput:
        median = train_df[column_output].median().values
        q1 = train_df[column_output].quantile(0.25).values
        q3 = train_df[column_output].quantile(0.75).values
    else:
        median = train_df.median().values
        q1 = train_df.quantile(0.25).values
        q3 = train_df.quantile(0.75).values

    iqr = q3 - q1

    # Reshape for broadcasting with 2D matrix
    median = median.reshape(1, -1)
    iqr = iqr.reshape(1, -1)

    return (data - median) / iqr

def inverse_standardize_data(data, train_df, isoutput=True, column_output=output_selected):
    if isoutput:
        median = train_df[column_output].median().values
        q1 = train_df[column_output].quantile(0.25).values
        q3 = train_df[column_output].quantile(0.75).values
    else:
        median = train_df.median().values
        q1 = train_df.quantile(0.25).values
        q3 = train_df.quantile(0.75).values

    iqr = q3 - q1

    # Reshape for broadcasting with 2D matrix
    median = median.reshape(1, -1)
    iqr = iqr.reshape(1, -1)

    return data * iqr + median

In [4]:
def make_windows(data_x,data_y, total_window_size, input_slice, labels_slice):
    x = []
    y = []
    for i in range(len(data_x) - total_window_size + 1):
        window_x = data_x[i:i+total_window_size]
        x.append(window_x[input_slice])

    for i in range(len(data_y) - total_window_size + 1):
        window_y= data_y[i:i+total_window_size]
        y.append(window_y[labels_slice])

    return np.array(x), np.array(y)


In [5]:
def compute_metrics(actual_df, predictions_df):
    mae = mean_absolute_error(actual_df, predictions_df)
    mse = mean_squared_error(actual_df, predictions_df)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((actual_df - predictions_df) / actual_df)) * 100
    return mae, mape, mse, rmse

def process_predictions(model, x_scaled, actual_df, input_width, label_width, columns):
    predictions = model.predict(x_scaled[slice(None, None, label_width), :, :])
    predictions_reshaped = predictions.reshape(-1, len(columns))
    predictions_unscaled = inverse_standardize_data(predictions_reshaped, isoutput=True, train_df=train_df)
    predictions_df = pd.DataFrame(predictions_unscaled, columns=columns, index=actual_df[input_width:].index)
    return predictions_df
def compute_error( x_scaled, actual_df, columns, model):
    
    # Process predictions for training and validation sets
    predictions_df = process_predictions(model, x_scaled, actual_df, input_width, label_width, columns)

    display(pd.concat((predictions_df.rename(columns={columns[0]: '%s_predict'%columns[0]}), actual_df[input_width:]), axis=1))
    mae, mape, mse, rmse = compute_metrics(actual_df[input_width:], predictions_df)
    
    print(f"MAE: {mae}, MAPE %: {mape}, MSE: {mse}, RMSE: {rmse}")

In [6]:

def plot_predictions(val_df, x_val_scaled, model, val_scaled, input_width, label_width, num_features):
    num_output = 2


    # Predictions for validation data
    val_predictions = model.predict(x_val_scaled[slice(None,None,label_width), :, :])
    predictions_reshaped = val_predictions.reshape(-1, num_output)
    predictions_unscaled = inverse_standardize_data(predictions_reshaped, isoutput=True, train_df=train_df)
    val_predictions_df = pd.DataFrame(predictions_unscaled, columns=output_selected, index=val_df.index)

    # Generate new predictions
    val_new = val_scaled.tail(input_width).to_numpy()
    val_new = val_new.reshape(1, -1, num_features)
    generate_predictions = model.predict(val_new)
    generate_predictions = inverse_standardize_data(generate_predictions.reshape(-1, num_output), isoutput=True, train_df=train_df)
    generate_predictions_df = pd.DataFrame(generate_predictions, columns=output_selected, index=val_df.tail(label_width).index + pd.DateOffset(hours=1))

    # Combine predictions
    all_predictions = pd.concat([val_predictions_df, generate_predictions_df], axis=0)
    inputs_graph = val_df[output_selected].iloc[-label_width * 3:-label_width]
    labels_graph = val_df[output_selected].iloc[-label_width:]
    predictions_graph = all_predictions.loc[labels_graph.index]
    new_predictions_graph = all_predictions.loc[generate_predictions_df.index]

    # Plotting
    plt.figure(figsize=(16, 8))
    for n, feature in enumerate(all_predictions.columns):
        plt.subplot(len(all_predictions.columns), 1, n + 1)
        plt.plot(inputs_graph.index, inputs_graph[feature], label='Inputs for orange', marker='.', zorder=-100, markersize=5)
        plt.plot(labels_graph.index, labels_graph[feature], label='actual output for orange and inputs for red', marker='.', zorder=-100, c='#2ca02c')
        plt.scatter(predictions_graph.index, predictions_graph[feature], marker='X', edgecolors='k', label='Prediction', c='#ff7f0e', s=int(32 * 1.5))
        plt.scatter(new_predictions_graph.index, new_predictions_graph[feature], marker='+', label='New prediction', c='#FF012D', s=int(32 * 1.5))
        plt.ylabel(feature)
        plt.legend()

    plt.subplots_adjust(hspace=0.2, top=1)
    plt.show()


In [7]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import time

def tensorflow_cnn(X_train_scaled, Y_train_scaled, X_validation_scaled, Y_validation_scaled, 
                    learning_rate, target_error, max_epochs, max_sampel_batch,
                    patience, save_best_model_path, validation_data=False, load_model=None, out_steps=OUT_STEPS):
    global model

    class MAEStopCallback(tf.keras.callbacks.Callback):
        def __init__(self, threshold):
            super(MAEStopCallback, self).__init__()
            self.threshold = threshold

        def on_epoch_end(self, epoch, logs=None):
            if logs['mae'] < self.threshold:
                print(f"\nMAE reached below {self.threshold}. Stopping training.")
                self.model.stop_training = True

                
    input_width = X_train_scaled.shape[1]
    CONV_WIDTH = input_width # Define the width of the convolutional window
    num_features = X_train_scaled.shape[2]
    num_output = Y_train_scaled.shape[2]
    out_steps = out_steps

    model = tf.keras.models.Sequential()
    if load_model is None:
        model.add(tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :], input_shape=(input_width, num_features)))
        model.add(tf.keras.layers.Conv1D(120, activation='relu', kernel_size=int(CONV_WIDTH/8), padding='same'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv1D(120, activation='relu', kernel_size=int(CONV_WIDTH/4),padding='same') )
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv1D(120, activation='relu', kernel_size=int(CONV_WIDTH),padding='valid') )
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dense(out_steps * num_output, kernel_initializer=tf.initializers.zeros()))
        model.add(tf.keras.layers.Reshape([out_steps, num_output]))

    else:
        print("Load model")
        model = tf.keras.models.load_model(load_model)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mse', 'mae', 'mape', tf.keras.metrics.RootMeanSquaredError(name='rmse')])
    mae_stop_callback = MAEStopCallback(threshold=target_error)
    checkpoint_callback = ModelCheckpoint(
        save_best_model_path,
        monitor='val_loss',     
        mode='min',         
        save_best_only=True, 
        verbose=1            
    )

    early_stopping_callback = EarlyStopping(
        monitor='loss',     
        mode='min',         
        patience=patience,    
        restore_best_weights=True,
        verbose=1            
    )

    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=int(1), min_lr=0.00001, verbose=1)

    time_start = time.time()
    if validation_data:
        model.fit(X_train_scaled, Y_train_scaled, epochs=max_epochs, batch_size=max_sampel_batch,  
                  callbacks=[mae_stop_callback, checkpoint_callback, early_stopping_callback, reduce_lr], 
                  validation_data=(X_validation_scaled, Y_validation_scaled), validation_batch_size=max_sampel_batch)
    else:
        model.fit(X_train_scaled, Y_train_scaled, epochs=max_epochs, batch_size=max_sampel_batch, 
                  callbacks=[mae_stop_callback, checkpoint_callback, early_stopping_callback, reduce_lr])
    
    print("time computation seconds: ", time.time() - time_start)
    
    loss, MSE, MAE, RMSE, MAPE = model.evaluate(X_train_scaled, Y_train_scaled)
    print("loss: ", loss, "MSE: ", MSE, "MAE: ", MAE, "RMSE: ", RMSE, "MAPE: ", MAPE)
    
    return model, loss, MSE, MAE, RMSE, MAPE


In [8]:
def running_program(train_df,val_df, index_cell, name_file, name_file_before):
    
    train_scaled = standardize_data(train_df, isoutput=False, train_df=train_df)
    val_scaled= standardize_data(val_df, isoutput=False, train_df=train_df)
    # Membuat windowed dataset untuk set pelatihan, validasi, dan pengujian

    x_train_scaled, y_train_scaled = make_windows(train_scaled.to_numpy(), train_scaled[output_selected].to_numpy(),total_window_size, input_slice, labels_slice)
    x_val_scaled, y_val_scaled = make_windows(val_scaled.to_numpy(), val_scaled[output_selected].to_numpy(),total_window_size, input_slice, labels_slice)
    print(x_train_scaled.shape, y_train_scaled.shape)
    if index_cell==0:
        model, loss, MSE, MAE, RMSE,  MAPE  = tensorflow_cnn(x_train_scaled, y_train_scaled, x_val_scaled, y_val_scaled,
                                                        learning_rate=0.005, target_error=0.001,  max_epochs=20, max_sampel_batch=int(input_width/2), 
                                                        patience=6,  save_best_model_path = name_file, 
                                                        validation_data=True, load_model="testaja.hdf5", out_steps=OUT_STEPS)
        model.summary()
        model.save("testaja_part2.hdf5")
    else:
        model, loss, MSE, MAE, RMSE,  MAPE  = tensorflow_cnn(x_train_scaled, y_train_scaled, x_val_scaled, y_val_scaled,
                                                        learning_rate=0.0001, target_error=0.001,  max_epochs=10, max_sampel_batch=int(input_width/2), 
                                                        patience=6,  save_best_model_path = name_file, 
                                                        validation_data=True, load_model=name_file_before, out_steps=OUT_STEPS)

    model = tf.keras.models.load_model('%s'%name_file)
    columns = output_selected
    output_actual_train = train_df[columns]
    output_actual_val = val_df[columns]
    print("Training Metrics:")
    compute_error(x_train_scaled,  output_actual_train,columns, model)
    print("\nValidation Metrics:")
    compute_error(x_val_scaled,  output_actual_val,columns, model)
    #print("Training Plot:")
    #plot_predictions( output_actual_train[input_width:], x_train_scaled, model, train_scaled, input_width, label_width, num_features)
    #print("\nValidation (test) Plot:")
    #plot_predictions( output_actual_val[input_width:], x_val_scaled, model, val_scaled, input_width, label_width, num_features)
    


In [9]:
df_start, cell_name= import_data("D:\\KULIAH\\teep\\AI\\dataset\\08_01_2024\\CellReports.csv")
#now = datetime.datetime.now()
timestamp = "cnn0802_testajapart2"
for index in range(0,1):
    print(index)

    train_df = df_start[df_start['Viavi.Cell.Name'] == train_name_cells[index]]
    train_df = train_df.loc[~train_df.index.duplicated()]
    train_df=train_df.drop(columns=['Viavi.Cell.Name']).astype(float).copy()

    val_df= df_start[df_start['Viavi.Cell.Name'] == test_name_cells[0]]
    val_df=val_df.loc[~val_df.index.duplicated()]
    val_df=val_df.drop(columns=['Viavi.Cell.Name']).astype(float).copy()

    name_file='4hour_%s_%s.hdf5'%(timestamp, index+1)
    name_file_before='4hour_%s_%s.hdf5'%(timestamp, index)
    print("name_file: ", name_file)
    print("name_file_before: ", name_file_before)
    print("Cell Name: ", cell_name[index])
    #display(train_df)
    running_program(train_df=train_df, val_df=val_df, index_cell=index, name_file=name_file, name_file_before=name_file_before)

0
name_file:  4hour_cnn0802_testajapart2_1.hdf5
name_file_before:  4hour_cnn0802_testajapart2_0.hdf5
Cell Name:  S7/B2/C1
(11221, 240, 3) (11221, 60, 1)
Load model
Epoch 1/20
Epoch 1: val_loss improved from inf to 0.59397, saving model to 4hour_cnn0802_testajapart2_1.hdf5
Epoch 2/20
Epoch 2: val_loss improved from 0.59397 to 0.59284, saving model to 4hour_cnn0802_testajapart2_1.hdf5
Epoch 3/20
Epoch 3: val_loss improved from 0.59284 to 0.56372, saving model to 4hour_cnn0802_testajapart2_1.hdf5
Epoch 4/20
Epoch 4: val_loss did not improve from 0.56372
Epoch 5/20
Epoch 5: val_loss improved from 0.56372 to 0.55660, saving model to 4hour_cnn0802_testajapart2_1.hdf5
Epoch 6/20
Epoch 6: val_loss improved from 0.55660 to 0.52126, saving model to 4hour_cnn0802_testajapart2_1.hdf5
Epoch 7/20
Epoch 7: val_loss did not improve from 0.52126
Epoch 8/20
Epoch 8: val_loss did not improve from 0.52126
Epoch 9/20
Epoch 9: val_loss did not improve from 0.52126
Epoch 10/20
Epoch 10: val_loss did not impr

Unnamed: 0_level_0,RRU.PrbUsedDl_predict,RRU.PrbUsedDl
datetime_column,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-08-01 04:00:00,14.537268,14.966667
2024-08-01 04:01:00,42.449425,53.983333
2024-08-01 04:02:00,14.217965,17.550000
2024-08-01 04:03:00,22.751361,17.533333
2024-08-01 04:04:00,40.528892,47.766667
...,...,...
2024-08-08 23:55:00,24.632424,25.116667
2024-08-08 23:56:00,30.655791,24.666667
2024-08-08 23:57:00,52.482366,59.133333
2024-08-08 23:58:00,13.610628,6.583333


MAE: 2.4363772769447745, MAPE %: inf, MSE: 9.382794276866226, RMSE: 3.063134714123136

Validation Metrics:


Unnamed: 0_level_0,RRU.PrbUsedDl_predict,RRU.PrbUsedDl
datetime_column,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-08-01 04:00:00,34.344923,1.766667
2024-08-01 04:01:00,23.095740,8.266667
2024-08-01 04:02:00,40.274228,6.433333
2024-08-01 04:03:00,32.888792,14.766667
2024-08-01 04:04:00,34.089730,4.016667
...,...,...
2024-08-08 23:55:00,19.902885,59.733333
2024-08-08 23:56:00,18.725217,37.333333
2024-08-08 23:57:00,33.136024,12.033333
2024-08-08 23:58:00,35.823708,3.000000


MAE: 18.588708065649747, MAPE %: inf, MSE: 500.73597957483867, RMSE: 22.377130727035553
