In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_seq_items', None)

In [3]:
# Set the range of dates
dates = pd.date_range(start='2023-03-01', end='2023-03-10', freq='1D')

# Set the range of hours
hours = pd.date_range(start='2023-03-01', end='2023-03-02', freq='1H').time

# Set the range of eNB IDs
enb_ids = np.random.randint(low=100000, high=999999, size=30)

# Create a list to hold the dataframes for each date
df_list = []

for date in dates:
    # Create a list to hold the dataframes for each hour
    hour_df_list = []
    for hour in hours:
        # Create a list to hold the dataframes for each eNB ID
        enb_df_list = []
        for enb_id in enb_ids:
            # Determine the trend of ENDC attempts for each eNB
            attempts = np.abs(np.random.normal(loc=100, scale=20)) + 0.01
            if date < pd.Timestamp('2023-03-06'):
                attempts += np.abs(np.random.normal(loc=50, scale=10)) + 0.01
            elif date >= pd.Timestamp('2023-03-06') and enb_id < enb_ids[15]:
                attempts += np.abs(np.random.normal(loc=50, scale=10)) + 0.01
            else:
                attempts += np.abs(np.random.normal(loc=10, scale=10)) + 0.01
            
            # Randomly determine the number of ENDC attempts for each eNB and hour
            num_attempts = np.random.poisson(lam=attempts)
            
            # Create a dataframe with the ENDC attempts for each eNB and hour
            enb_hour_df = pd.DataFrame({'Date': [date]*num_attempts,
                                        'Hour': [hour.strftime('%H:%M')]*num_attempts,
                                        'eNB_ID': [enb_id]*num_attempts,
                                        'ENDC_Attempt': [1]*num_attempts})
            
            # Add the dataframe to the list for the eNB ID
            enb_df_list.append(enb_hour_df)
        
        # Concatenate the dataframes for the eNB IDs for the hour
        hour_enb_df = pd.concat(enb_df_list)
        
        # Add the dataframe to the list for the hour
        hour_df_list.append(hour_enb_df)
    
    # Concatenate the dataframes for the hours for the date
    date_hour_df = pd.concat(hour_df_list)
    
    # Add the dataframe to the list for the date
    df_list.append(date_hour_df)

# Concatenate the dataframes for each date
df = pd.concat(df_list)

In [4]:
df.shape

(1020119, 4)

In [5]:
df.pivot_table(index='Date', columns='eNB_ID', values='ENDC_Attempt', aggfunc='sum')

eNB_ID,157039,172147,229338,235702,239479,290549,303878,398499,415240,424205,447970,448885,517791,559040,563267,564497,612870,624645,630679,632643,640855,681915,686513,747750,756518,801092,805041,806166,939011,943824
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2023-03-01,3736,3745,3669,3826,3883,3817,3750,3852,3792,3659,3880,3763,3619,3852,3545,3753,3765,3634,3723,3720,3684,3901,3833,3661,3957,3888,4064,3741,3859,3682
2023-03-02,3681,3708,3717,3706,3729,3484,3665,4187,3821,3747,3498,3703,3758,3719,3656,3786,3949,3610,3873,3862,3971,3831,3732,3913,3964,3649,3631,3548,3795,3807
2023-03-03,3932,3853,3855,3687,3920,3697,3677,4072,3647,3728,3719,3648,3942,3857,3629,3673,3544,3673,3823,3840,3720,3650,3712,3804,3850,3710,3557,3838,3826,3745
2023-03-04,3872,3555,3996,3690,3864,4004,3901,3873,3917,3695,3936,3671,3684,3784,3728,3840,3806,3640,3601,3747,3917,3892,3922,3722,3647,3779,3643,3806,3784,3500
2023-03-05,3875,3660,3866,3647,3645,3713,3840,3676,3733,3565,3715,3679,3731,4006,3497,3938,3848,3813,3781,3555,3961,4000,3873,3685,3872,3804,3884,3823,3676,3720
2023-03-06,3731,3661,3678,3599,3799,3935,3583,2921,2803,2789,2881,2684,2805,2854,2939,2638,2951,2743,2654,2855,2699,2794,2909,2906,2662,2711,3082,2857,2757,2741
2023-03-07,3746,3675,3728,3786,3797,3816,3814,2792,2716,2785,2863,2947,2928,2777,2595,2708,2772,2985,2904,2873,2968,2893,2849,2688,2855,2930,2855,2689,2633,2769
2023-03-08,3829,3788,3712,3429,3855,3798,3806,2886,2839,2832,2902,2736,2918,2875,2725,2661,2684,2835,2572,2960,3154,2705,2818,2830,2918,2785,2816,2779,2932,2824
2023-03-09,3515,4000,3872,3800,3701,3702,4180,2712,2812,2897,2883,2745,2904,3024,2593,2762,2706,2687,2991,2742,2752,2718,2903,2746,2664,2892,2795,3115,2856,2963
2023-03-10,3858,3521,3452,3694,3705,3972,3747,2742,2635,2767,2789,2605,2713,2978,2877,2763,3006,2806,2759,2965,2566,2882,2992,2821,2769,2714,2810,3013,2661,2667


In [6]:
# Reshape the DataFrame using pivot_table()
pivot_table = pd.pivot_table(df, values='ENDC_Attempt', index='eNB_ID', columns='Date', aggfunc='sum')

# Calculate the average value before and after 2023-03-15
before_mean = pivot_table.loc[:, :'2023-03-06'].mean(axis=1)
after_mean = pivot_table.loc[:, '2023-03-06':].mean(axis=1)

In [7]:
pivot_table

Date,2023-03-01 00:00:00,2023-03-02 00:00:00,2023-03-03 00:00:00,2023-03-04 00:00:00,2023-03-05 00:00:00,2023-03-06 00:00:00,2023-03-07 00:00:00,2023-03-08 00:00:00,2023-03-09 00:00:00,2023-03-10 00:00:00
eNB_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
157039,3736,3681,3932,3872,3875,3731,3746,3829,3515,3858
172147,3745,3708,3853,3555,3660,3661,3675,3788,4000,3521
229338,3669,3717,3855,3996,3866,3678,3728,3712,3872,3452
235702,3826,3706,3687,3690,3647,3599,3786,3429,3800,3694
239479,3883,3729,3920,3864,3645,3799,3797,3855,3701,3705
...,...,...,...,...,...,...,...,...,...,...
801092,3888,3649,3710,3779,3804,2711,2930,2785,2892,2714
805041,4064,3631,3557,3643,3884,3082,2855,2816,2795,2810
806166,3741,3548,3838,3806,3823,2857,2689,2779,3115,3013
939011,3859,3795,3826,3784,3676,2757,2633,2932,2856,2661


In [8]:
before_mean

eNB_ID
157039    3804.500000
172147    3697.000000
229338    3796.833333
235702    3692.500000
239479    3806.666667
290549    3775.000000
303878    3736.000000
398499    3763.500000
415240    3618.833333
424205    3530.500000
447970    3604.833333
448885    3524.666667
517791    3589.833333
559040    3678.666667
563267    3499.000000
564497    3604.666667
612870    3643.833333
624645    3518.833333
630679    3575.833333
632643    3596.500000
640855    3658.666667
681915    3678.000000
686513    3663.500000
747750    3615.166667
756518    3658.666667
801092    3590.166667
805041    3643.500000
806166    3602.166667
939011    3616.166667
943824    3532.500000
dtype: float64

In [9]:
after_mean

eNB_ID
157039    3735.8
172147    3729.0
229338    3688.4
235702    3661.6
239479    3771.4
290549    3844.6
303878    3826.0
398499    2810.6
415240    2761.0
424205    2814.0
447970    2863.6
448885    2743.4
517791    2853.6
559040    2901.6
563267    2745.8
564497    2706.4
612870    2823.8
624645    2811.2
630679    2776.0
632643    2879.0
640855    2827.8
681915    2798.4
686513    2894.2
747750    2798.2
756518    2773.6
801092    2806.4
805041    2871.6
806166    2890.6
939011    2767.8
943824    2792.8
dtype: float64

In [10]:
# Calculate the percent change
percent_change = (after_mean - before_mean) / before_mean * 100

In [11]:
percent_change

eNB_ID
157039    -1.805756
172147     0.865567
229338    -2.855889
235702    -0.836831
239479    -0.926445
290549     1.843709
303878     2.408994
398499   -25.319516
415240   -23.704693
424205   -20.294576
447970   -20.562208
448885   -22.165689
517791   -20.508844
559040   -21.123596
563267   -21.526150
564497   -24.919549
612870   -22.504688
624645   -20.109885
630679   -22.367746
632643   -19.949951
640855   -22.709548
681915   -23.915171
686513   -20.999045
747750   -22.598313
756518   -24.190962
801092   -21.830927
805041   -21.185673
806166   -19.753852
939011   -23.460386
943824   -20.939844
dtype: float64

In [12]:
# Filter the eNBs that have decreased in average value by more than 20%
filtered_enbs = percent_change[percent_change < -20].index.tolist()
filtered_enbs

[398499,
 415240,
 424205,
 447970,
 448885,
 517791,
 559040,
 563267,
 564497,
 612870,
 624645,
 630679,
 640855,
 681915,
 686513,
 747750,
 756518,
 801092,
 805041,
 939011,
 943824]

In [13]:
filtered_df = df.loc[df['eNB_ID'].isin(filtered_enbs)]
filtered_df.head(20)

Unnamed: 0,Date,Hour,eNB_ID,ENDC_Attempt
0,2023-03-01,00:00,756518,1
1,2023-03-01,00:00,756518,1
2,2023-03-01,00:00,756518,1
3,2023-03-01,00:00,756518,1
4,2023-03-01,00:00,756518,1
...,...,...,...,...
15,2023-03-01,00:00,756518,1
16,2023-03-01,00:00,756518,1
17,2023-03-01,00:00,756518,1
18,2023-03-01,00:00,756518,1
