In [1]:
import pandas as pd

### old ground truth

In [2]:
# Previous ground-truth table: one 'real_consumo' value per month of 2024,
# indexed by a 'month/year' label stored in the index named 'date'.
old_real_values = pd.DataFrame(
    {
        'real_consumo': [
            4885.10, 4880.14, 5391.35, 5925.34, 5295.28, 4996.13,
            5256.78, 5069.64, 4694.26, 5215.3, 5273.88, 5501.23,
        ],
    },
    index=pd.Index(
        [
            '1/2024', '2/2024', '3/2024', '4/2024', '5/2024', '6/2024',
            '7/2024', '8/2024', '9/2024', '10/2024', '11/2024', '12/2024',
        ],
        name='date',
    ),
)
display(old_real_values)


Unnamed: 0_level_0,real_consumo
date,Unnamed: 1_level_1
1/2024,4885.1
2/2024,4880.14
3/2024,5391.35
4/2024,5925.34
5/2024,5295.28
6/2024,4996.13
7/2024,5256.78
8/2024,5069.64
9/2024,4694.26
10/2024,5215.3


### ground truth

In [19]:
# Current ground truth: read the measured consumption and aggregate it to
# one total per calendar month, indexed by a 'month/year' label.
path = 'results/ground_truth_data/ground_truth.csv'
real_values = pd.read_csv(path)

# Parse the timestamps once; they are only needed to derive the grouping keys.
timestamps = pd.to_datetime(real_values['datetime'])

# The measured values are summed as-is: no "* 24" conversion is applied here
# (presumably 'consumo_mwh' is already expressed in MWh -- TODO confirm).
real_values = (
    real_values
    .drop(columns=['datetime'])
    .astype({'consumo_mwh': float})
    .rename(columns={'consumo_mwh': 'real_consumo'})
    .groupby([timestamps.dt.month.rename('month'), timestamps.dt.year.rename('year')])
    .sum()
)

# Purely cosmetic: collapse the (month, year) index into a single 'month/year' label.
real_values.index = pd.Index(
    [f'{month}/{year}' for month, year in real_values.index],
    name='date',
)

# The months from October onwards are taken from old_real_values.
after_september_old_real_values = old_real_values.loc['10/2024':].copy()
real_values = pd.concat([real_values, after_september_old_real_values])

# The CSV does not hold all real values for September 2024, so that month is
# taken from old_real_values too. It is addressed by label rather than by row
# position so the right month is patched regardless of row order.
real_values.loc['9/2024', 'real_consumo'] = old_real_values.loc['9/2024', 'real_consumo']

display(real_values)


Unnamed: 0_level_0,real_consumo
date,Unnamed: 1_level_1
1/2024,4496.595
2/2024,4563.984
3/2024,5004.829
4/2024,5397.761
5/2024,4913.619
6/2024,4532.12
7/2024,4816.37
8/2024,4111.706
9/2024,4694.26
10/2024,5215.3


### prediction/real compare function

In [20]:
def compare_results(model_name):
    """Show a month-by-month comparison of a model's predictions with the ground truth.

    Reads ``results/output_data/{model_name}.csv`` (only the ``datetime`` and
    ``predicted_consumo`` columns are used), multiplies the predictions by 24,
    sums them per calendar month and aligns them with the module-level
    ``real_values`` frame on its ``'month/year'`` index. The resulting table
    (real value, prediction, absolute error, absolute percentage error) is
    displayed, followed by the mean and the standard deviation of the
    percentage error.

    Args:
        model_name: File stem of the prediction CSV inside ``results/output_data``.

    Returns:
        None. The comparison is only displayed/printed.
    """
    prediction_data_path = f'results/output_data/{model_name}.csv'
    prediction_data = pd.read_csv(prediction_data_path)

    # Parse the timestamps once and derive both grouping keys from them.
    timestamps = pd.to_datetime(prediction_data['datetime'])
    month_key = timestamps.dt.month.rename('month')
    year_key = timestamps.dt.year.rename('year')

    # we will transform all the predicted values in MWh, so multiply by 24
    predicted_mwh = prediction_data['predicted_consumo'] * 24

    # Sum the predicted energy per month. Only the prediction column is
    # aggregated, so unrelated (possibly non-numeric) CSV columns cannot
    # interfere with the sum.
    monthly_prediction = predicted_mwh.groupby([month_key, year_key]).sum()

    # Use the same 'month/year' labels as real_values so the assignment below aligns on them.
    monthly_prediction.index = pd.Index(
        [f'{month}/{year}' for month, year in monthly_prediction.index],
        name='date',
    )

    # Build the comparison table on top of a copy of the ground truth;
    # months without a prediction end up as NaN.
    diff = real_values.copy()
    diff['predicted_consumo'] = monthly_prediction
    # NOTE(review): the outputs saved in this notebook contain negative 'Erro'
    # values, which this abs() cannot produce -- they appear to come from an
    # earlier signed version; re-run the cells to refresh them.
    diff['Erro'] = (diff['real_consumo'] - diff['predicted_consumo']).abs()
    diff['Erro %'] = diff['Erro'] / diff['real_consumo'] * 100
    display(diff)
    print(f"E[erro] = {diff['Erro %'].mean()} %")
    print(f"sigma[erro] = {diff['Erro %'].std()} %")

    

# Xgboost

### Xgboost with energy prediction

In [21]:
# Monthly error report for results/output_data/xgboost_with_energy.csv.
compare_results('xgboost_with_energy')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4259.304041,237.290959,5.277125
2/2024,4563.984,4534.367935,29.616065,0.648908
3/2024,5004.829,4601.712974,403.116026,8.054541
4/2024,5397.761,5617.293936,-219.532936,-4.067111
5/2024,4913.619,5341.73628,-428.11728,-8.712871
6/2024,4532.12,4121.97547,410.14453,9.049728
7/2024,4816.37,4409.104291,407.265709,8.455864
8/2024,4111.706,4827.393362,-715.687362,-17.406093
9/2024,4694.26,4455.745277,238.514723,5.080987
10/2024,5215.3,5173.545566,41.754434,0.800614


E[erro] = 1.3235606915513918 %
sigma[erro] = 8.550575570039062 %


### Xgboost without energy

In [22]:
# Monthly error report for results/output_data/xgboost_without_energy.csv.
compare_results('xgboost_without_energy')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4326.937819,169.657181,3.773014
2/2024,4563.984,4469.63933,94.34467,2.067156
3/2024,5004.829,4672.17312,332.65588,6.646698
4/2024,5397.761,5580.077047,-182.316047,-3.377624
5/2024,4913.619,5086.851252,-173.232252,-3.525553
6/2024,4532.12,4392.707911,139.412089,3.07609
7/2024,4816.37,4610.598091,205.771909,4.272344
8/2024,4111.706,4526.254973,-414.548973,-10.082165
9/2024,4694.26,4706.12503,-11.86503,-0.252756
10/2024,5215.3,4721.325418,493.974582,9.471643


E[erro] = 1.374204240649516 %
sigma[erro] = 6.023002076407533 %


### Xgboost shuffle True

In [23]:
# Monthly error report for results/output_data/xgboost_shuffle_v2.csv.
compare_results('xgboost_shuffle_v2')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4610.914322,-114.319322,-2.542353
2/2024,4563.984,4950.966967,-386.982967,-8.479061
3/2024,5004.829,4996.607172,8.221828,0.164278
4/2024,5397.761,5734.421263,-336.660263,-6.237035
5/2024,4913.619,5380.678195,-467.059195,-9.505401
6/2024,4532.12,4746.026378,-213.906378,-4.719786
7/2024,4816.37,4922.102263,-105.732263,-2.195269
8/2024,4111.706,4899.083959,-787.377959,-19.149666
9/2024,4694.26,5067.481243,-373.221243,-7.950587
10/2024,5215.3,5103.584549,111.715451,2.142071


E[erro] = -4.853645437023328 %
sigma[erro] = 6.475457521643467 %


#### Xgboost v3

In [24]:
# Monthly error report for results/output_data/xgboost_shuffle_v3.csv.
compare_results('xgboost_shuffle_v3')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4541.011018,-44.416018,-0.98777
2/2024,4563.984,4945.327169,-381.343169,-8.355489
3/2024,5004.829,5039.51502,-34.68602,-0.693051
4/2024,5397.761,5904.605702,-506.844702,-9.389906
5/2024,4913.619,5500.112338,-586.493338,-11.936077
6/2024,4532.12,4634.100024,-101.980024,-2.250162
7/2024,4816.37,4682.993868,133.376132,2.769225
8/2024,4111.706,4822.81933,-711.11333,-17.294849
9/2024,4694.26,5072.17673,-377.91673,-8.050614
10/2024,5215.3,5299.007501,-83.707501,-1.605037


E[erro] = -4.879872309042048 %
sigma[erro] = 6.429658713236946 %


#### Xgboost v4

In [25]:
# Monthly error report for results/output_data/xgboost_shuffle_v4.csv.
compare_results('xgboost_shuffle_v4')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4591.130542,-94.535542,-2.102381
2/2024,4563.984,4990.73592,-426.75192,-9.350425
3/2024,5004.829,4936.707667,68.121333,1.361112
4/2024,5397.761,5739.04681,-341.28581,-6.322729
5/2024,4913.619,5235.855847,-322.236847,-6.558035
6/2024,4532.12,4609.923415,-77.803415,-1.716711
7/2024,4816.37,4730.686738,85.683262,1.779001
8/2024,4111.706,4771.092768,-659.386768,-16.036817
9/2024,4694.26,5144.833673,-450.573673,-9.598396
10/2024,5215.3,5194.555488,20.744512,0.397763


E[erro] = -4.012004627807288 %
sigma[erro] = 6.075949145265437 %


# Random Forest

## RF raposo com shuffle

In [26]:
# Monthly error report for results/output_data/Random_Forest_shuffle_True.csv.
compare_results('Random_Forest_shuffle_True')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4804.166352,-307.571352,-6.840095
2/2024,4563.984,4924.50492,-360.52092,-7.899259
3/2024,5004.829,5155.814535,-150.985535,-3.016797
4/2024,5397.761,5831.911336,-434.150336,-8.043156
5/2024,4913.619,5289.657295,-376.038295,-7.65298
6/2024,4532.12,4674.052633,-141.932633,-3.131705
7/2024,4816.37,4802.585968,13.784032,0.286191
8/2024,4111.706,4842.705201,-730.999201,-17.778489
9/2024,4694.26,5065.09475,-370.83475,-7.899749
10/2024,5215.3,4923.358572,291.941428,5.597788


E[erro] = -4.664312484379539 %
sigma[erro] = 6.6599989381655496 %


## RF raposo sem shuffle

In [27]:
# Monthly error report for results/output_data/Random_Forest_shuffle_False.csv.
compare_results('Random_Forest_shuffle_False')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4426.395957,70.199043,1.56116
2/2024,4563.984,4487.261163,76.722837,1.68105
3/2024,5004.829,4740.308226,264.520774,5.285311
4/2024,5397.761,5654.582458,-256.821458,-4.757926
5/2024,4913.619,5056.636438,-143.017438,-2.910633
6/2024,4532.12,4306.554237,225.565763,4.977047
7/2024,4816.37,4382.697699,433.672301,9.004132
8/2024,4111.706,4449.504156,-337.798156,-8.215523
9/2024,4694.26,4370.097183,324.162817,6.905515
10/2024,5215.3,4682.551154,532.748846,10.215114


E[erro] = 2.7465204667099066 %
sigma[erro] = 6.563571704411899 %


## RF melhor que encontrei (sem shuffle)

In [28]:
# Monthly error report for results/output_data/best_Random_Forest_shuffle_False.csv.
compare_results('best_Random_Forest_shuffle_False')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4450.46921,46.12579,1.025794
2/2024,4563.984,4491.196632,72.787368,1.594821
3/2024,5004.829,4766.603645,238.225355,4.75991
4/2024,5397.761,5656.363185,-258.602185,-4.790916
5/2024,4913.619,5071.90647,-158.28747,-3.221403
6/2024,4532.12,4348.540024,183.579976,4.050642
7/2024,4816.37,4439.33131,377.03869,7.828275
8/2024,4111.706,4493.236735,-381.530735,-9.279135
9/2024,4694.26,4381.525229,312.734771,6.662068
10/2024,5215.3,4697.805646,517.494354,9.922619


E[erro] = 2.313580335304769 %
sigma[erro] = 6.579044608344049 %


## RF melhor que encontrei (com shuffle)

In [29]:
# Monthly error report for results/output_data/Random_Forest.csv.
compare_results('Random_Forest')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4790.006377,-293.411377,-6.52519
2/2024,4563.984,4848.406108,-284.422108,-6.231882
3/2024,5004.829,5194.50476,-189.67576,-3.789855
4/2024,5397.761,5194.678615,203.082385,3.762345
5/2024,4913.619,4860.196321,53.422679,1.087237
6/2024,4532.12,4512.090747,20.029253,0.44194
7/2024,4816.37,4557.982403,258.387597,5.364779
8/2024,4111.706,4889.463143,-777.757143,-18.91568
9/2024,4694.26,4753.997709,-59.737709,-1.272569
10/2024,5215.3,4753.906591,461.393409,8.84692


E[erro] = -0.9615345480596491 %
sigma[erro] = 7.58949108197663 %


# KNN

In [30]:
# Monthly error report for results/output_data/KNN.csv.
compare_results('KNN')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4851.441222,-354.846222,-7.891443
2/2024,4563.984,4873.615276,-309.631276,-6.784232
3/2024,5004.829,5409.722529,-404.893529,-8.090057
4/2024,5397.761,5638.176978,-240.415978,-4.453995
5/2024,4913.619,5139.442555,-225.823555,-4.59587
6/2024,4532.12,4762.876988,-230.756988,-5.09159
7/2024,4816.37,4665.104962,151.265038,3.140644
8/2024,4111.706,4994.47906,-882.77306,-21.469751
9/2024,4694.26,4976.885975,-282.625975,-6.020672
10/2024,5215.3,4861.401966,353.898034,6.785766


E[erro] = -4.524254677834162 %
sigma[erro] = 7.832262171652952 %


In [31]:
# Monthly error report for results/output_data/KNN_2.csv.
compare_results('KNN_2')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4840.369677,-343.774677,-7.645222
2/2024,4563.984,4903.212221,-339.228221,-7.432722
3/2024,5004.829,5393.526953,-388.697953,-7.766458
4/2024,5397.761,5644.277971,-246.516971,-4.567023
5/2024,4913.619,5166.24214,-252.62314,-5.141285
6/2024,4532.12,4690.893566,-158.773566,-3.503296
7/2024,4816.37,4533.078838,283.291162,5.88184
8/2024,4111.706,4832.237243,-720.531243,-17.5239
9/2024,4694.26,4689.495813,4.764187,0.10149
10/2024,5215.3,4779.554726,435.745274,8.355133


E[erro] = -3.182789214978898 %
sigma[erro] = 7.633453559836977 %


In [32]:
# Monthly error report for results/output_data/KNN_3.csv.
compare_results('KNN_3')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4496.595,4750.274699,-253.679699,-5.641595
2/2024,4563.984,4716.240576,-152.256576,-3.336045
3/2024,5004.829,5040.062098,-35.233098,-0.703982
4/2024,5397.761,5218.744031,179.016969,3.316504
5/2024,4913.619,4738.175088,175.443912,3.570564
6/2024,4532.12,3763.036925,769.083075,16.96961
7/2024,4816.37,4093.492509,722.877491,15.008762
8/2024,4111.706,4793.87755,-682.17155,-16.590961
9/2024,4694.26,4349.139868,345.120132,7.35196
10/2024,5215.3,4311.667592,903.632408,17.326566


E[erro] = 3.427397797135438 %
sigma[erro] = 10.176421956326454 %
