In [7]:
import pandas as pd

### old ground truth

In [8]:
# Hand-typed monthly consumption totals for 2024, one value per month,
# indexed by a 'month/year' label.
months_2024 = [f'{month}/2024' for month in range(1, 13)]
monthly_consumo = [
    4885.10, 4880.14, 5391.35, 5925.34, 5295.28, 4996.13,
    5256.78, 5069.64, 4694.26, 5215.3, 5273.88, 5501.23,
]

old_real_values = pd.DataFrame(
    {'real_consumo': monthly_consumo},
    index=pd.Index(months_2024, name='date'),
)
display(old_real_values)


Unnamed: 0_level_0,real_consumo
date,Unnamed: 1_level_1
1/2024,4885.1
2/2024,4880.14
3/2024,5391.35
4/2024,5925.34
5/2024,5295.28
6/2024,4996.13
7/2024,5256.78
8/2024,5069.64
9/2024,4694.26
10/2024,5215.3


### ground truth

In [9]:
# Monthly ground truth, rebuilt from the measurements file and completed with
# the hand-typed values of `old_real_values` for the months the file does not
# fully cover.
path = 'results/ground_truth_data/ground_truth.csv'
real_values = pd.read_csv(path)
real_values['datetime'] = pd.to_datetime(real_values['datetime'])

# The measured consumption is used exactly as stored in the file (only cast to
# float): unlike the predictions, no x24 scaling is applied here.
real_values['consumo_mwh'] = real_values['consumo_mwh'].astype(float)
real_values = real_values.rename(columns={'consumo_mwh': 'real_consumo'})

# Group by month and sum the measured energy.
# 'datetime' is already a datetime column, so it is not parsed again.
real_values['month'] = real_values['datetime'].dt.month
real_values['year'] = real_values['datetime'].dt.year
real_values = real_values.drop(columns=['datetime'])
real_values = real_values.groupby(['month', 'year']).sum()

# Replace the (month, year) MultiIndex with a single 'month/year' label so the
# index matches the one used by `old_real_values` and by the predictions.
real_values.index = pd.Index(
    [f'{month}/{year}' for month, year in real_values.index],
    name='date',
)

# The file does not hold all the real values for September 2024 and nothing
# usable after it, so September onwards comes from `old_real_values`.
# The rows are selected by label, not by position: a positional patch
# (`iloc[8]`) would overwrite October whenever the file stops at August.
# NOTE(review): the August total computed from the file is also well below the
# old hand-typed August value - confirm that August is complete in the file.
fallback_values = old_real_values.loc['9/2024':].copy()
real_values = real_values.drop(index=fallback_values.index, errors='ignore')
real_values = pd.concat([real_values, fallback_values])

display(real_values)


Unnamed: 0_level_0,real_consumo
date,Unnamed: 1_level_1
1/2024,4878.220478
2/2024,4880.002875
3/2024,5389.67872
4/2024,5925.579621
5/2024,5297.406681
6/2024,4995.805396
7/2024,5256.900793
8/2024,4435.953231
9/2024,4694.26
10/2024,5215.3


### prediction/real compare function

In [10]:
def compare_results(model_name, ground_truth=None):
    """Display the monthly prediction error of one model against the ground truth.

    Reads ``results/output_data/{model_name}.csv`` (columns ``datetime`` and
    ``predicted_consumo`` are used), multiplies every prediction by 24, sums
    the result per calendar month and compares it with the ``real_consumo``
    column of the ground truth.

    Parameters
    ----------
    model_name : str
        Name of the prediction file, without the ``.csv`` extension.
    ground_truth : pandas.DataFrame, optional
        Frame indexed by ``'month/year'`` labels (e.g. ``'1/2024'``) with a
        ``real_consumo`` column. When omitted, the notebook-level
        ``real_values`` frame is used.

    Returns
    -------
    None
        The comparison table is shown with ``display`` and the mean and
        standard deviation of the percentage error are printed.

    Notes
    -----
    Months of the ground truth that have no prediction get NaN in the
    prediction and error columns; pandas leaves them out of the mean and
    standard deviation.
    """
    if ground_truth is None:
        # Default to the frame built earlier in the notebook.
        ground_truth = real_values

    # Scaling applied to every prediction before the monthly sum.
    # NOTE(review): the original comment describes this as converting the
    # predictions to MWh - confirm the unit of `predicted_consumo`.
    hours_per_day = 24

    prediction_data = pd.read_csv(f'results/output_data/{model_name}.csv')
    predicted_mwh = prediction_data['predicted_consumo'] * hours_per_day

    # Parse the timestamps once and build the same 'month/year' label that
    # indexes the ground truth.
    timestamps = pd.to_datetime(prediction_data['datetime'])
    month_labels = (
        timestamps.dt.month.astype(str) + '/' + timestamps.dt.year.astype(str)
    ).rename('date')

    # Only the prediction column is aggregated, so unrelated columns in the
    # file cannot break or pollute the monthly sum.
    monthly_prediction = predicted_mwh.groupby(month_labels).sum()

    # Real vs. predicted values, aligned on the 'month/year' label.
    diff = ground_truth.copy()
    diff['predicted_consumo'] = monthly_prediction
    diff['Erro'] = (diff['real_consumo'] - diff['predicted_consumo']).abs()
    diff['Erro %'] = diff['Erro'] / diff['real_consumo'] * 100
    display(diff)
    print(f"E[erro] = {diff['Erro %'].mean()} %")
    print(f"sigma[erro] = {diff['Erro %'].std()} %")

    

# Xgboost

### Xgboost with energy prediction

In [11]:
# Monthly error report for results/output_data/xgboost_with_energy.csv
compare_results('xgboost_with_energy')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4259.304041,618.916437,12.68734
2/2024,4880.002875,4534.367935,345.63494,7.082679
3/2024,5389.67872,4601.712974,787.965746,14.619902
4/2024,5925.579621,5617.293936,308.285685,5.202625
5/2024,5297.406681,5341.73628,44.329599,0.836817
6/2024,4995.805396,4121.97547,873.829926,17.491272
7/2024,5256.900793,4409.104291,847.796502,16.127306
8/2024,4435.953231,4827.393362,391.440131,8.824262
9/2024,4694.26,4455.745277,238.514723,5.080987
10/2024,5215.3,5173.545566,41.754434,0.800614


E[erro] = 8.681616939553932 %
sigma[erro] = 5.841735067273322 %


### Xgboost without energy

In [12]:
# Monthly error report for results/output_data/xgboost_without_energy.csv
compare_results('xgboost_without_energy')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4326.937819,551.282659,11.300897
2/2024,4880.002875,4469.63933,410.363545,8.409084
3/2024,5389.67872,4672.17312,717.5056,13.312586
4/2024,5925.579621,5580.077047,345.502574,5.830697
5/2024,5297.406681,5086.851252,210.555429,3.974689
6/2024,4995.805396,4392.707911,603.097485,12.072077
7/2024,5256.900793,4610.598091,646.302702,12.294367
8/2024,4435.953231,4526.254973,90.301742,2.035678
9/2024,4694.26,4706.12503,11.86503,0.252756
10/2024,5215.3,4721.325418,493.974582,9.471643


E[erro] = 7.794387549780997 %
sigma[erro] = 4.29859024784102 %


### Xgboost shuffle True

In [13]:
# Monthly error report for results/output_data/xgboost_shuffle_v2.csv
compare_results('xgboost_shuffle_v2')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4610.914322,267.306156,5.479583
2/2024,4880.002875,4950.966967,70.964092,1.454181
3/2024,5389.67872,4996.607172,393.071548,7.293042
4/2024,5925.579621,5734.421263,191.158358,3.225986
5/2024,5297.406681,5380.678195,83.271514,1.57193
6/2024,4995.805396,4746.026378,249.779018,4.999775
7/2024,5256.900793,4922.102263,334.79853,6.368744
8/2024,4435.953231,4899.083959,463.130728,10.440388
9/2024,4694.26,5067.481243,373.221243,7.950587
10/2024,5215.3,5103.584549,111.715451,2.142071


E[erro] = 5.262239947919161 %
sigma[erro] = 2.746422100585364 %


#### Xgboost v3

In [14]:
# Monthly error report for results/output_data/xgboost_shuffle_v3.csv
compare_results('xgboost_shuffle_v3')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4541.011018,337.20946,6.912551
2/2024,4880.002875,4945.327169,65.324294,1.338612
3/2024,5389.67872,5039.51502,350.1637,6.496931
4/2024,5925.579621,5904.605702,20.973919,0.353956
5/2024,5297.406681,5500.112338,202.705657,3.826507
6/2024,4995.805396,4634.100024,361.705372,7.240181
7/2024,5256.900793,4682.993868,573.906925,10.91721
8/2024,4435.953231,4822.81933,386.866099,8.721149
9/2024,4694.26,5072.17673,377.91673,8.050614
10/2024,5215.3,5299.007501,83.707501,1.605037


E[erro] = 5.527652213641763 %
sigma[erro] = 3.2235926963348467 %


#### Xgboost v4

In [15]:
# Monthly error report for results/output_data/xgboost_shuffle_v4.csv
compare_results('xgboost_shuffle_v4')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4591.130542,287.089936,5.885137
2/2024,4880.002875,4990.73592,110.733045,2.269118
3/2024,5389.67872,4936.707667,452.971053,8.404417
4/2024,5925.579621,5739.04681,186.532811,3.147925
5/2024,5297.406681,5235.855847,61.550834,1.161905
6/2024,4995.805396,4609.923415,385.881981,7.72412
7/2024,5256.900793,4730.686738,526.214055,10.009967
8/2024,4435.953231,4771.092768,335.139537,7.555074
9/2024,4694.26,5144.833673,450.573673,9.598396
10/2024,5215.3,5194.555488,20.744512,0.397763


E[erro] = 5.64396702329471 %
sigma[erro] = 3.2421837088973504 %


# Random Forest

## RF raposo com shuffle

In [16]:
# Monthly error report for results/output_data/Random_Forest_shuffle_True.csv
compare_results('Random_Forest_shuffle_True')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4804.166352,74.054126,1.518056
2/2024,4880.002875,4924.50492,44.502045,0.911927
3/2024,5389.67872,5155.814535,233.864185,4.339112
4/2024,5925.579621,5831.911336,93.668285,1.580745
5/2024,5297.406681,5289.657295,7.749386,0.146286
6/2024,4995.805396,4674.052633,321.752763,6.440458
7/2024,5256.900793,4802.585968,454.314825,8.642256
8/2024,4435.953231,4842.705201,406.75197,9.169438
9/2024,4694.26,5065.09475,370.83475,7.899749
10/2024,5215.3,4923.358572,291.941428,5.597788


E[erro] = 4.957136223837009 %
sigma[erro] = 3.181233868494512 %


## RF raposo sem shuffle

In [17]:
# Monthly error report for results/output_data/Random_Forest_shuffle_False.csv
compare_results('Random_Forest_shuffle_False')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4426.395957,451.824521,9.262077
2/2024,4880.002875,4487.261163,392.741712,8.047981
3/2024,5389.67872,4740.308226,649.370494,12.048408
4/2024,5925.579621,5654.582458,270.997163,4.573344
5/2024,5297.406681,5056.636438,240.770243,4.545059
6/2024,4995.805396,4306.554237,689.251159,13.796597
7/2024,5256.900793,4382.697699,874.203094,16.629629
8/2024,4435.953231,4449.504156,13.550925,0.305479
9/2024,4694.26,4370.097183,324.162817,6.905515
10/2024,5215.3,4682.551154,532.748846,10.215114


E[erro] = 8.549561609839737 %
sigma[erro] = 4.804310235690977 %


## RF melhor que encontrei (sem shuffle)

In [18]:
# Monthly error report for results/output_data/best_Random_Forest_shuffle_False.csv
compare_results('best_Random_Forest_shuffle_False')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4450.46921,427.751268,8.768592
2/2024,4880.002875,4491.196632,388.806243,7.967336
3/2024,5389.67872,4766.603645,623.075075,11.560524
4/2024,5925.579621,5656.363185,269.216436,4.543293
5/2024,5297.406681,5071.90647,225.500211,4.256804
6/2024,4995.805396,4348.540024,647.265372,12.956177
7/2024,5256.900793,4439.33131,817.569483,15.55231
8/2024,4435.953231,4493.236735,57.283504,1.291346
9/2024,4694.26,4381.525229,312.734771,6.662068
10/2024,5215.3,4697.805646,517.494354,9.922619


E[erro] = 8.30934081297121 %
sigma[erro] = 4.396446882402895 %


## RF melhor que encontrei (com shuffle)

In [19]:
# Monthly error report for results/output_data/Random_Forest.csv
compare_results('Random_Forest')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4790.006377,88.214101,1.808325
2/2024,4880.002875,4848.406108,31.596767,0.647474
3/2024,5389.67872,5194.50476,195.17396,3.621254
4/2024,5925.579621,5194.678615,730.901006,12.334675
5/2024,5297.406681,4860.196321,437.21036,8.25329
6/2024,4995.805396,4512.090747,483.714649,9.682416
7/2024,5256.900793,4557.982403,698.91839,13.295255
8/2024,4435.953231,4889.463143,453.509912,10.223505
9/2024,4694.26,4753.997709,59.737709,1.272569
10/2024,5215.3,4753.906591,461.393409,8.84692


E[erro] = 6.671362820865483 %
sigma[erro] = 4.52082702913904 %


# KNN

In [20]:
# Monthly error report for results/output_data/KNN.csv
compare_results('KNN')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4851.441222,26.779256,0.548955
2/2024,4880.002875,4873.615276,6.387599,0.130893
3/2024,5389.67872,5409.722529,20.043809,0.371892
4/2024,5925.579621,5638.176978,287.402643,4.850203
5/2024,5297.406681,5139.442555,157.964126,2.981914
6/2024,4995.805396,4762.876988,232.928408,4.66248
7/2024,5256.900793,4665.104962,591.795831,11.257504
8/2024,4435.953231,4994.47906,558.525829,12.590886
9/2024,4694.26,4976.885975,282.625975,6.020672
10/2024,5215.3,4861.401966,353.898034,6.785766


E[erro] = 5.509739677429489 %
sigma[erro] = 4.096041053192685 %


In [21]:
# Monthly error report for results/output_data/KNN_2.csv
compare_results('KNN_2')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4840.369677,37.850801,0.775914
2/2024,4880.002875,4903.212221,23.209346,0.475601
3/2024,5389.67872,5393.526953,3.848233,0.0714
4/2024,5925.579621,5644.277971,281.30165,4.747243
5/2024,5297.406681,5166.24214,131.164541,2.476014
6/2024,4995.805396,4690.893566,304.91183,6.103357
7/2024,5256.900793,4533.078838,723.821955,13.768986
8/2024,4435.953231,4832.237243,396.284012,8.933458
9/2024,4694.26,4689.495813,4.764187,0.10149
10/2024,5215.3,4779.554726,435.745274,8.355133


E[erro] = 5.09159574719008 %
sigma[erro] = 4.393372357662437 %


In [22]:
# Monthly error report for results/output_data/KNN_3.csv
compare_results('KNN_3')

Unnamed: 0_level_0,real_consumo,predicted_consumo,Erro,Erro %
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/2024,4878.220478,4750.274699,127.945779,2.622796
2/2024,4880.002875,4716.240576,163.762299,3.355783
3/2024,5389.67872,5040.062098,349.616622,6.48678
4/2024,5925.579621,5218.744031,706.83559,11.928548
5/2024,5297.406681,4738.175088,559.231593,10.556705
6/2024,4995.805396,3763.036925,1232.768471,24.676071
7/2024,5256.900793,4093.492509,1163.408284,22.131068
8/2024,4435.953231,4793.87755,357.924319,8.068713
9/2024,4694.26,4349.139868,345.120132,7.35196
10/2024,5215.3,4311.667592,903.632408,17.326566


E[erro] = 10.475474055870958 %
sigma[erro] = 7.294778841215429 %
