In [1]:
import numpy as np
import math
import time
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append('..')

import data
import features

In [2]:
# Load the input data; the loader's return value is unpacked into (temp, dico).
# NOTE(review): both paths are relative to the current working directory —
# confirm where the notebook is expected to be launched from.
temp, dico = data.load_input_data('data/train_input.csv')
# Load the output data; the loader is given the two objects returned above.
output = data.load_output_data('data/challenge_output.csv', temp, dico)

**Variables:**
* **dico**: dictionnaire avec, pour chaque nom de bâtiment, les paramètres de réglage ayant subi un premier traitement. dico['buildings'] = liste de tous les noms de bâtiments
* **temp**: outside temperature
* **output**: dictionnaire avec, pour chaque nom de bâtiment, les données à prédire. Le champ 'fields' contient les noms des 5 outputs :

**Outputs**:
* 0: office_temperature_degreC
* 1: Q_total_heating_kW
* 2: Q_total_AC_kW
* 3: Q_total_gains_kW
* 4: Q_total_kW

In [3]:
# Pick one building as the running example and display its entry in dico.
name_building = 'building_10_1'
dico[name_building]

{'AC_WE_hours': [0.0, 6.75, 17.5, 24.0],
 'AC_WE_temperatures_degreC': [32.0, 27.56, 31.43, 31.43],
 'AC_monday_hours': [0.0, 7.0, 18.0, 24.0],
 'AC_monday_temperatures_degreC': [32.0, 24.55, 28.04, 28.04],
 'AC_power_kW': 603.58,
 'AC_week_hours': [0.0, 6.0, 19.5, 24.0],
 'AC_week_temperatures_degreC': [32.0, 24.6, 28.82, 28.82],
 'AHU_high_threshold': 22.0,
 'AHU_low_threshold': 18.0,
 'PCs_percent_on_night_WE': 0.2,
 'Phantom_use_kW': 192.89,
 'airchange_infiltration_m3perh': 0.18,
 'airchange_ventilation_m3perh': 0.79,
 'heating_WE_hours': [0.0, 7.5, 17.5, 24.0],
 'heating_WE_temperatures_degreC': [17.0, 20.9, 14.49, 14.49],
 'heating_monday_hours': [0.0, 5.0, 19.0, 24.0],
 'heating_monday_temperatures_degreC': [17.0, 20.5, 16.19, 16.19],
 'heating_power_kW': 180.09,
 'heating_week_hours': [0.0, 6.0, 18.5, 24.0],
 'heating_week_temperatures_degreC': [17.0, 22.62, 17.1, 17.1],
 'initial_temperature': 20.0,
 'light_percent_on_night_WE': 0.1,
 'lighting_Wperm2': 1.46,
 'nb_PCs': 467.0

In [4]:
# Extract the features for the selected building and preview the first rows.
# NOTE(review): extract_features presumably returns a pandas DataFrame (it is
# previewed with .head()) — confirm against the features module.
features_df = features.extract_features(dico, temp, name_building)
features_df.head()

364 jours et 52 semaines


Unnamed: 0,AC_in,AC_power_kW,AC_value,AHU_high_threshold,AHU_low_threshold,PCs_percent_on_night_WE,Phantom_use_kW,airchange_infiltration_m3perh,airchange_ventilation_m3perh,heating_in,...,surface_4_m2_OUTW,surface_m2_GROU,surface_m2_INTW,surface_m2_ROOF,useful_surface_m2,volume2capacitance_coeff,window_percent_1_outwall,window_percent_2_outwall,window_percent_3_outwall,window_percent_4_outwall
0,0,603.58,32.0,22.0,18.0,0.2,192.89,0.18,0.79,0,...,1109.55,1450.0,8000.0,1450.0,6073.55,34.11,70.0,70.0,70.0,70.0
1,0,603.58,32.0,22.0,18.0,0.2,192.89,0.18,0.79,0,...,1109.55,1450.0,8000.0,1450.0,6073.55,34.11,70.0,70.0,70.0,70.0
2,0,603.58,32.0,22.0,18.0,0.2,192.89,0.18,0.79,0,...,1109.55,1450.0,8000.0,1450.0,6073.55,34.11,70.0,70.0,70.0,70.0
3,0,603.58,32.0,22.0,18.0,0.2,192.89,0.18,0.79,0,...,1109.55,1450.0,8000.0,1450.0,6073.55,34.11,70.0,70.0,70.0,70.0
4,0,603.58,32.0,22.0,18.0,0.2,192.89,0.18,0.79,0,...,1109.55,1450.0,8000.0,1450.0,6073.55,34.11,70.0,70.0,70.0,70.0


In [48]:
# Split the example building's parameters by value type: entries holding a
# float go to constant_var, every other entry goes to non_constant_var.
# isinstance() is the idiomatic type check (it also accepts float subclasses),
# and the building's entry is looked up once instead of once per key.
building_params = dico[name_building]
constant_var = [var for var, value in building_params.items() if isinstance(value, float)]
non_constant_var = [var for var, value in building_params.items() if not isinstance(value, float)]
non_constant_var

['ventilation_week_hours',
 'ventilation_week_ONif1',
 'AC_week_hours',
 'AC_week_temperatures_degreC',
 'AC_WE_hours',
 'AC_WE_temperatures_degreC',
 'AC_monday_hours',
 'AC_monday_temperatures_degreC',
 'heating_week_hours',
 'heating_week_temperatures_degreC',
 'heating_WE_hours',
 'heating_WE_temperatures_degreC',
 'heating_monday_hours',
 'heating_monday_temperatures_degreC',
 'thickness_ground_m',
 'thickness_outwall_m',
 'thickness_intwall_m',
 'thickness_intfloor_m',
 'thickness_roof_m']

In [52]:
def plot_repartition_var(dico, name_var, plot=False):
    """Collect the value of one variable across all buildings.

    Parameters
    ----------
    dico : dict
        Mapping that holds the list of building names under the key
        'buildings' and, for each of those names, a mapping from variable
        name to value.
    name_var : str
        Name of the variable to look up for every building.
    plot : bool, optional
        When True, also draw a histogram of the collected values.
        Defaults to False.

    Returns
    -------
    list
        One value per building, in the order of dico['buildings'].

    Raises
    ------
    KeyError
        If 'buildings' is missing, or if a listed building has no entry
        for name_var.
    """
    evol_var = [dico[building][name_var] for building in dico['buildings']]
    if plot:
        # Draw on an explicit Axes so the histogram never lands on a figure
        # left over from earlier plotting code.
        _, ax = plt.subplots()
        ax.hist(evol_var)
        ax.set_title('Repartition of {}'.format(name_var))
        ax.set_xlabel(name_var)
        ax.set_ylabel('Number of buildings')
        plt.show()
    return evol_var

# Des variables sont identiques pour TOUS les bâtiments !

In [54]:
# For each name in constant_var, gather that variable's value across all
# buildings (plotting stays off because `plot` is left at its default).
# A dict comprehension builds the mapping in one expression and does not leave
# a loop variable behind in the notebook namespace.
var_evol_dico = {var: plot_repartition_var(dico, var) for var in constant_var}

In [74]:
# One column per variable, one row per building.
var_evol_df = pd.DataFrame(var_evol_dico)
# Compute the summary statistics once; calling describe() inside the
# comprehension would recompute them for every variable.
std_per_var = var_evol_df.describe().loc['std']
# A variable whose standard deviation is numerically zero takes the same value
# for every building, so it carries no information to tell buildings apart.
useless_var = [var for var in constant_var if np.abs(std_per_var[var]) < 1e-10]
print(len(useless_var))
print(useless_var)

15
['airchange_infiltration_m3perh', 'airchange_ventilation_m3perh', 'AC_power_kW', 'heating_power_kW', 'surface_m2_GROU', 'surface_m2_ROOF', 'surface_m2_INTW', 'PCs_percent_on_night_WE', 'light_percent_on_night_WE', 'lighting_Wperm2', 'volume2capacitance_coeff', 'initial_temperature', 'Phantom_use_kW', 'AHU_low_threshold', 'AHU_high_threshold']
