In [1]:
"""
You need to run this cell for the code in following cells to work.
"""

# Enable module reloading
%load_ext autoreload
%autoreload 2

import os
# Move to the project root only once. The `src` package imported below and the
# relative `data/...` paths used later are resolved against the working
# directory, so an unconditional chdir would climb one level further every time
# this cell is re-run.
if not os.path.isdir("src"):
    os.chdir("..")

import pandas as pd
from src.data.analysis import get_outliers
from src.data.preprocessing.utils import split_meter_data, preprocess_and_merge_data
from src.data.pipelines import primary_use_pipeline, square_feet_pipeline, air_temperature_pipeline, \
    air_temperature_without_outliers_pipeline, dew_temperature_pipeline, \
    dew_temperature_without_outliers_pipeline, sea_level_pressure_pipeline, wind_speed_pipeline, \
    wind_speed_without_outliers_pipeline, wind_direction_pipeline, meter_pipeline
from src.data.feature_unions import buildings_fu, weather_fu, weather_without_outliers_fu, meter_fu

In [2]:
def check_mean_and_variance(df_column):
    """Print the mean and variance of a column, each rounded to two decimals.

    Parameters
    ----------
    df_column : object exposing ``mean()`` and ``var()``
        In this notebook always a single column of a pandas DataFrame.

    Returns
    -------
    None
        The result is only printed, in the form ``mean = <m>, variance = <v>``.
    """
    # Adding 0.0 turns a rounded negative zero into plain 0.0, so the report
    # never shows the confusing "mean = -0.0" for a properly centred column.
    mean = round(df_column.mean(), 2) + 0.0
    var = round(df_column.var(), 2) + 0.0
    print(f'mean = {mean}, variance = {var}')

# Data preprocessing

In [3]:
building_metadata = pd.read_csv('data/building_metadata.csv')
building_metadata

Unnamed: 0,site_id,building_id,primary_use,square_feet,year_built,floor_count
0,0,0,Education,7432,2008.0,
1,0,1,Education,2720,2004.0,
2,0,2,Education,5376,1991.0,
3,0,3,Education,23685,2002.0,
4,0,4,Education,116607,1975.0,
...,...,...,...,...,...,...
1444,15,1444,Entertainment/public assembly,19619,1914.0,
1445,15,1445,Education,4298,,
1446,15,1446,Entertainment/public assembly,11265,1997.0,
1447,15,1447,Lodging/residential,29775,2001.0,


In [4]:
bm_copy = building_metadata.copy()

## Buildings data

### Primary use

In this preprocessing step we merge the less frequent categories of primary use into a single `Other` category and encode the values using one-hot encoding.

In [5]:
primary_use_feature = pd.DataFrame(primary_use_pipeline.fit_transform(bm_copy))
primary_use_feature

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services
0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
1444,0.0,1.0,0.0,0.0,0.0,0.0
1445,1.0,0.0,0.0,0.0,0.0,0.0
1446,0.0,1.0,0.0,0.0,0.0,0.0
1447,0.0,0.0,1.0,0.0,0.0,0.0


We can see that there are only 6 categories (`Education`, `Entertainment/public assembly`, `Lodging/residential`, `Office`, `Other` and `Public services`) instead of the original 16, and that the values are properly encoded.

### Square feet

In this preprocessing step we scale the values to zero mean and unit variance.

In [5]:
square_feet_feature = pd.DataFrame(square_feet_pipeline.fit_transform(bm_copy))
square_feet_feature.head()

Unnamed: 0,square_feet
0,-0.764729
1,-0.807282
2,-0.783297
3,-0.617951
4,0.221212


In [7]:
check_mean_and_variance(square_feet_feature.square_feet)

mean = 0.0, variance = 1.0


We can see that the values are properly scaled.

### Union of features

The resulting buildings data looks as follows.

In [8]:
buildings_features = buildings_fu.union_features(building_metadata)
buildings_features

Unnamed: 0,site_id,building_id,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,square_feet
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.764729
1,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.807282
2,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.783297
3,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.617951
4,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.221212
...,...,...,...,...,...,...,...,...,...
1444,15.0,1444.0,0.0,1.0,0.0,0.0,0.0,0.0,-0.654670
1445,15.0,1445.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.793032
1446,15.0,1446.0,0.0,1.0,0.0,0.0,0.0,0.0,-0.730114
1447,15.0,1447.0,0.0,0.0,1.0,0.0,0.0,0.0,-0.562953


## Weather data

In [4]:
train_weather = pd.read_csv('data/weather_train.csv')
train_weather

Unnamed: 0,site_id,timestamp,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed
0,0,2016-01-01 00:00:00,25.0,6.0,20.0,,1019.7,0.0,0.0
1,0,2016-01-01 01:00:00,24.4,,21.1,-1.0,1020.2,70.0,1.5
2,0,2016-01-01 02:00:00,22.8,2.0,21.1,0.0,1020.2,0.0,0.0
3,0,2016-01-01 03:00:00,21.1,2.0,20.6,0.0,1020.1,0.0,0.0
4,0,2016-01-01 04:00:00,20.0,2.0,20.0,-1.0,1020.0,250.0,2.6
...,...,...,...,...,...,...,...,...,...
139768,15,2016-12-31 19:00:00,3.0,,-8.0,,,180.0,5.7
139769,15,2016-12-31 20:00:00,2.8,2.0,-8.9,,1007.4,180.0,7.7
139770,15,2016-12-31 21:00:00,2.8,,-7.2,,1007.5,180.0,5.1
139771,15,2016-12-31 22:00:00,2.2,,-6.7,,1008.0,170.0,4.6


In [3]:
tw_copy = train_weather.copy()

### Air temperature

In this preprocessing step we fill in missing values using a rolling average and scale the values to zero mean and unit variance.

In [11]:
air_temperature_feature = pd.DataFrame(air_temperature_pipeline.fit_transform(tw_copy))
air_temperature_feature.head()

Unnamed: 0,air_temperature
0,0.995738
1,0.939274
2,0.788704
3,0.628723
4,0.525206


In [12]:
air_temperature_feature.air_temperature.isna().sum()

0

In [13]:
check_mean_and_variance(air_temperature_feature.air_temperature)

mean = 0.0, variance = 1.0


We can see that there are no missing values and that the values are properly scaled.

In this preprocessing step we focus on replacing outliers with the 5th or the 95th percentile.

In [14]:
air_temperature_without_outliers_feature = \
    pd.DataFrame(air_temperature_without_outliers_pipeline.fit_transform(tw_copy))
air_temperature_without_outliers_feature.head()

Unnamed: 0,air_temperature
0,1.010217
1,0.952421
2,0.7983
3,0.634545
4,0.528587


In [15]:
get_outliers(air_temperature_without_outliers_feature, 'air_temperature')

lower bound: -2.87172198528123
upper bound: 2.9078396453263515


Unnamed: 0,air_temperature
61806,-2.871722
62355,-2.871722
62852,-2.871722
96766,-2.871722
97315,-2.871722
97812,-2.871722


We can see that the outliers were correctly replaced: the rows returned lie exactly on the lower bound and are reported only because of floating-point rounding error.

### Dew temperature

In this preprocessing step we fill in missing values using a rolling average and scale the values to zero mean and unit variance.

In [16]:
dew_temperature_feature = pd.DataFrame(dew_temperature_pipeline.fit_transform(tw_copy))
dew_temperature_feature.head()

Unnamed: 0,dew_temperature
0,1.292475
1,1.404847
2,1.404847
3,1.353769
4,1.292475


In [17]:
dew_temperature_feature.dew_temperature.isna().sum()

0

In [18]:
check_mean_and_variance(dew_temperature_feature.dew_temperature)

mean = 0.0, variance = 1.0


We can see that there are no missing values and that the values are properly scaled.

In this preprocessing step we focus on replacing outliers with the 5th or the 95th percentile.

In [19]:
dew_temperature_without_outliers_feature = \
    pd.DataFrame(dew_temperature_without_outliers_pipeline.fit_transform(tw_copy))
dew_temperature_without_outliers_feature.head()

Unnamed: 0,dew_temperature
0,1.314705
1,1.429847
2,1.429847
3,1.37751
4,1.314705


In [20]:
get_outliers(dew_temperature_without_outliers_feature, 'dew_temperature')

lower bound: -2.882740752309666
upper bound: 2.8952894356771677


Unnamed: 0,dew_temperature
62510,-2.882741
62793,-2.882741
97470,-2.882741
97753,-2.882741


We can see that the outliers were correctly replaced (the rows returned again lie exactly on the lower bound, i.e. they are only rounding error).

### Sea level pressure

In this preprocessing step we fill in missing values using a rolling average and scale the values to zero mean and unit variance.

In [21]:
sea_level_pressure_feature = pd.DataFrame(sea_level_pressure_pipeline.fit_transform(tw_copy))
sea_level_pressure_feature.head()

Unnamed: 0,sea_level_pressure
0,0.466649
1,0.532188
2,0.532188
3,0.51908
4,0.505972


In [22]:
sea_level_pressure_feature.sea_level_pressure.isna().sum()

8755

Since there are still some missing values, we will look into them to find out why they were not filled in with some value.

In [23]:
# Number of missing sea-level-pressure readings in the raw weather data.
train_weather['sea_level_pressure'].isna().sum()

10618

We can see that although some values were filled in, most were not (8755 of the original 10618 missing values remain).

In [24]:
train_weather.groupby(['site_id']).count()

Unnamed: 0_level_0,timestamp,air_temperature,cloud_coverage,dew_temperature,precip_depth_1_hr,sea_level_pressure,wind_direction,wind_speed
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,8784,8781,4954,8781,8783,8699,8534,8784
1,8763,8762,1701,8762,0,8711,8760,8763
2,8783,8782,6429,8782,8717,8739,8192,8778
3,8780,8776,5138,8774,8743,8622,8627,8776
4,8783,8783,4553,8781,7466,8710,8678,8783
5,8755,8753,2725,8753,0,0,8460,8752
6,8782,8772,5790,8772,8782,8607,8003,8743
7,8614,8614,0,8591,731,8606,8614,8614
8,8784,8781,4954,8781,8783,8699,8534,8784
9,8780,8775,5322,8773,8773,8541,6222,8683


We can see that for the site with `site_id` 5 there are no `sea_level_pressure` values at all, and therefore we have no way of filling in these data. Precipitation (`precip_depth_1_hr`) is missing entirely for that site as well.

We will probably throw away this attribute or site, because we have no way of filling in data.

In [25]:
check_mean_and_variance(sea_level_pressure_feature.sea_level_pressure)

mean = 0.0, variance = 1.0


### Wind speed

In this preprocessing step we fill in missing values using a rolling average and scale the values to zero mean and unit variance.

In [26]:
wind_speed_feature = pd.DataFrame(wind_speed_pipeline.fit_transform(tw_copy))
wind_speed_feature.head()

Unnamed: 0,wind_speed
0,-1.524278
1,-0.881804
2,-1.524278
3,-1.524278
4,-0.410656


In [27]:
wind_speed_feature.wind_speed.isna().sum()

0

In [28]:
check_mean_and_variance(wind_speed_feature.wind_speed)

mean = -0.0, variance = 1.0


We can see that there are no missing values and that the values are properly scaled.

In this preprocessing step we focus on replacing outliers with the 5th or the 95th percentile.

In [29]:
wind_speed_without_outliers_feature = \
    pd.DataFrame(wind_speed_without_outliers_pipeline.fit_transform(tw_copy))
wind_speed_without_outliers_feature.head()

Unnamed: 0,wind_speed
0,-1.609404
1,-0.919681
2,-1.609404
3,-1.609404
4,-0.413884


In [30]:
get_outliers(wind_speed_without_outliers_feature, 'wind_speed')

lower bound: -2.6439894378040787
upper bound: 2.6898716490125114


Unnamed: 0,wind_speed


We can see that outliers were correctly replaced.

### Wind direction

In this preprocessing step we fill in missing values using a rolling average and scale the values to zero mean and unit variance.

In [31]:
wind_direction_feature = pd.DataFrame(wind_direction_pipeline.fit_transform(tw_copy))
wind_direction_feature.head()

Unnamed: 0,wind_direction
0,-1.615903
1,-0.983175
2,-1.615903
3,-1.615903
4,0.64384


In [32]:
wind_direction_feature.wind_direction.isna().sum()

0

In [33]:
check_mean_and_variance(wind_direction_feature.wind_direction)

mean = -0.0, variance = 1.0


We can see that there are no missing values and that the values are properly scaled.

### Union of features

The resulting weather data looks as follows.

In [34]:
weather_features = weather_fu.union_features(train_weather)
weather_features

Unnamed: 0,site_id,timestamp,air_temperature,dew_temperature,wind_direction,wind_speed
0,0,2016-01-01 00:00:00,0.995738,1.29247,-1.6159,-1.52428
1,0,2016-01-01 01:00:00,0.939274,1.40485,-0.983175,-0.881804
2,0,2016-01-01 02:00:00,0.788704,1.40485,-1.6159,-1.52428
3,0,2016-01-01 03:00:00,0.628723,1.35377,-1.6159,-1.52428
4,0,2016-01-01 04:00:00,0.525206,1.29247,0.64384,-0.410656
...,...,...,...,...,...,...
139768,15,2016-12-31 19:00:00,-1.0746,-1.56789,0.0111115,0.917124
139769,15,2016-12-31 20:00:00,-1.09343,-1.65983,0.0111115,1.77376
139770,15,2016-12-31 21:00:00,-1.09343,-1.48617,0.0111115,0.660134
139771,15,2016-12-31 22:00:00,-1.14989,-1.43509,-0.0792782,0.445976


In [35]:
# Build the weather feature table with the outlier-replacing feature union.
# NOTE(review): this rebinds `weather_features`, so the with-outliers frame
# built in the previous cell is no longer reachable under that name.
weather_features = weather_without_outliers_fu.union_features(train_weather)
weather_features

Unnamed: 0,site_id,timestamp,air_temperature,dew_temperature,wind_direction,wind_speed
0,0,2016-01-01 00:00:00,1.01022,1.31471,-1.6159,-1.6094
1,0,2016-01-01 01:00:00,0.952421,1.42985,-0.983175,-0.919681
2,0,2016-01-01 02:00:00,0.7983,1.42985,-1.6159,-1.6094
3,0,2016-01-01 03:00:00,0.634545,1.37751,-1.6159,-1.6094
4,0,2016-01-01 04:00:00,0.528587,1.31471,0.64384,-0.413884
...,...,...,...,...,...,...
139768,15,2016-12-31 19:00:00,-1.10896,-1.61618,0.0111115,1.01154
139769,15,2016-12-31 20:00:00,-1.12822,-1.71039,0.0111115,1.93118
139770,15,2016-12-31 21:00:00,-1.12822,-1.53244,0.0111115,0.735655
139771,15,2016-12-31 22:00:00,-1.18602,-1.4801,-0.0792782,0.505747


## Meter data

In [5]:
meter_data = pd.read_csv('data/train.csv')
meter_data

Unnamed: 0,building_id,meter,timestamp,meter_reading
0,0,0,2016-01-01 00:00:00,0.000
1,1,0,2016-01-01 00:00:00,0.000
2,2,0,2016-01-01 00:00:00,0.000
3,3,0,2016-01-01 00:00:00,0.000
4,4,0,2016-01-01 00:00:00,0.000
...,...,...,...,...
20216095,1444,0,2016-12-31 23:00:00,8.750
20216096,1445,0,2016-12-31 23:00:00,4.825
20216097,1446,0,2016-12-31 23:00:00,0.000
20216098,1447,0,2016-12-31 23:00:00,159.575


We take only 10 buildings for the illustration because this preprocessing takes a long time.

In [4]:
meter_data_aux = meter_data[meter_data.building_id < 10]

In [5]:
# Copy the 10-building subset selected above rather than the full frame with
# more than 20 million rows, so that the illustration below stays fast.
md_copy = meter_data_aux.copy()

### Meter reading

In this preprocessing step we keep only the electricity meter type and meter readings smaller than 200, and add previous values of the meter reading to simulate a time series. We also scale the previous values to zero mean and unit variance.

In [5]:
%%time
meter_reading_feature = pd.DataFrame(meter_pipeline.fit_transform(md_copy))
meter_reading_feature

CPU times: user 17.4 s, sys: 67.4 ms, total: 17.5 s
Wall time: 17.5 s


Unnamed: 0,meter_reading,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5
0,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
62204,51.0555,0.301109,0.286152,0.266209,0.555373,1.841647
62205,4.9144,-0.601281,-0.586323,-0.593801,-0.601281,-0.593801
62206,102.7250,1.209722,1.259591,1.265818,1.265818,1.178569
62207,8.8733,-0.373190,-0.522757,-0.348262,-0.504061,-0.310870


In [11]:
# Report mean and variance for each of the five lagged, scaled reading columns.
scaled_columns = [f'meter_reading_scaled_{lag}' for lag in range(1, 6)]
for column_name in scaled_columns:
    check_mean_and_variance(meter_reading_feature[column_name])

mean = -0.01, variance = 0.98
mean = -0.01, variance = 0.97
mean = -0.01, variance = 0.96
mean = -0.02, variance = 0.96
mean = -0.02, variance = 0.95


We can see that the values are properly scaled. The small deviations are caused by missing values in particular sets of previous values. However, we cannot verify here that the values belong only to the electricity meter type, because returning the meter type column from the pipeline would cause it to be scaled as well.

### Union of features

The resulting meter data looks as follows.

In [6]:
%%time
meter_features = meter_fu.union_features(meter_data_aux)
meter_features

CPU times: user 178 ms, sys: 199 ms, total: 377 ms
Wall time: 19.1 s


Unnamed: 0,building_id,timestamp,meter_reading,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5
0,0,2016-01-01 00:00:00,0,0,0,0,0,0
1,1,2016-01-01 00:00:00,0,0,0,0,0,0
2,2,2016-01-01 00:00:00,0,0,0,0,0,0
3,3,2016-01-01 00:00:00,0,0,0,0,0,0
4,4,2016-01-01 00:00:00,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
62204,1,2016-12-31 23:00:00,51.0555,0.301109,0.286152,0.266209,0.555373,1.84165
62205,2,2016-12-31 23:00:00,4.9144,-0.601281,-0.586323,-0.593801,-0.601281,-0.593801
62206,3,2016-12-31 23:00:00,102.725,1.20972,1.25959,1.26582,1.26582,1.17857
62207,5,2016-12-31 23:00:00,8.8733,-0.37319,-0.522757,-0.348262,-0.504061,-0.31087


Since the samples are not sorted by building, we display only one building to check that the previous values are correct.

In [13]:
meter_features[meter_features.building_id == 1]

Unnamed: 0,building_id,timestamp,meter_reading,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5
1,1,2016-01-01 00:00:00,0,0,0,0,0,0
11,1,2016-01-01 01:00:00,0,-0.69102,0,0,0,0
21,1,2016-01-01 02:00:00,0,-0.69102,-0.69102,0,0,0
31,1,2016-01-01 03:00:00,0,-0.69102,-0.69102,-0.69102,0,0
41,1,2016-01-01 04:00:00,0,-0.69102,-0.69102,-0.69102,-0.69102,0
...,...,...,...,...,...,...,...,...
62180,1,2016-12-31 19:00:00,68.256,1.84165,0.934276,1.33811,1.57243,0.944248
62186,1,2016-12-31 20:00:00,52.4206,0.555373,1.84165,0.934276,1.33811,1.57243
62192,1,2016-12-31 21:00:00,53.5127,0.266209,0.555373,1.84165,0.934276,1.33811
62198,1,2016-12-31 22:00:00,54.3318,0.286152,0.266209,0.555373,1.84165,0.934276


As we can see, previous values are correct.

## Data merging

In [6]:
preprocessed_data_path = 'data/preprocessed'
# Create the output directory (and any missing parent) from the single path
# variable; exist_ok=True keeps the cell safe to re-run.
os.makedirs(preprocessed_data_path, exist_ok=True)

We have to split the meter data into train, dev and test sets. The resulting sets are disjoint with respect to the buildings they contain, and each split contains buildings from every site.

In [7]:
%%time
train_meter_data, dev_meter_data, test_meter_data = split_meter_data(
    meter_data,
    building_metadata,
    train_size=0.8,
    dev_size=0.1,
    test_size=0.1
)

CPU times: user 6.92 s, sys: 656 ms, total: 7.58 s
Wall time: 7.58 s


In [8]:
train_meter_data

Unnamed: 0,building_id,meter,timestamp,meter_reading
0,0,0,2016-01-01 00:00:00,0.000
1,1,0,2016-01-01 00:00:00,0.000
2,2,0,2016-01-01 00:00:00,0.000
3,3,0,2016-01-01 00:00:00,0.000
4,4,0,2016-01-01 00:00:00,0.000
...,...,...,...,...
20216095,1444,0,2016-12-31 23:00:00,8.750
20216096,1445,0,2016-12-31 23:00:00,4.825
20216097,1446,0,2016-12-31 23:00:00,0.000
20216098,1447,0,2016-12-31 23:00:00,159.575


In [9]:
dev_meter_data

Unnamed: 0,building_id,meter,timestamp,meter_reading
9,9,0,2016-01-01 00:00:00,0.000
12,12,0,2016-01-01 00:00:00,0.000
14,14,0,2016-01-01 00:00:00,0.000
32,33,0,2016-01-01 00:00:00,0.000
43,44,0,2016-01-01 00:00:00,0.000
...,...,...,...,...
20216046,1412,1,2016-12-31 23:00:00,0.000
20216047,1412,2,2016-12-31 23:00:00,870.821
20216051,1414,0,2016-12-31 23:00:00,70.650
20216052,1414,2,2016-12-31 23:00:00,2420.780


In [10]:
test_meter_data

Unnamed: 0,building_id,meter,timestamp,meter_reading
23,23,0,2016-01-01 00:00:00,0.000
30,31,0,2016-01-01 00:00:00,0.000
38,39,0,2016-01-01 00:00:00,0.000
41,42,0,2016-01-01 00:00:00,0.000
63,65,0,2016-01-01 00:00:00,0.000
...,...,...,...,...
20216082,1435,0,2016-12-31 23:00:00,4.725
20216083,1436,0,2016-12-31 23:00:00,11.600
20216084,1436,2,2016-12-31 23:00:00,1274.660
20216087,1438,0,2016-12-31 23:00:00,100.675


### With outliers

#### Train meter data

In [11]:
%%time
# Training split: build the target `y` and the merged feature table `x`.
# fit=True is passed only for the training split; the dev and test cells below
# call the same feature unions with fit=False (presumably so that the fitted
# statistics come from training data only — confirm in preprocess_and_merge_data).
y, x = preprocess_and_merge_data(
    building_metadata, train_weather, train_meter_data, buildings_fu, weather_fu, meter_fu, fit=True
)

CPU times: user 1min 7s, sys: 9.8 s, total: 1min 17s
Wall time: 47min 11s


In [12]:
y

Unnamed: 0,meter_reading
1,0
2,0
3,0
4,0
5,0
...,...
7781021,199.97
7781022,186.13
7781023,198.35
7781024,190.38


In [13]:
x

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
1,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.764729,-1.6159,-1.52428
2,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.764729,-0.983175,-0.881804
3,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.764729,-1.6159,-1.52428
4,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.35377,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.764729,-1.6159,-1.52428
5,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.764729,0.64384,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7781021,0.0,0.0,0.0,1.0,0.0,0.0,-1.14989,-0.576981,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,2.852735,1.63813,-0.881804
7781022,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,-1.0867,2.67511,-1.0867,-1.0867,-1.0867,2.852735,-1.16395,-0.196498
7781023,0.0,0.0,0.0,1.0,0.0,0.0,-0.72641,-0.464609,2.41475,-1.0867,2.67511,-1.0867,-1.0867,2.852735,-1.16395,0.0176603
7781024,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.64463,2.41475,-1.0867,2.67511,-1.0867,2.852735,-0.892786,-0.624814


In [14]:
y.to_csv(os.path.join(preprocessed_data_path, 'train_y.gz'), index=False)

In [15]:
x.to_csv(os.path.join(preprocessed_data_path, 'train_x.gz'), index=False)

In [16]:
%%time
y_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'train_y.gz'))

CPU times: user 999 ms, sys: 27.5 ms, total: 1.03 s
Wall time: 1.06 s


In [17]:
y_loaded

Unnamed: 0,meter_reading
0,0.00
1,0.00
2,0.00
3,0.00
4,0.00
...,...
7752649,199.97
7752650,186.13
7752651,198.35
7752652,190.38


In [18]:
%%time
x_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'train_x.gz'))

CPU times: user 26.9 s, sys: 821 ms, total: 27.7 s
Wall time: 28 s


In [19]:
x_loaded

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
0,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.764729,-1.615903,-1.524278
1,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.764729,-0.983175,-0.881804
2,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.764729,-1.615903,-1.524278
3,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.353769,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.764729,-1.615903,-1.524278
4,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.764729,0.643840,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7752649,0.0,0.0,0.0,1.0,0.0,0.0,-1.149889,-0.576981,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,2.852735,1.638126,-0.881804
7752650,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,-1.086704,2.675105,-1.086704,-1.086704,-1.086704,2.852735,-1.163955,-0.196498
7752651,0.0,0.0,0.0,1.0,0.0,0.0,-0.726410,-0.464609,2.414749,-1.086704,2.675105,-1.086704,-1.086704,2.852735,-1.163955,0.017660
7752652,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.644630,2.414749,-1.086704,2.675105,-1.086704,2.852735,-0.892786,-0.624814


#### Dev meter data

In [20]:
%%time
y, x = preprocess_and_merge_data(
    building_metadata, train_weather, dev_meter_data, buildings_fu, weather_fu, meter_fu, fit=False
)

CPU times: user 23.6 s, sys: 1.4 s, total: 25 s
Wall time: 8min 1s


In [21]:
y

Unnamed: 0,meter_reading
1,0
2,0
3,0
4,0
5,0
...,...
876288,0
876289,0
876290,0
876291,0


In [22]:
x

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
1,0.0,0.0,0.0,1.0,0.0,0.0,0.995738,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.588014,-1.6159,-1.52428
2,0.0,0.0,0.0,1.0,0.0,0.0,0.939274,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.588014,-0.983175,-0.881804
3,0.0,0.0,0.0,1.0,0.0,0.0,0.788704,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.588014,-1.6159,-1.52428
4,0.0,0.0,0.0,1.0,0.0,0.0,0.628723,1.35377,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.588014,-1.6159,-1.52428
5,0.0,0.0,0.0,1.0,0.0,0.0,0.525206,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,-0.588014,0.64384,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
876288,0.0,0.0,0.0,0.0,0.0,1.0,-0.359395,-0.127494,-1.0867,-1.0867,-1.0867,0.32334,-1.0867,1.313140,-1.6159,-1.52428
876289,0.0,0.0,0.0,1.0,0.0,0.0,-1.56396,-1.08776,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,2.137131,0.0111115,0.660134
876290,0.0,0.0,0.0,1.0,0.0,0.0,-1.56396,-1.08776,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,2.137131,0.191891,1.13128
876291,0.0,0.0,0.0,1.0,0.0,0.0,-1.5169,-1.03668,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,2.137131,0.37267,0.231818


In [23]:
y.to_csv(os.path.join(preprocessed_data_path, 'dev_y.gz'), index=False)

In [26]:
x.to_csv(os.path.join(preprocessed_data_path, 'dev_x.gz'), index=False)

In [27]:
%%time
y_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'dev_y.gz'))

CPU times: user 141 ms, sys: 0 ns, total: 141 ms
Wall time: 144 ms


In [28]:
y_loaded

Unnamed: 0,meter_reading
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
873018,0.0
873019,0.0
873020,0.0
873021,0.0


In [29]:
%%time
x_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'dev_x.gz'))

CPU times: user 2.73 s, sys: 14 µs, total: 2.73 s
Wall time: 2.76 s


In [30]:
x_loaded

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
0,0.0,0.0,0.0,1.0,0.0,0.0,0.995738,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.588014,-1.615903,-1.524278
1,0.0,0.0,0.0,1.0,0.0,0.0,0.939274,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.588014,-0.983175,-0.881804
2,0.0,0.0,0.0,1.0,0.0,0.0,0.788704,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.588014,-1.615903,-1.524278
3,0.0,0.0,0.0,1.0,0.0,0.0,0.628723,1.353769,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.588014,-1.615903,-1.524278
4,0.0,0.0,0.0,1.0,0.0,0.0,0.525206,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,-0.588014,0.643840,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873018,0.0,0.0,0.0,0.0,0.0,1.0,-0.359395,-0.127494,-1.086704,-1.086704,-1.086704,0.323340,-1.086704,1.313140,-1.615903,-1.524278
873019,0.0,0.0,0.0,1.0,0.0,0.0,-1.563957,-1.087761,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,2.137131,0.011112,0.660134
873020,0.0,0.0,0.0,1.0,0.0,0.0,-1.563957,-1.087761,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,2.137131,0.191891,1.131282
873021,0.0,0.0,0.0,1.0,0.0,0.0,-1.516904,-1.036683,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,2.137131,0.372670,0.231818


#### Test meter data

In [31]:
%%time
y, x = preprocess_and_merge_data(
    building_metadata, train_weather, test_meter_data, buildings_fu, weather_fu, meter_fu, fit=False
)

CPU times: user 24.5 s, sys: 1.48 s, total: 26 s
Wall time: 7min 58s


In [32]:
y

Unnamed: 0,meter_reading
1,0
2,0
3,0
4,0
5,0
...,...
860366,15.9667
860367,15.305
860368,15.5685
860369,14.9298


In [33]:
x

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
1,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,0.350155,-1.6159,-1.52428
2,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,0.350155,-0.983175,-0.881804
3,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.40485,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,0.350155,-1.6159,-1.52428
4,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.35377,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,0.350155,-1.6159,-1.52428
5,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.29247,-1.0867,-1.0867,-1.0867,-1.0867,-1.0867,0.350155,0.64384,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860366,0.0,0.0,0.0,0.0,1.0,0.0,-0.152361,-0.413531,-0.771345,-0.773896,-0.673819,-0.726524,-0.757073,-0.828351,-1.16395,-0.196498
860367,0.0,0.0,0.0,0.0,1.0,0.0,-0.312342,-0.352237,-0.78634,-0.771345,-0.773896,-0.673819,-0.726524,-0.828351,-1.16395,0.231818
860368,0.0,0.0,0.0,0.0,1.0,0.0,-0.312342,-0.413531,-0.798788,-0.78634,-0.771345,-0.773896,-0.673819,-0.828351,-1.16395,0.660134
860369,0.0,0.0,0.0,0.0,1.0,0.0,-0.255878,-0.413531,-0.793831,-0.798788,-0.78634,-0.771345,-0.773896,-0.828351,-0.892786,-0.410656


In [34]:
y.to_csv(os.path.join(preprocessed_data_path, 'test_y.gz'), index=False)

In [35]:
x.to_csv(os.path.join(preprocessed_data_path, 'test_x.gz'), index=False)

In [36]:
%%time
y_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'test_y.gz'))

CPU times: user 138 ms, sys: 56 µs, total: 138 ms
Wall time: 139 ms


In [37]:
y_loaded

Unnamed: 0,meter_reading
0,0.0000
1,0.0000
2,0.0000
3,0.0000
4,0.0000
...,...
857121,15.9667
857122,15.3050
857123,15.5685
857124,14.9298


In [38]:
%%time
x_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'test_x.gz'))

CPU times: user 3.02 s, sys: 28.2 ms, total: 3.05 s
Wall time: 3.08 s


In [39]:
x_loaded

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_scaled_1,meter_reading_scaled_2,meter_reading_scaled_3,meter_reading_scaled_4,meter_reading_scaled_5,square_feet,wind_direction,wind_speed
0,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,0.350155,-1.615903,-1.524278
1,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,0.350155,-0.983175,-0.881804
2,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.404847,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,0.350155,-1.615903,-1.524278
3,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.353769,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,0.350155,-1.615903,-1.524278
4,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.292475,-1.086704,-1.086704,-1.086704,-1.086704,-1.086704,0.350155,0.643840,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857121,0.0,0.0,0.0,0.0,1.0,0.0,-0.152361,-0.413531,-0.771345,-0.773896,-0.673819,-0.726524,-0.757073,-0.828351,-1.163955,-0.196498
857122,0.0,0.0,0.0,0.0,1.0,0.0,-0.312342,-0.352237,-0.786340,-0.771345,-0.773896,-0.673819,-0.726524,-0.828351,-1.163955,0.231818
857123,0.0,0.0,0.0,0.0,1.0,0.0,-0.312342,-0.413531,-0.798788,-0.786340,-0.771345,-0.773896,-0.673819,-0.828351,-1.163955,0.660134
857124,0.0,0.0,0.0,0.0,1.0,0.0,-0.255878,-0.413531,-0.793831,-0.798788,-0.786340,-0.771345,-0.773896,-0.828351,-0.892786,-0.410656


### Without outliers

#### Train meter data

In [6]:
%%time
# Same call as for the with-outliers training split, but using
# `weather_without_outliers_fu`; the returned target is discarded (`_`).
# NOTE(review): the output recorded for this cell shows `meter_reading_1..5`
# columns and a larger row count than the with-outliers run, so it appears to
# come from an older version of the pipeline — re-run and confirm that the
# rows still align with the saved `train_y.gz`.
_, x = preprocess_and_merge_data(
    building_metadata, train_weather, train_meter_data, buildings_fu, weather_without_outliers_fu, meter_fu, fit=True
)

CPU times: user 1min 6s, sys: 11.9 s, total: 1min 18s
Wall time: 57min 16s


In [18]:
x

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_1,meter_reading_2,meter_reading_3,meter_reading_4,meter_reading_5,square_feet,wind_direction,wind_speed
1,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.29247,-1.075,0,0,0,0,-0.764729,-1.6159,-1.52428
2,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.40485,-1.075,-1.075,0,0,0,-0.764729,-0.983175,-0.881804
3,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.40485,-1.075,-1.075,-1.075,0,0,-0.764729,-1.6159,-1.52428
4,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.35377,-1.075,-1.075,-1.075,-1.075,0,-0.764729,-1.6159,-1.52428
5,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.29247,-1.075,-1.075,-1.075,-1.075,-1.075,-0.764729,0.64384,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9517685,0.0,0.0,0.0,1.0,0.0,0.0,-1.14989,-0.576981,0,0,0,0,0,2.852735,1.63813,-0.881804
9517686,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,0,2.66574,0,0,0,2.852735,-1.16395,-0.196498
9517687,0.0,0.0,0.0,1.0,0.0,0.0,-0.72641,-0.464609,2.40684,0,2.66574,0,0,2.852735,-1.16395,0.0176603
9517688,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.63543,2.40684,0,2.66574,0,2.852735,-0.892786,-0.624814


In [28]:
x.to_csv(os.path.join(preprocessed_data_path, 'train_x_without_outliers.gz'), index=False)

In [6]:
%%time
x_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'train_x_without_outliers.gz'))

CPU times: user 31.6 s, sys: 940 ms, total: 32.5 s
Wall time: 32.6 s


In [7]:
x_loaded

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_1,meter_reading_2,meter_reading_3,meter_reading_4,meter_reading_5,square_feet,wind_direction,wind_speed
0,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.292475,-1.074995,0.000000,0.000000,0.000000,0.000000,-0.764729,-1.615903,-1.524278
1,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.404847,-1.074995,-1.074995,0.000000,0.000000,0.000000,-0.764729,-0.983175,-0.881804
2,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.404847,-1.074995,-1.074995,-1.074995,0.000000,0.000000,-0.764729,-1.615903,-1.524278
3,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.353769,-1.074995,-1.074995,-1.074995,-1.074995,0.000000,-0.764729,-1.615903,-1.524278
4,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.292475,-1.074995,-1.074995,-1.074995,-1.074995,-1.074995,-0.764729,0.643840,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9517684,0.0,0.0,0.0,1.0,0.0,0.0,-1.149889,-0.576981,0.000000,0.000000,0.000000,0.000000,0.000000,2.852735,1.638126,-0.881804
9517685,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,0.000000,2.665735,0.000000,0.000000,0.000000,2.852735,-1.163955,-0.196498
9517686,0.0,0.0,0.0,1.0,0.0,0.0,-0.726410,-0.464609,2.406838,0.000000,2.665735,0.000000,0.000000,2.852735,-1.163955,0.017660
9517687,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.635431,2.406838,0.000000,2.665735,0.000000,2.852735,-0.892786,-0.624814


#### Dev meter data

In [6]:
%%time
_, x = preprocess_and_merge_data(
    building_metadata, train_weather, dev_meter_data, buildings_fu, weather_without_outliers_fu, meter_fu, fit=False
)

CPU times: user 1min 6s, sys: 11.9 s, total: 1min 18s
Wall time: 57min 16s


In [18]:
# Display the merged dev feature matrix (pandas renders only the first and last rows).
# NOTE(review): the stored output of this cell is row-for-row identical to the training cell's
# output, including the row count, so it was probably not regenerated for the dev split —
# re-run and confirm.
x

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_1,meter_reading_2,meter_reading_3,meter_reading_4,meter_reading_5,square_feet,wind_direction,wind_speed
1,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.29247,-1.075,0,0,0,0,-0.764729,-1.6159,-1.52428
2,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.40485,-1.075,-1.075,0,0,0,-0.764729,-0.983175,-0.881804
3,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.40485,-1.075,-1.075,-1.075,0,0,-0.764729,-1.6159,-1.52428
4,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.35377,-1.075,-1.075,-1.075,-1.075,0,-0.764729,-1.6159,-1.52428
5,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.29247,-1.075,-1.075,-1.075,-1.075,-1.075,-0.764729,0.64384,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9517685,0.0,0.0,0.0,1.0,0.0,0.0,-1.14989,-0.576981,0,0,0,0,0,2.852735,1.63813,-0.881804
9517686,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,0,2.66574,0,0,0,2.852735,-1.16395,-0.196498
9517687,0.0,0.0,0.0,1.0,0.0,0.0,-0.72641,-0.464609,2.40684,0,2.66574,0,0,2.852735,-1.16395,0.0176603
9517688,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.63543,2.40684,0,2.66574,0,2.852735,-0.892786,-0.624814


In [28]:
# Persist the preprocessed dev features as a gzip-compressed CSV. The row index is left out
# of the file, so a reload gets a fresh 0-based index.
x.to_csv(
    path_or_buf=os.path.join(preprocessed_data_path, 'dev_x_without_outliers.gz'),
    index=False,
    compression='gzip',  # same codec pandas infers from the '.gz' suffix; spelled out for readers
)

In [6]:
%%time
x_loaded = pd.read_csv(os.path.join(preprocessed_data_path, 'dev_x_without_outliers.gz'))

CPU times: user 31.6 s, sys: 940 ms, total: 32.5 s
Wall time: 32.6 s


In [7]:
# Display the reloaded dev features. The row labels restart at 0 because the file was written
# with index=False.
# NOTE(review): the stored output of this cell is identical to the reloaded training frame's
# output, so it looks stale — re-run and confirm.
x_loaded

Unnamed: 0,Education,Entertainment/public assembly,Lodging/residential,Office,Other,Public services,air_temperature,dew_temperature,meter_reading_1,meter_reading_2,meter_reading_3,meter_reading_4,meter_reading_5,square_feet,wind_direction,wind_speed
0,1.0,0.0,0.0,0.0,0.0,0.0,0.995738,1.292475,-1.074995,0.000000,0.000000,0.000000,0.000000,-0.764729,-1.615903,-1.524278
1,1.0,0.0,0.0,0.0,0.0,0.0,0.939274,1.404847,-1.074995,-1.074995,0.000000,0.000000,0.000000,-0.764729,-0.983175,-0.881804
2,1.0,0.0,0.0,0.0,0.0,0.0,0.788704,1.404847,-1.074995,-1.074995,-1.074995,0.000000,0.000000,-0.764729,-1.615903,-1.524278
3,1.0,0.0,0.0,0.0,0.0,0.0,0.628723,1.353769,-1.074995,-1.074995,-1.074995,-1.074995,0.000000,-0.764729,-1.615903,-1.524278
4,1.0,0.0,0.0,0.0,0.0,0.0,0.525206,1.292475,-1.074995,-1.074995,-1.074995,-1.074995,-1.074995,-0.764729,0.643840,-0.410656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9517684,0.0,0.0,0.0,1.0,0.0,0.0,-1.149889,-0.576981,0.000000,0.000000,0.000000,0.000000,0.000000,2.852735,1.638126,-0.881804
9517685,0.0,0.0,0.0,1.0,0.0,0.0,-0.942855,-0.464609,0.000000,2.665735,0.000000,0.000000,0.000000,2.852735,-1.163955,-0.196498
9517686,0.0,0.0,0.0,1.0,0.0,0.0,-0.726410,-0.464609,2.406838,0.000000,2.665735,0.000000,0.000000,2.852735,-1.163955,0.017660
9517687,0.0,0.0,0.0,1.0,0.0,0.0,-0.679357,-0.413531,2.635431,2.406838,0.000000,2.665735,0.000000,2.852735,-0.892786,-0.624814
