# Check if the dates in the files seem to make sense


# Imports 

In [None]:
import altair as alt
import numpy as np
import pandas as pd
from pathlib import Path
import datetime
import tqdm

# Configuration

In [None]:
# Root folder of the preprocessed profile data. The path is machine-specific:
# keep the line for the machine you are working on active.
PREPROCESSED_DIR = Path(
    '/cw/dtaiproj/ml/2020-FLAIR-VITO/profile-clustering/new_preprocessed/'
)  # Jonas
# PREPROCESSED_DIR = Path('/Users/lolabotman/PycharmProjects/FluviusFullData/profiles/preprocessed') #Lola

# Sub-folder with the combined data; it is created (including missing
# parents, requested mode 0o770) when it does not exist yet.
result_path = PREPROCESSED_DIR.joinpath('combined')
result_path.mkdir(mode=0o770, parents=True, exist_ok=True)

In [None]:
# Load the two pickled tables from the combined folder.
# NOTE(review): presumably data_df holds the consumption time series and
# info_df the per-profile metadata, with info_df's index labelling the rows
# of data_df — confirm against the preprocessing notebook.
data_df = pd.read_pickle(result_path.joinpath('reindexed_DST_data.pkl'))
info_df = pd.read_pickle(result_path.joinpath('reindexed_info.pkl'))

In [None]:
# Show the loaded data table as the cell output for a quick visual check.
data_df

In [None]:
# Show the loaded info table as the cell output for a quick visual check.
info_df

In [None]:
# Derive a finer-grained source label: 'unique_source' starts as a copy of
# 'data_source' and is then refined for two subsets of the Infrax profiles.
from_infrax = info_df['data_source'].eq('Infrax')
info_df['unique_source'] = info_df['data_source']

# Infrax profiles with the 'heatpump' flag set are labelled 'infrax_prosumer'.
# NOTE(review): the mask is called is_prosumer but is built from the
# 'heatpump' column — confirm this is the intended column.
is_prosumer = from_infrax & info_df['heatpump']
info_df.loc[is_prosumer, 'unique_source'] = 'infrax_prosumer'

# Infrax profiles whose consumer type starts with 'app' get one label per
# consumer type. This runs last, so it wins for rows matching both masks.
is_app = from_infrax & info_df['consumer_type'].str.startswith('app')
info_df.loc[is_app, 'unique_source'] = 'infrax_' + info_df.loc[is_app, 'consumer_type']

# Cell output: the resulting set of labels.
info_df['unique_source'].unique()

In [None]:
# Build two charts per unique source from the median daily consumption:
# a month x day heatmap and a line chart over time. Shifted or implausible
# dates should stand out in either view.
heatmaps = []
line_charts = []
for data_source, info_subset_df in info_df.groupby('unique_source'):
    consumption_df = (
        # rows of data_df that belong to this source
        data_df.loc[info_subset_df.index]
        # Put the timestamps on the row axis before resampling:
        # resample(..., axis=1) is deprecated in recent pandas, and
        # transposing first is the documented replacement.
        .T
        # daily consumption per profile
        .resample('1D').sum()
        # median over the profiles of this source, one value per day
        .median(axis=1)
        .to_frame('value')
        # Leave out the 2016 leap day; errors='ignore' avoids a KeyError
        # when the data does not contain that date.
        .drop(pd.to_datetime('2016-02-29'), axis=0, errors='ignore')
        # The lambdas below expect the timestamp column to be called 'index',
        # i.e. the time axis of data_df must be unnamed.
        .reset_index()
        .assign(
            month=lambda x: x['index'].dt.month,
            day=lambda x: x['index'].dt.day,
        )
    )
    heatmaps.append(
        alt.Chart(consumption_df, title=data_source, width=800, height=400)
        .mark_rect()
        .encode(
            x='day:N',
            y='month:N',
            color='value:Q',
        )
    )
    line_charts.append(
        alt.Chart(consumption_df, title=data_source, width=800, height=400)
        .mark_line()
        .encode(
            x='index:T',
            y='value:Q',
        )
    )
    

# Heatmaps and line charts

In [None]:
# Left column: the heatmaps stacked vertically, each with its own colour scale.
heatmap_column = alt.vconcat(*heatmaps).resolve_scale(color='independent')
# Right column: the matching line charts, stacked in the same order.
line_chart_column = alt.vconcat(*line_charts).resolve_scale(color='independent')
# Cell output: both columns side by side.
heatmap_column | line_chart_column

In [None]:
# Only the line charts, stacked vertically (one per unique source).
stacked_line_charts = alt.vconcat(*line_charts)
stacked_line_charts.resolve_scale(color='independent')

In [None]:
# Cell output: the distinct consumer types that occur in the info table.
info_df['consumer_type'].unique()