# Imports

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
mpl.style.use('default')
import glob
import dask
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster, progress, fire_and_forget
from dask import delayed

%matplotlib inline

# Setting up LocalCluster & run it
This step might not be needed, but in my experience `processes=False, n_workers=1` are the best options to use.

In [2]:
# Single in-process (threaded) worker; see the note above this cell.
# Alternative that was tried:
#cluster = LocalCluster(processes=False, n_workers=8)
cluster = LocalCluster(processes=False, n_workers=1)
#cpu_worker = cluster.workers[0]
#cpu_worker.name = 'cpu'
#cpu_worker.set_resources(CPU=90)

# Connect to the cluster created above. The former `processes=True` argument
# contradicted the cluster's `processes=False`; extra Client keyword arguments
# are only used when the Client has to create its own LocalCluster.
client = Client(cluster)

In [3]:
# Show the cluster object as the cell output.
cluster

In [4]:
# Show the client object as the cell output (scheduler address, dashboard link, workers).
client

0,1
Client  Scheduler: inproc://192.168.0.100/22836/1  Dashboard: http://localhost:8787/status,Cluster  Workers: 1  Cores: 8  Memory: 17.02 GB


# Defining functions

In [17]:
WEATHER_CSV_PATH = 'F://datc//opschaler//weather_data//knmi_10_min_raw_data//output//df_combined_uncleaned.csv'


def read_weather_data(path=WEATHER_CSV_PATH):
    """
    Read the weather data into a Pandas DataFrame indexed by its 'datetime' column.

    The file is tab separated and lines starting with '#' are skipped.
    All remaining columns are cast to float32.

    :param path: Location of the weather csv file, defaults to WEATHER_CSV_PATH.
    :return: Pandas DataFrame
    """
    # TODO: Check if UTC to gmt+1 conversion is being handled correctly
    weather = pd.read_csv(path, delimiter='\t', comment='#', parse_dates=['datetime'])
    return weather.set_index('datetime').astype('float32')


SMARTMETER_DATA_DIR = 'F:\\datc\\opschaler\\smartmeter_data\\'


def smartmeter_data(path=SMARTMETER_DATA_DIR):
    """
    Collect the file paths and dwelling id's of the smartmeter data.

    The dwelling id is taken as the 11 characters in front of the '.csv'
    extension of each file path.

    :param path: Directory holding the '<dwelling_id>.csv' files, defaults to SMARTMETER_DATA_DIR.
    :return: file_paths, dwelling_ids, both as NumPy arrays in the same (sorted) order.
    """
    import os  # local import: the notebook's import cell does not provide it

    # glob returns files in arbitrary order; sort so that slices such as
    # file_paths[:5] select the same dwellings on every run.
    file_paths = np.array(sorted(glob.glob(os.path.join(path, '*.csv'))))

    print('Detected %s smartmeter_data files.' % len(file_paths))
    dwelling_ids = np.array([file_path[-15:-4] for file_path in file_paths])

    return file_paths, dwelling_ids


def _coerce_datetime_column(frame):
    """Parse frame['datetime'] leniently and drop the rows that could not be parsed."""
    frame = frame.copy()
    frame['datetime'] = pd.to_datetime(frame['datetime'], errors='coerce')
    return frame.dropna(subset=['datetime'])


@delayed(nout=2)
def clean_prepare_smart_gas(file_path, dwelling_id):
    """
    Input is a dwelling_id.csv file.
    Output are cleaned & prepared dataframes (smart, gas).

    The first seven columns of the file become the smart dataframe, the remaining
    columns the gas dataframe. Both are indexed by their 'datetime' column and
    cast to float32.

    :param file_path: path to 'dwelling_id.csv' file
    :param dwelling_id: String, currently not used by this function
    :return: Smart and gas Pandas DataFrames
    """
    df = pd.read_csv(file_path, delimiter=';', header=0)
    df = df.rename(index=str, columns={'Timestamp': 'datetime', 'gasTimestamp': 'datetime'})

    # Copy the slices so the column assignments below write to independent
    # frames instead of views of df.
    smart = df.iloc[:, :7].copy()
    gas = df.iloc[:, 7:].copy()

    del df

    try:
        smart_datetime = pd.to_datetime(smart['datetime'])
        gas_datetime = pd.to_datetime(gas['datetime'])
    except (ValueError, TypeError, OverflowError):
        print('datetime column contains non-datetime values')
        # The cleaning is done inline: clean_datetime is wrapped in @delayed and
        # would hand back a lazy object here instead of a DataFrame.
        smart = _coerce_datetime_column(smart)
        gas = _coerce_datetime_column(gas)
    else:
        smart['datetime'] = smart_datetime
        gas['datetime'] = gas_datetime

    smart = smart.set_index(['datetime'])
    gas = gas.set_index(['datetime'])

    smart = smart.astype(dtype='float32')
    gas = gas.astype(dtype='float32')

    return smart, gas


@delayed
def clean_datetime(df):
    """
    Input should be a df with a column called 'datetime'.
    Drops every row whose 'datetime' value is missing or cannot be parsed by
    pd.to_datetime(). The surviving rows are returned unchanged, so the
    'datetime' column itself is not converted.

    The whole column is parsed in one vectorised call instead of row by row.
    NOTE(review): recent pandas versions infer a single format for the whole
    column, so values in a different (but valid) format may also be dropped -
    confirm the input files use one timestamp format.

    :param df: Pandas DataFrame containing a datetime column called 'datetime'.
    :return: Pandas DataFrame
    """
    # errors='coerce' turns unparseable values into NaT instead of raising
    parsed = pd.to_datetime(df['datetime'], errors='coerce')
    valid = parsed.notna()

    n_invalid = int((~valid).sum())
    if n_invalid:
        print('-----')
        print('Dropping %s rows with a missing or unparseable datetime' % n_invalid)

    return df[valid]


@delayed(nout=3)
def resample_dfs(smart, gas, weather):
    """
    Resample the three dataframes to their working sample rates, using the mean per bin.

    :param smart: Pandas DataFrame, resampled to 10 seconds
    :param gas: Pandas DataFrame, resampled to one hour
    :param weather: Pandas DataFrame, resampled to 10 minutes
    :return: smart, gas, weather as resampled Pandas DataFrames
    """
    frames_and_rules = ((smart, '10s'), (gas, 'H'), (weather, '10min'))
    smart_rs, gas_rs, weather_rs = [frame.resample(rule).mean() for frame, rule in frames_and_rules]
    return smart_rs, gas_rs, weather_rs


@delayed
def create_hour_df(smart, gas, weather, dwelling_id):
    """
    Combine smart, gas and weather data into one dataframe with an hourly sample rate.

    A 'gasPower' column is added as the difference between consecutive 'gasMeter'
    values. The input dataframes are not modified.

    :param smart: Pandas DataFrame with a datetime index
    :param gas: Pandas DataFrame with a datetime index and a 'gasMeter' column
    :param weather: Pandas DataFrame with a datetime index
    :param dwelling_id: String, stored in the 'dwelling' column
    :return: Pandas DataFrame
    """
    gas_power = gas['gasMeter'].diff()  # Calculate gasPower column
    if len(gas_power) > 1:
        gas_power.iloc[0] = gas_power.iloc[1]  # Replace 1st entry (NaN) with 2nd entry
    # assign() returns a new frame: the caller's gas frame is also handed to
    # create_10s_df, so it must not be changed in place.
    gas = gas.assign(gasPower=gas_power)

    smart = smart.resample('H').mean()  # Down sample smart
    weather = weather.resample('H').mean()  # Down sample weather

    # Combine gas, smart, weather
    df_hour = pd.merge(smart, gas, left_index=True, right_index=True)
    df_hour = pd.merge(df_hour, weather, left_index=True, right_index=True)
    df_hour['dwelling'] = dwelling_id

    return df_hour


@delayed
def create_10s_df(smart, gas, weather, dwelling_id):
    """
    Combine smart, gas and weather data into one dataframe with a 10 second sample rate.

    Gas and weather are up sampled to 10 seconds with a forward fill. A 'gasPower'
    column is added as the difference between consecutive 'gasMeter' values.

    :param smart: Pandas DataFrame with a datetime index
    :param gas: Pandas DataFrame with a datetime index and a 'gasMeter' column
    :param weather: Pandas DataFrame with a datetime index
    :param dwelling_id: String, stored in the 'dwelling' column
    :return: Pandas DataFrame
    """
    gas = gas.resample('10s').ffill()  # Up sample gas to 10s
    # Calculate gasPower column, is this the right way? Or should we ffill it?
    # The diff is taken after the forward fill, so it is only non-zero on the
    # rows where the gasMeter value changes.
    gas_power = gas['gasMeter'].diff()
    if len(gas_power) > 1:
        gas_power.iloc[0] = gas_power.iloc[1]  # Replace 1st entry (NaN) with 2nd entry
    # Write the column in one step instead of a chained gas['gasPower'][0] = ... assignment
    gas = gas.assign(gasPower=gas_power)

    weather = weather.resample('10s').ffill()  # forward fill because the raw data is the 10 minute mean

    # Combine gas, smart, weather
    df_10s = pd.merge(smart, gas, left_index=True, right_index=True)
    df_10s = pd.merge(df_10s, weather, left_index=True, right_index=True)
    df_10s['dwelling'] = dwelling_id
    return df_10s


@delayed
def plot_nans(df, dwelling_id, resample_to):
    """
    Create a heatmap of the NaNs in the input DataFrame.
    :param df: Pandas DataFrame
    :param dwelling_id: String, used in the plot title
    :param resample_to: String to resample to, for example '1T' or 'H'
    :return: The matplotlib Figure containing the Seaborn heatmap
    """
    plt.clf()

    # Count the NaNs per resample bin (down sampling makes short NaN gaps visible),
    # then flag every bin that contains at least one NaN.
    df = df.isnull().resample(resample_to).sum() > 0

    # Reindex datetimes
    # https://stackoverflow.com/questions/41046630/set-time-formatting-on-a-datetime-index-when-plotting-pandas-series
    try:
        df.index = df.index.to_period('D')
    except Exception:
        # Best effort only: the plot still works with the original index
        print('plot_nans could not set df.index.to_period')

    # Plot heatmap
    n = int(len(df)*0.1)  # Choose amount of yticklabels to show

    try:
        ax = sns.heatmap(df, cmap='Reds', square=False, vmin=0, cbar=False, yticklabels=n*2, cbar_kws={})
    except TypeError:
        print('plot_nans TypeError')
        # Retry with seaborn's default tick labels
        ax = sns.heatmap(df, cmap='Reds', square=False, vmin=0, cbar=False, cbar_kws={})

    # Correct layout
    ax.invert_yaxis()
    ax.tick_params(axis='x', rotation=90)
    ax.tick_params(axis='y', rotation=0)
    ax.set(xlabel='Column [-]', ylabel='Index [-]')
    plt.title('Dwelling ID: '+dwelling_id)

    # The figure is returned instead of saved here: savefig crashes dask
    return ax.get_figure()


@delayed
def df_nan_checker(df, threshold_percentage):
    """
    Checks each column in the input dataframe for streaks of consecutive NaNs.

    The result is a table with one row per NaN streak and the columns
    'Column name', 'Start index', 'Stop index' and 'Amount of NaNs'.
    A streak that runs until the last row of the dataframe is included.

    The columns are checked by inner delayed tasks, so computing this function
    yields another lazy object; call .compute() on it to get the table.

    :param df: Pandas DataFrame
    :param threshold_percentage: Filter output based on NaN streaks being larger than x % of the total length of the dataframe.
    :return: Lazy (delayed) Pandas DataFrame
    """
    columns = df.columns
    df = df.isnull()

    @delayed
    def check_rows(df, column_name):
        """Return [start_index, stop_index, amount_of_NaNs] for every NaN streak in one column."""
        streaks = []
        start = stop = None
        count = 0

        for label, is_nan in zip(df.index, df[column_name]):
            if is_nan:
                if count == 0:
                    start = label
                stop = label
                count += 1
            elif count:
                streaks.append([start, stop, count])
                count = 0

        # A streak that is still open at the end of the column was never closed
        # by a non-NaN value, so record it here.
        if count:
            streaks.append([start, stop, count])

        return streaks

    output = [check_rows(df, column_name) for column_name in columns]

    @delayed
    def list_to_df(output):
        """Flatten the per column streak lists into one table and apply the threshold."""
        column_names = []
        starts = []
        stops = []
        amounts = []

        for column_name, streaks in zip(columns, output):
            for start, stop, amount in streaks:
                column_names.append(column_name)
                starts.append(start)
                stops.append(stop)
                amounts.append(amount)

        print('Appending NaN info to df')
        df_info = pd.DataFrame(
            {
                'Column name': column_names,
                'Start index': starts,
                'Stop index': stops,
                'Amount of NaNs': amounts,
            },
            columns=['Column name', 'Start index', 'Stop index', 'Amount of NaNs'],
        )
        if df_info.empty:
            return df_info

        # Keep only the streaks that cover at least threshold_percentage of the rows
        percentage = (df_info['Amount of NaNs'] / len(df)) * 100
        return df_info[~(percentage < threshold_percentage)]

    df_info = list_to_df(output)

    return df_info


UNPROCESSED_DIR = 'F://datc//opschaler//combined_gas_smart_weather_dfs//unprocessed//'


def save_df_unprocessed(df, dwelling_id, directory=UNPROCESSED_DIR):
    """
    Save unprocessed dataframe as a tab separated '<dwelling_id>.csv' file, index included.
    :param df: Pandas DataFrame
    :param dwelling_id: String, used as the file name
    :param directory: Directory to write to, defaults to UNPROCESSED_DIR.
    :return: None
    """
    import os  # local import: the notebook's import cell does not provide it

    df.to_csv(os.path.join(directory, dwelling_id + '.csv'), sep='\t', index=True)
    print('Saved unprocessed df: %s' % dwelling_id)


@delayed
def drop_nan_streaks_above_threshold(df, df_nan_table, thresholds):
    """
    Drops NaN streaks from the df when they are larger than the threshold value.
    This function also inputs df_nan_table because it has already been made by df_nan_checker.
    Columns without an entry in thresholds are ignored.

    :param df: Pandas DataFrame to process NaNs off
    :param df_nan_table: NaN info Pandas DataFrame of the input df, may still be lazy
    :param thresholds: Dictionary {'column_name':column_threshold}, column_threshold has to be an integer.
    :return: Pandas DataFrame
    """
    # The table normally arrives as a lazy object; an already computed
    # DataFrame is accepted as well.
    if hasattr(df_nan_table, 'compute'):
        df_nan_table = df_nan_table.compute()

    # Check for NaN streaks > threshold and drop them from the df
    length = len(df_nan_table['Amount of NaNs'])
    print('df_nan_table length: %s' % length)

    # Iterate over the values, not over labels 0..n-1: the table keeps its
    # original labels after rows were filtered out, so label lookups can miss.
    table_rows = zip(df_nan_table['Column name'], df_nan_table['Start index'],
                     df_nan_table['Stop index'], df_nan_table['Amount of NaNs'])

    streak_indices = []
    for i, (selected_column, start_index, stop_index, amount) in enumerate(table_rows):
        threshold = thresholds.get(selected_column)
        if threshold is None or not amount > threshold:
            continue

        indices = df.loc[start_index:stop_index].index
        print('Enumeration %s of %s | From \t %s \t to \t %s | column %s | NaN streak length: %s'
              % (i, length, start_index, stop_index, selected_column, (len(indices))))
        streak_indices.append(indices)

    print('Dropping NaN streaks > threshold')
    l1 = len(df)
    if streak_indices:
        # Combine all streaks first so the dataframe is filtered only once
        indices_to_drop = streak_indices[0].append(streak_indices[1:])
        df = df[~df.index.isin(indices_to_drop)]
    l2 = len(df)
    print('Removed %s rows' % (l1-l2))
    return df


PROCESSED_DIR = 'F://datc//opschaler//combined_gas_smart_weather_dfs//processed//'


def save_df_processed(df, dwelling_id, directory=PROCESSED_DIR):
    """
    Save processed dataframe as a tab separated '<dwelling_id>.csv' file, index included.
    :param df: Pandas DataFrame
    :param dwelling_id: String, used as the file name
    :param directory: Directory to write to, defaults to PROCESSED_DIR.
    :return: None
    """
    import os  # local import: the notebook's import cell does not provide it

    df.to_csv(os.path.join(directory, dwelling_id + '.csv'), sep='\t', index=True)
    print('Saved processed df: %s' % dwelling_id)


# Main loop

In [73]:
%%time

client.restart()

weather = read_weather_data()

file_paths, dwelling_ids = smartmeter_data()

file_paths = file_paths[:5]

dfs_hour = []
dfs_10s = []
dfs_nan_table_10s = []
dfs_nan_table_hour = []

dfs_10s_partly_processed = []
dfs_hour_partly_processed = []

smarts = []
gass = []


for i, path in enumerate(file_paths):
    dwelling_id = dwelling_ids[i]
    
    smart, gas = clean_prepare_smart_gas(path, dwelling_id)
    
    # client.persist: Start computing these variables and keep them in memory
    smart = smart.persist()
    gas = gas.persist()

    smart, gas, weather_rs = resample_dfs(smart, gas, weather)
    
    smart = smart.persist()
    gas = gas.persist()
    weather_rs = weather_rs.persist()
    
    df_hour = create_hour_df(smart, gas, weather, dwelling_id)
    df_10s = create_10s_df(smart, gas, weather, dwelling_id)
    
    df_hour = df_hour.persist()
    df_10s = df_10s.persist()
    
    #Slow, plus low cpu usage...
    #fig = plot_nans(df_10s, dwelling_id+' 10s sample rate', '1T')
    
    df_nan_table_10s = df_nan_checker(df_10s, 0)
    df_nan_table_hour = df_nan_checker(df_hour, 0)
    
    df_nan_table_10s = df_nan_table_10s.persist()
    df_nan_table_hour = df_nan_table_hour.persist()
    
    thresholds_10s = {'eMeter': 6, 'ePower': 6, 'gasMeter': 72, 'T': 36, 'Q': 18}
    df_10s_partly_processed = drop_nan_streaks_above_threshold(df_10s, df_nan_table_10s, thresholds_10s)
    df_10s_partly_processed = df_10s_partly_processed.persist()
    
    thresholds_hour = {'eMeter': 2, 'ePower': 2, 'gasMeter': 2, 'T': 1, 'Q': 1}
    df_hour_partly_processed = drop_nan_streaks_above_threshold(df_hour, df_nan_table_hour, thresholds_hour)
    df_hour_partly_processed = df_hour_partly_processed.persist()
    
    dfs_hour.append(df_hour)
    dfs_10s.append(df_10s)
    dfs_nan_table_10s.append(df_nan_table_10s)
    dfs_nan_table_hour.append(df_nan_table_hour)
    
    dfs_10s_partly_processed.append(df_10s_partly_processed)
    dfs_hour_partly_processed.append(df_hour_partly_processed)

Detected 56 smartmeter_data files.
Wall time: 2 s


# Compute stuff

In [None]:
def _materialize(lazy_items):
    """Compute a list of lazy objects in one pass and return the concrete results as a list."""
    results = dask.compute(*lazy_items)
    # A task can itself return a lazy object (df_nan_checker does), so keep
    # computing until nothing lazy is left.
    while any(dask.is_dask_collection(result) for result in results):
        results = dask.compute(*results)
    return list(results)


# One compute per list instead of client.compute(x.compute()) per element:
# the old form computed each item twice, one item at a time.
dfs_10s_results = _materialize(dfs_10s)
dfs_hour_results = _materialize(dfs_hour)

dfs_nan_table_10s_results = _materialize(dfs_nan_table_10s)
dfs_nan_table_hour_results = _materialize(dfs_nan_table_hour)

dfs_10s_partly_processed_results = _materialize(dfs_10s_partly_processed)
dfs_hour_partly_processed_results = _materialize(dfs_hour_partly_processed)
    
    

Appending NaN info to dfAppending NaN info to df

df_nan_table length: 8
df_nan_table length: 923Enumeration 0 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column eMeter | NaN streak length: 6

Enumeration 4 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column ePower | NaN streak length: 6
Enumeration 6 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column gasMeter | NaN streak length: 6
Dropping NaN streaks > threshold
Removed 6 rows
Enumeration 67 of 923 | From 	 2017-03-07 22:50:40 	 to 	 2017-03-07 23:50:50 | column eMeter | NaN streak length: 362
Enumeration 74 of 923 | From 	 2017-03-08 00:59:50 	 to 	 2017-03-08 07:10:20 | column eMeter | NaN streak length: 2224
Enumeration 671 of 923 | From 	 2017-03-07 22:50:40 	 to 	 2017-03-07 23:50:50 | column ePower | NaN streak length: 362
Enumeration 678 of 923 | From 	 2017-03-08 00:59:50 	 to 	 2017-03-08 07:10:20 | column ePower | NaN streak length: 2224
Enumeration 906 of 923 | From 

In [72]:
dfs_hour_partly_processed_results[2]

Unnamed: 0_level_0,eMeter,eMeterReturn,eMeterLow,eMeterLowReturn,ePower,ePowerReturn,gasMeter,gasPower,DD,DR,...,Q,RG,SQ,T,T10,TD,U,VV,WW,dwelling
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-03-07 07:00:00,3673.515869,0.0,4072.480957,0.0,297.885620,0.0,4419.051758,0.008301,283.466675,0.0,...,0.000000,0.000000,0.000000,5.516667,4.400000,4.916667,95.166664,10295.000000,4.666667,P01S01W0373
2017-03-07 08:00:00,3673.727783,0.0,4072.480957,0.0,233.359558,0.0,4419.060059,0.008301,273.100006,0.0,...,5.333333,0.000000,0.000000,5.933333,5.083333,5.216667,94.500000,10565.000000,3.333333,P01S01W0373
2017-03-07 09:00:00,3673.888916,0.0,4072.480957,0.0,122.046478,0.0,4419.120117,0.060059,281.450012,0.0,...,24.333334,0.000000,0.000000,6.466667,5.966667,5.800000,95.000000,11416.666992,2.000000,P01S01W0373
2017-03-07 10:00:00,3674.018555,0.0,4072.480957,0.0,150.862366,0.0,4419.120117,0.000000,282.100006,100.0,...,64.000000,0.000000,0.000000,7.016666,6.866667,5.916667,92.000000,11550.000000,23.333334,P01S01W0373
2017-03-07 11:00:00,3674.156494,0.0,4072.480957,0.0,130.028168,0.0,4419.120117,0.000000,286.850006,2.0,...,115.000000,0.000000,0.000000,7.566667,7.716667,6.183333,90.500000,13883.333008,32.500000,P01S01W0373
2017-03-07 12:00:00,3674.389160,0.0,4072.480957,0.0,272.143677,0.0,4419.120117,0.000000,288.800018,0.0,...,139.666672,0.000000,0.000000,7.850000,8.200000,6.066667,88.166664,22116.666016,14.666667,P01S01W0373
2017-03-07 13:00:00,3674.567139,0.0,4072.480957,0.0,131.210678,0.0,4419.120117,0.000000,297.966675,0.0,...,191.333328,0.000000,0.000000,7.983334,8.483334,5.966667,86.500000,19116.666016,2.000000,P01S01W0373
2017-03-07 14:00:00,3674.705078,0.0,4072.480957,0.0,136.890137,0.0,4419.335938,0.215820,301.066681,0.0,...,347.666656,0.000000,6.279667,8.866667,9.516666,5.650000,79.833336,31900.000000,1.000000,P01S01W0373
2017-03-07 15:00:00,3674.844482,0.0,4072.480957,0.0,167.316010,0.0,4419.651855,0.315918,305.450012,0.0,...,319.000000,0.000000,7.631617,8.733334,9.133333,4.883333,76.500000,31916.666016,1.333333,P01S01W0373
2017-03-07 16:00:00,3675.061279,0.0,4072.480957,0.0,231.542130,0.0,4420.041992,0.390137,288.200012,0.0,...,391.000000,0.000000,10.000000,9.116667,10.750000,5.416667,77.166664,23800.000000,2.666667,P01S01W0373


In [None]:
# Save the partly processed 10s dataframes.
# Each dataframe is computed here, then the save is handed to the cluster.
zz = []  # holds the futures returned by client.submit
for i, lazy_df in enumerate(dfs_10s_partly_processed):
    dwelling_id = dwelling_ids[i]
    df = lazy_df.compute()
    z = client.submit(save_df_processed, df, dwelling_id+'_10s')
    zz.append(z)
    # Printed right after submitting; the save itself may still be running
    print('Finished saving %s' % i)

Appending NaN info to df
Appending NaN info to df
df_nan_table length: 8
Enumeration 0 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column eMeter | NaN streak length: 6
Enumeration 4 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column ePower | NaN streak length: 6
Enumeration 6 of 8 | From 	 2017-03-08 01:00:00 	 to 	 2017-03-08 06:00:00 | column gasMeter | NaN streak length: 6
Dropping NaN streaks > threshold
Removed 6 rows
df_nan_table length: 923
Enumeration 67 of 923 | From 	 2017-03-07 22:50:40 	 to 	 2017-03-07 23:50:50 | column eMeter | NaN streak length: 362
Enumeration 74 of 923 | From 	 2017-03-08 00:59:50 	 to 	 2017-03-08 07:10:20 | column eMeter | NaN streak length: 2224
Enumeration 671 of 923 | From 	 2017-03-07 22:50:40 	 to 	 2017-03-07 23:50:50 | column ePower | NaN streak length: 362
Enumeration 678 of 923 | From 	 2017-03-08 00:59:50 	 to 	 2017-03-08 07:10:20 | column ePower | NaN streak length: 2224
Enumeration 906 of 923 | From 

In [None]:
# Save the partly processed hourly dataframes.
# Each dataframe is computed here, then the save is handed to the cluster.
zz = []  # holds the futures returned by client.submit
for i, lazy_df in enumerate(dfs_hour_partly_processed):
    dwelling_id = dwelling_ids[i]
    df = lazy_df.compute()
    z = client.submit(save_df_processed, df, dwelling_id+'_hour')
    zz.append(z)
    # Printed right after submitting; the save itself may still be running
    print('Finished saving %s' % i)

In [None]:
# NOTE(review): this cell repeats the earlier 10s save cell - TODO confirm one of them can go.
zz = []  # holds the futures returned by client.submit
for i, lazy_df in enumerate(dfs_10s_partly_processed):
    dwelling_id = dwelling_ids[i]
    df = lazy_df.compute()
    z = client.submit(save_df_processed, df, dwelling_id+'_10s')
    zz.append(z)
    # Printed right after submitting; the save itself may still be running
    print('Finished saving %s' % i)

In [None]:
# NOTE(review): this cell repeats the earlier hourly save cell - TODO confirm one of them can go.
zz = []  # holds the futures returned by client.submit
for i, lazy_df in enumerate(dfs_hour_partly_processed):
    dwelling_id = dwelling_ids[i]
    df = lazy_df.compute()
    z = client.submit(save_df_processed, df, dwelling_id+'_hour')
    zz.append(z)
    # Printed right after submitting; the save itself may still be running
    print('Finished saving %s' % i)

# Save dataframes
Some of the unprocessed dataframes take about 230 seconds to save.
The saves run in parallel, but this is still slow.

In [None]:
"""
Little trick to force run this save function in parallel.
Force compute the df, then submit the save_df_unprocessed function to the scheduler. 
Loop over this, client will process save_df_unprocessed in the back end.
"""

%%time
for i in range (len(dfs_10s)):
    df = dfs_10s[i].compute()
    z = client.submit(save_df_unprocessed, df, (dwelling_ids[i]+'_10s'))

In [None]:
# NOTE(review): `to_save` is not defined in any cell visible in this notebook - TODO confirm or remove.
to_save

In [None]:
# NOTE(review): `to_save` is not defined in any cell visible in this notebook - TODO confirm or remove.
to_save[1].compute()

# Compute and save

In [None]:
%%time

dfs_hour = dask.compute(dfs_hour)
dfs_10s = dask.compute(dfs_10s)
dfs_nan_table_10s = dask.compute(dfs_nan_table_10s)
dfs_nan_table_hour = dask.compute(dfs_nan_table_hour)

In [None]:
%%time

for i in range(len(dfs_hour[0])):
    save_df_unprocessed(dfs_10s[0][i], dwelling_ids[i]+'_10s')
    save_df_unprocessed(dfs_hour[0][i], dwelling_ids[i]+'_hour')
    dfs_nan_table_10s[0][0][i].to_csv('F://datc//opschaler//nan_information//'+dwelling_ids[i]+'_10s.csv', sep='\t')
    dfs_nan_table_hour[0][0][i].to_csv('F://datc//opschaler//nan_information//' + dwelling_ids[i] + '_hour.csv', sep='\t')
    print('Finished iteration %s out of %s.' % (i, len(dfs_hour[0])))