In [1]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

In [16]:
def load_and_print_column_names(directory_path):
    """Print the 'Sum' and 'use [kW]' series of every CSV file in a directory.

    Despite its name, this function does not print column names: for each
    ``.csv`` file directly inside ``directory_path`` it prints the file name,
    reads the file with ``pd.read_csv``, adds a 'Sum' column through
    ``sum_features_excluding`` and prints that column next to 'use [kW]'.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, which would otherwise make the printed output differ
    between runs and machines.

    Args:
        directory_path: Directory to scan; sub-directories are not searched.

    Raises:
        KeyError: If a CSV file has no 'use [kW]' column.
    """
    for filename in sorted(os.listdir(directory_path)):
        # Only files whose name ends in lowercase '.csv' are processed.
        if not filename.endswith('.csv'):
            continue

        print(filename)
        file_path = os.path.join(directory_path, filename)

        df = sum_features_excluding(pd.read_csv(file_path))

        # Both series are passed to a single print call, so they are emitted
        # one after the other, separated by a space.
        print(df['Sum'], df['use [kW]'])
    
def sum_features_excluding(df, exclude_columns=None, sum_column='Sum'):
    """Add a row-wise total of the non-excluded columns to ``df``.

    Every column that is neither listed in ``exclude_columns`` nor equal to
    ``sum_column`` is summed along each row, and the result is stored in
    ``df[sum_column]``.

    The output column is always left out of the summation. Without that,
    calling the function a second time on the same frame (for example by
    re-running a notebook cell) would add the previous total to the new one
    and silently double the result.

    Args:
        df: DataFrame to process. It is modified in place.
        exclude_columns: Column names to leave out of the total. Defaults to
            ``['Date & Time', 'use [kW]', 'gen [kW]', 'Grid [kW]']``.
        sum_column: Name of the column that receives the total.

    Returns:
        The same ``df`` object, now containing ``sum_column``.
    """
    if exclude_columns is None:
        exclude_columns = ['Date & Time', 'use [kW]', 'gen [kW]', 'Grid [kW]']

    # Skip the target column as well so repeated calls are idempotent.
    skipped = set(exclude_columns) | {sum_column}
    valid_columns = [col for col in df.columns if col not in skipped]

    df[sum_column] = df[valid_columns].sum(axis=1)

    return df

In [17]:
# Print the 'Sum' and 'use [kW]' series for each CSV file in the HomeB folder.
load_and_print_column_names('dataset/HomeB/')

HomeB-meter1_2014.csv
0        0.159486
1        0.561556
2        0.545698
3        0.617492
4        0.131881
           ...   
17515    0.768889
17516    0.474930
17517    0.514502
17518    0.427170
17519    0.333996
Name: Sum, Length: 17520, dtype: float64 0        0.304439
1        0.656771
2        0.612895
3        0.683979
4        0.197809
           ...   
17515    1.560890
17516    0.958447
17517    0.834462
17518    0.543863
17519    0.414441
Name: use [kW], Length: 17520, dtype: float64
HomeB-meter1_2015.csv
0        0.203536
1        0.208224
2        0.226490
3        0.101848
4        0.143047
           ...   
17515    0.421509
17516    0.337090
17517    0.580132
17518    0.259581
17519    0.521537
Name: Sum, Length: 17520, dtype: float64 0        0.300237
1        0.291522
2        0.290702
3        0.179216
4        0.214933
           ...   
17515    0.492082
17516    0.401793
17517    0.650312
17518    0.331039
17519    0.583719
Name: use [kW], Length: 17520, dtype

In [18]:
def append_dataframes(directory_path):
    """Stack every CSV file in a directory into one DataFrame.

    Each ``.csv`` file directly inside ``directory_path`` is read with
    ``pd.read_csv``. The frames are aligned on the union of their columns
    (kept in first-seen order) and concatenated row-wise with a fresh
    0..n-1 index. A column that a file does not have is filled with 0 for
    that file's rows; NaN values already present in a file are left as is.

    Files are processed in sorted name order so the row and column order of
    the result is reproducible (``os.listdir`` order is arbitrary). All
    frames are concatenated in a single ``pd.concat`` call rather than
    growing the result inside the loop, which re-copies the accumulated
    data on every iteration.

    Args:
        directory_path: Directory to scan; sub-directories are not searched.

    Returns:
        The combined DataFrame, or an empty DataFrame when the directory
        contains no ``.csv`` files.
    """
    frames = []
    all_columns = []  # union of column names, in first-seen order
    seen_columns = set()

    for filename in sorted(os.listdir(directory_path)):
        # Only files whose name ends in lowercase '.csv' are processed.
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(directory_path, filename))

        for col in df.columns:
            if col not in seen_columns:
                seen_columns.add(col)
                all_columns.append(col)

        frames.append(df)
        print(f"Appended data from {filename}")

    if not frames:
        return pd.DataFrame()

    # fill_value only applies to columns a frame is missing; values already
    # present (including NaN) are not touched.
    aligned = [df.reindex(columns=all_columns, fill_value=0) for df in frames]

    return pd.concat(aligned, ignore_index=True)

In [19]:
# Example usage: stack every CSV file in the HomeB folder into one DataFrame.
directory_path = 'dataset/HomeB/'  # Relative path; replace with your own directory
combined_data = append_dataframes(directory_path)

# Preview the first five rows of the combined DataFrame
print(combined_data.head())

Appended data from HomeB-meter1_2014.csv
Appended data from HomeB-meter1_2015.csv
Appended data from HomeB-meter1_2016.csv
Appended data from HomeB-meter2_2014.csv
Appended data from HomeB-meter2_2015.csv
Appended data from HomeB-meter2_2016.csv
           Date & Time  use [kW]  gen [kW]  Grid [kW]   AC [kW]  Furnace [kW]  \
0  2014-01-01 00:00:00  0.304439       0.0   0.304439  0.000058      0.009531   
1  2014-01-01 00:30:00  0.656771       0.0   0.656771  0.001534      0.364338   
2  2014-01-01 01:00:00  0.612895       0.0   0.612895  0.001847      0.417989   
3  2014-01-01 01:30:00  0.683979       0.0   0.683979  0.001744      0.410653   
4  2014-01-01 02:00:00  0.197809       0.0   0.197809  0.000030      0.017152   

   Cellar Lights [kW]  Washer [kW]  First Floor lights [kW]  \
0            0.005336     0.000126                 0.011175   
1            0.005522     0.000043                 0.003514   
2            0.005504     0.000044                 0.003528   
3            0.

In [20]:
# Add the row-wise 'Sum' column. sum_features_excluding modifies its argument
# in place and returns it, so `df` and `combined_data` are the same object.
df = sum_features_excluding(combined_data)

In [24]:
# Display the combined frame, which now carries the 'Sum' column.
df

Unnamed: 0,Date & Time,use [kW],gen [kW],Grid [kW],AC [kW],Furnace [kW],Cellar Lights [kW],Washer [kW],First Floor lights [kW],Utility Rm + Basement Bath [kW],...,First floor [kW],Tub Whirpool [kW],Kitchen counter [kW],Dishwasher [kW],Fridge [kW],Guest Bedroom / Media Room [kW],MBed KBed Lights &amp; MasterBath [kW],Living room &amp; Kitchen Lights [kW],Bath GFI (1st &amp; 2nd floor bath) [kW],Sum
0,2014-01-01 00:00:00,0.304439,0.0,0.304439,0.000058,0.009531,0.005336,0.000126,0.011175,0.003836,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.159486
1,2014-01-01 00:30:00,0.656771,0.0,0.656771,0.001534,0.364338,0.005522,0.000043,0.003514,0.003512,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.561556
2,2014-01-01 01:00:00,0.612895,0.0,0.612895,0.001847,0.417989,0.005504,0.000044,0.003528,0.003484,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.545698
3,2014-01-01 01:30:00,0.683979,0.0,0.683979,0.001744,0.410653,0.005556,0.000059,0.003499,0.003476,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.617492
4,2014-01-01 02:00:00,0.197809,0.0,0.197809,0.000030,0.017152,0.005302,0.000119,0.003694,0.003865,...,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.131881
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
843860,2016-12-15 22:25:00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.006550,0.00075,0.005350,0.001333,0.003433,0.005550,0.018617,0.014667,0.002383,0.150600
843861,2016-12-15 22:26:00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.006567,0.00075,0.005367,0.001333,0.003450,0.005567,0.018517,0.014617,0.002383,0.150633
843862,2016-12-15 22:27:00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.006567,0.00075,0.005367,0.001333,0.003450,0.005583,0.018467,0.014733,0.002400,0.150383
843863,2016-12-15 22:28:00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.006567,0.00075,0.005367,0.001350,0.003450,0.005600,0.018533,0.014750,0.002383,0.150400
