In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

In [4]:
def extract_datetime_features(df):
    """Add calendar features and the binary EV label to ``df`` in place.

    ``df['local_15min']`` must already be a datetime column (the ``.dt``
    accessor is used) and ``df['ev_car']`` must be numeric.

    Columns written:
        hour, minute, day_of_month, month -- taken from the timestamp.
        day_of_week -- pandas convention, 0 = Monday ... 6 = Sunday.
        weekend     -- bool, True for Saturday or Sunday.
        ev_present  -- int64, 1 when ``ev_car >= 1`` and 0 otherwise
                       (a missing reading counts as 0).

    Returns:
        The same DataFrame object that was passed in, with the columns
        above added (the input is mutated, not copied).
    """
    timestamps = df['local_15min']
    day_of_week = timestamps.dt.dayofweek  # computed once, reused for 'weekend'

    df['hour'] = timestamps.dt.hour
    df['minute'] = timestamps.dt.minute
    df['day_of_week'] = day_of_week
    df['day_of_month'] = timestamps.dt.day
    df['month'] = timestamps.dt.month
    df['weekend'] = day_of_week >= 5  # Saturday (5) or Sunday (6)

    # Vectorized threshold instead of a per-row Python lambda. NaN >= 1 is
    # False, so missing readings map to 0 exactly as before; the explicit
    # int64 keeps the label dtype stable even for an empty frame.
    df['ev_present'] = df['ev_car'].ge(1).astype('int64')
    return df

In [5]:
def preprocess_data(df):
    """Turn one house's raw readings into standardized train/test arrays.

    Pipeline:
        1. Parse ``local_15min`` as datetimes.
        2. Derive calendar features and the ``ev_present`` label via
           ``extract_datetime_features``.
        3. Drop the raw timestamp, ``ev_car`` and the ``solar`` / ``grid`` /
           ``total_usage`` columns.
        4. Label-encode ``house_type``.
        5. Split in row order (no shuffling): first 75% of rows for
           training, last 25% for testing.
        6. Standardize features with statistics fitted on the training
           rows only.

    The caller's DataFrame is not modified; all work happens on a copy, so
    the function can be re-run on the same frame.

    Args:
        df: DataFrame containing at least ``local_15min``, ``ev_car``,
            ``solar``, ``grid``, ``total_usage`` and ``house_type``. Every
            remaining column is used as a feature and must be numeric.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.
    """
    # Copy first: the steps below assign columns, and the feature extractor
    # mutates its argument, which would otherwise leak into the caller's frame.
    df = df.copy()

    # NOTE(review): with utc=True, offset-aware timestamps are converted to
    # UTC, so the derived 'hour' would be the UTC hour even though the column
    # name suggests local time -- confirm this is intended.
    df['local_15min'] = pd.to_datetime(df['local_15min'], utc=True)

    df = extract_datetime_features(df)

    # 'ev_car' is the source of the label, so keeping it would leak the
    # target. NOTE(review): 'solar', 'grid' and 'total_usage' are presumably
    # dropped for a similar reason -- confirm.
    df = df.drop(columns=['local_15min', 'ev_car', 'solar', 'grid', 'total_usage'])

    # Encode categorical columns as integers (add other categorical columns
    # to this list as needed).
    for column in ['house_type']:
        df[column] = LabelEncoder().fit_transform(df[column])

    # Separate the label from the features.
    y = df['ev_present'].to_numpy()
    X = df.drop(columns=['ev_present']).to_numpy()

    # shuffle=False keeps row order, so the test set is the tail of the data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Fit the scaler on the training rows only, then apply it to both splits.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [18]:
def train_svm(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC and print its evaluation.

    The model is trained on ``(X_train, y_train)``. Printed output, in order:
    5-fold cross-validation scores on the training split, accuracy on the
    training and test splits, binary precision / recall / F1 on the test
    split (0 is reported when a score is undefined), and the full
    classification report for the test split.

    Nothing is returned; results are only printed.
    """
    model = SVC(kernel='rbf', random_state=42)  # kernel and other parameters can be changed here
    model.fit(X_train, y_train)

    # Predictions for both splits.
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    # Accuracy on each split.
    train_acc = accuracy_score(y_train, train_predictions)
    test_acc = accuracy_score(y_test, test_predictions)

    # Positive-class metrics on the test split, all with the same options.
    binary_options = {'average': 'binary', 'zero_division': 0}
    precision, recall, f1 = (
        metric(y_test, test_predictions, **binary_options)
        for metric in (precision_score, recall_score, f1_score)
    )

    # Cross-validation on the training split.
    scores = cross_val_score(model, X_train, y_train, cv=5)

    print(f"Cross-validation scores: {scores}")
    print(f"Training Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nDetailed Classification Report:\n", classification_report(y_test, test_predictions))


In [19]:
def main(df):
    """Run the pipeline for one house: preprocess ``df``, then train and evaluate the SVM.

    Nothing is returned; ``train_svm`` prints the evaluation.
    """
    train_features, test_features, train_labels, test_labels = preprocess_data(df)

    # preprocess_data returns (X_train, X_test, y_train, y_test) while
    # train_svm takes (X_train, y_train, X_test, y_test); keywords keep the
    # pairing explicit.
    train_svm(
        X_train=train_features,
        y_train=train_labels,
        X_test=test_features,
        y_test=test_labels,
    )

In [16]:
# Dataset selection. `source` and `area` are also read by the per-house
# loops further down.
source = 'pecan_street'
area = 'austin'
house = 6139

# Load the readings for this one house, parsing the timestamp column.
df = pd.read_csv(
    f'../../Dataset/{source}/{area}/house_energy_compressed/{house}_compressed.csv',
    parse_dates=['local_15min'],
)

In [17]:
# Run the full pipeline (preprocess, train, evaluate) on the house loaded above.
# NOTE(review): this cell's execution count (17) is lower than those of the
# train_svm (18) and main (19) definition cells, so the output shown below may
# come from earlier versions of those functions -- re-run after
# Restart Kernel -> Run All to confirm.
main(df)

Cross-validation scores: [0.96175799 0.9640411  0.94843007 0.94881066 0.93396765]
Training Accuracy: 0.9618
Test Accuracy: 0.9605
Precision: 0.0699
Recall: 0.0699
F1 Score: 0.0699

Detailed Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98      8573
           1       0.07      0.07      0.07       186

    accuracy                           0.96      8759
   macro avg       0.52      0.52      0.52      8759
weighted avg       0.96      0.96      0.96      8759



In [20]:
# Evaluate the SVM on each Austin house.
# `area` is pinned here because a later cell rebinds it to 'new_york';
# without this, re-running this cell afterwards would build paths into the
# wrong directory for these house ids.
area = 'austin'
houses = [661, 1642, 4373, 4767, 6139, 8156]
for house in houses:
    df = pd.read_csv(
        f'../../Dataset/{source}/{area}/house_energy_compressed/{house}_compressed.csv',
        parse_dates=['local_15min'],
    )
    print(f"\nHouse {house}:")
    main(df)
    print("\n\n")


House 661:
Cross-validation scores: [0.9849667  0.98325404 0.98401522 0.97640343 0.96859536]
Training Accuracy: 0.9871
Test Accuracy: 0.9713
Precision: 0.9144
Recall: 0.6096
F1 Score: 0.7316

Detailed Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98      8197
           1       0.91      0.61      0.73       561

    accuracy                           0.97      8758
   macro avg       0.94      0.80      0.86      8758
weighted avg       0.97      0.97      0.97      8758





House 1642:
Cross-validation scores: [0.99153521 0.98037329 0.97806427 0.97421589 0.95362709]
Training Accuracy: 0.9813
Test Accuracy: 0.9344
Precision: 0.7090
Recall: 0.5810
F1 Score: 0.6387

Detailed Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.97      0.96      7798
           1       0.71      0.58      0.64       864

    accuracy                           0.93      8662

In [21]:
# Evaluate the SVM on each New York house.
# Rebinds the global `area` (and `houses`, `house`, `df`) for the cells below.
area = 'new_york'
houses = [27, 3000, 5679, 9053]
for house in houses:
    df = pd.read_csv(
        f'../../Dataset/{source}/{area}/house_energy_compressed/{house}_compressed.csv',
        parse_dates=['local_15min'],
    )
    print(f"\nHouse {house}:")
    main(df)
    print("\n\n")


House 27:
Cross-validation scores: [0.98415094 0.99358491 0.99131748 0.98942997 0.99169498]
Training Accuracy: 0.9943
Test Accuracy: 0.9817
Precision: 0.9871
Recall: 0.7984
F1 Score: 0.8828

Detailed Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      4034
           1       0.99      0.80      0.88       382

    accuracy                           0.98      4416
   macro avg       0.98      0.90      0.94      4416
weighted avg       0.98      0.98      0.98      4416





House 3000:
Cross-validation scores: [0.98301887 0.97735849 0.98528302 0.96224991 0.97395243]
Training Accuracy: 0.9905
Test Accuracy: 0.9783
Precision: 0.8216
Recall: 0.9754
F1 Score: 0.8919

Detailed Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99      4010
           1       0.82      0.98      0.89       406

    accuracy                           0.98      4416
