# READ ME
## INSTRUCTIONS
1. Make a copy of this file and put your name in the file name, e.g. writing_data_to_yourname (this is so we can do the same thing with multiple InfluxDB databases).
2. Create your database in InfluxDB, and a bucket for the data to go to (in this notebook mine is called "API Test").
3. Scroll down to the section at the bottom named "Change this to your database" and look for the comments #Replace with your own. Change token, org, bucket, and cloud_url
4. At the top toolbar go to runtime->restart session and run all
5. You will have to allow access to your drive so it can get the CSV file


The CSV file used in this code is a spreadsheet of measurements from one of the previous groups. Using the machine learning model, predictions were made for each appliance and added as extra columns. In the future, we would not have the columns named Actual_/Predicted_<appliance name>. We would only measure the total power and then make predictions for each appliance.

## InfluxDB data handling integration
Instead of using a CSV as the input file, we'll use the InfluxDB API to load the data. Then we can use the machine learning model to disaggregate it and write the new data back into InfluxDB. Reference on the InfluxDB API: https://www.influxdata.com/blog/time-series-forecasting-with-tensorflow-influxdb/

In [None]:
! pip install influxdb-client
!pip install tomorrow_io
!pip install requests
!pip install openmeteo-requests requests-cache retry-requests numpy pandas



Library dependencies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

import requests_cache
import openmeteo_requests
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from influxdb_client import InfluxDBClient, Point
from influxdb_client.client.write_api import SYNCHRONOUS
from retry_requests import retry

load data

In [None]:
# Attach Google Drive to this Colab runtime so the CSV files stored there
# can be read like local files.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Training inputs: every CSV listed here is read and stacked into one frame.
csv_files = [
    '/content/drive/Shareddrives/Untitled shared drive/50_ResidentialPowerDisaggregation_SD_Fall23/1.2 Software/Colab Notebooks/ML Models/Andrew/Old Models/prediction_vs_actual.csv',
    # Add more file names as needed
]

# One DataFrame per input file, in the order the files are listed.
data_list = [pd.read_csv(csv_path) for csv_path in csv_files]

# Stack the per-file frames row-wise; ignore_index=True discards the
# per-file row labels and renumbers the combined rows from 0.
data = pd.concat(data_list, ignore_index=True)

This prints the original data from the CSV file

fetch weather data

In [None]:
# Parse the 'Timestamp' column into datetime values. errors='coerce' turns
# any entry that cannot be parsed into NaT instead of raising.
parsed_timestamps = pd.to_datetime(data['Timestamp'], errors='coerce')
data['Timestamp'] = parsed_timestamps

# Show the first few parsed values as a quick sanity check.
print(parsed_timestamps.head())

0   2024-01-22 03:02:00
1   2024-01-22 03:03:00
2   2024-01-22 03:04:00
3   2024-01-22 03:05:00
4   2024-01-22 03:06:00
Name: Timestamp, dtype: datetime64[ns]


Combine existing data with real-time weather data

In [None]:
# Open-Meteo client backed by an on-disk HTTP cache ('.cache', entries never
# expire) and wrapped so that failed requests are retried with backoff.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Make sure 'Timestamp' is a datetime column (no-op if it already is).
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

# Date span covered by the measurements; used as the weather query window.
start_date = data['Timestamp'].min()
end_date = data['Timestamp'].max()

# Hour bucket of each sample, used only as the join key for the hourly
# weather data. It is deliberately kept OUT of data['Timestamp']: overwriting
# that column would give every sample in the same hour an identical
# timestamp, and those rows would then overwrite each other when written to
# InfluxDB (same measurement + same time = same point).
sample_hour = data['Timestamp'].dt.floor('h')

# Fetch hourly temperature data from the Open-Meteo archive API.
# NOTE(review): `latitude` and `longitude` are not defined in this cell or
# anywhere visible in this notebook; they must be set to the site's
# coordinates before this cell runs, otherwise it raises NameError.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": start_date.strftime('%Y-%m-%d'),  # Use only the date part
    "end_date": end_date.strftime('%Y-%m-%d'),  # Use only the date part
    "hourly": "temperature_2m",
    "temperature_unit": "fahrenheit",
    "wind_speed_unit": "mph",
    "precipitation_unit": "inch",
    "timeformat": "unixtime",
    "timezone": "America/New_York",
}
responses = openmeteo.weather_api(url, params=params)

# Only one location was requested, so only the first response is used.
response = responses[0]
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

# Time()/TimeEnd() are UTC epoch seconds. The measurements carry naive
# timestamps, so shift the axis by the UTC offset of the requested timezone
# to get matching local wall-clock hours.
# NOTE(review): assumes data['Timestamp'] is America/New_York local time
# (the timezone requested above) -- confirm against the data source.
utc_offset_seconds = response.UtcOffsetSeconds()
hourly_time_range = pd.date_range(
    start=pd.to_datetime(hourly.Time() + utc_offset_seconds, unit="s"),
    end=pd.to_datetime(hourly.TimeEnd() + utc_offset_seconds, unit="s"),
    freq=pd.Timedelta(seconds=hourly.Interval()),
    inclusive="left",  # `closed=` is deprecated and removed in pandas 2.x
)

# Create a DataFrame with hourly temperature data
hourly_data = {"Timestamp": hourly_time_range, "temperature_2m": hourly_temperature_2m}
hourly_dataframe = pd.DataFrame(data=hourly_data)

# Left-join the hourly temperature onto every sample via its hour bucket.
# Samples with no matching hour get NaN; the helper key column is dropped so
# merged_data keeps the original per-sample 'Timestamp'.
merged_data = (
    data.assign(_hour=sample_hour)
    .merge(
        hourly_dataframe.rename(columns={"Timestamp": "_hour"}),
        how='left',
        on='_hour',
    )
    .drop(columns='_hour')
)

# Now, merged_data contains hourly temperature information alongside your existing data
merged_data

  hourly_time_range = pd.date_range(


Unnamed: 0,Total,Actual_Washer,Actual_BlowerGH,Actual_Lights,Actual_BlowerBed,Actual_CompGH,Actual_CompBed,Actual_Dryer,Actual_Recs1,Actual_Recs2,...,Predicted_Lights,Predicted_BlowerBed,Predicted_CompGH,Predicted_CompBed,Predicted_Dryer,Predicted_Recs1,Predicted_Recs2,Predicted_WaterHeater,Timestamp,temperature_2m
0,52.6,0.6,4.9,25.9,2.0,0.0,5.3,0.0,8.3,5.6,...,50.377780,-8.722680,4.118182,129.59215,-7.054536,23.361870,-20.186514,8.394237,2024-01-22 03:00:00,
1,968.6,0.7,6.2,25.4,227.8,21.1,672.7,0.0,8.8,5.9,...,49.016400,37.552692,44.333454,161.14293,1.098680,24.044157,-9.713423,15.486119,2024-01-22 03:00:00,
2,910.3,0.7,6.1,25.4,227.5,21.0,615.6,0.0,8.1,5.9,...,48.511833,253.663470,102.330124,316.01540,16.988558,24.677510,6.385436,26.801900,2024-01-22 03:00:00,
3,867.3,0.6,6.1,25.5,228.5,21.0,571.6,0.0,8.1,5.9,...,53.936330,355.601260,112.400040,439.14758,14.003014,26.278265,0.368196,2.480625,2024-01-22 03:00:00,
4,911.4,0.7,6.1,25.5,229.9,20.9,614.3,0.0,8.1,5.9,...,61.003914,328.448600,130.901280,616.89700,-3.106190,27.205340,-15.704743,-45.088432,2024-01-22 03:00:00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1149,1924.6,6.4,8.9,41.7,225.5,25.2,629.4,970.1,10.1,7.3,...,32.020508,902.356100,600.841700,685.75290,37.130672,4.813627,26.974987,27.149942,2024-01-22 22:00:00,39.2864
1150,944.8,6.0,6.3,43.4,226.8,21.1,628.1,0.0,7.3,5.8,...,44.912987,554.754200,165.974460,632.74900,32.808086,11.331118,9.906139,27.406240,2024-01-22 22:00:00,39.2864
1151,949.1,6.0,6.4,43.5,227.9,21.2,631.0,0.0,7.3,5.8,...,46.085804,554.549600,125.079475,677.70750,27.612118,10.222755,4.091804,18.958097,2024-01-22 22:00:00,39.2864
1152,1981.5,6.4,9.2,41.9,225.9,25.6,628.0,1026.7,10.3,7.5,...,36.114090,867.376000,584.114140,687.21250,27.567614,7.031342,-0.502177,1.388543,2024-01-22 22:00:00,39.2864


The data has to be changed so that it is within the 30-day retention period; this was already done for the CSV file.

In [None]:
# Drop the lowercase 'timestamp' column (distinct from 'Timestamp', which the
# InfluxDB cell below requires). errors='ignore' makes this a no-op when the
# column is absent, so re-running the cell does not raise KeyError.
data = data.drop(columns=['timestamp'], errors='ignore')

In [None]:
# Display the DataFrame as the cell output so it can be inspected.
data


Unnamed: 0,Total,Actual_Washer,Actual_BlowerGH,Actual_Lights,Actual_BlowerBed,Actual_CompGH,Actual_CompBed,Actual_Dryer,Actual_Recs1,Actual_Recs2,...,Predicted_BlowerGH,Predicted_Lights,Predicted_BlowerBed,Predicted_CompGH,Predicted_CompBed,Predicted_Dryer,Predicted_Recs1,Predicted_Recs2,Predicted_WaterHeater,Timestamp
0,52.6,0.6,4.9,25.9,2.0,0.0,5.3,0.0,8.3,5.6,...,5.458139,50.377780,-8.722680,4.118182,129.59215,-7.054536,23.361870,-20.186514,8.394237,2024-01-22 03:00:00
1,968.6,0.7,6.2,25.4,227.8,21.1,672.7,0.0,8.8,5.9,...,12.681525,49.016400,37.552692,44.333454,161.14293,1.098680,24.044157,-9.713423,15.486119,2024-01-22 03:00:00
2,910.3,0.7,6.1,25.4,227.5,21.0,615.6,0.0,8.1,5.9,...,26.024382,48.511833,253.663470,102.330124,316.01540,16.988558,24.677510,6.385436,26.801900,2024-01-22 03:00:00
3,867.3,0.6,6.1,25.5,228.5,21.0,571.6,0.0,8.1,5.9,...,27.296140,53.936330,355.601260,112.400040,439.14758,14.003014,26.278265,0.368196,2.480625,2024-01-22 03:00:00
4,911.4,0.7,6.1,25.5,229.9,20.9,614.3,0.0,8.1,5.9,...,24.843350,61.003914,328.448600,130.901280,616.89700,-3.106190,27.205340,-15.704743,-45.088432,2024-01-22 03:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1149,1924.6,6.4,8.9,41.7,225.5,25.2,629.4,970.1,10.1,7.3,...,120.094680,32.020508,902.356100,600.841700,685.75290,37.130672,4.813627,26.974987,27.149942,2024-01-22 22:00:00
1150,944.8,6.0,6.3,43.4,226.8,21.1,628.1,0.0,7.3,5.8,...,51.698124,44.912987,554.754200,165.974460,632.74900,32.808086,11.331118,9.906139,27.406240,2024-01-22 22:00:00
1151,949.1,6.0,6.4,43.5,227.9,21.2,631.0,0.0,7.3,5.8,...,41.504795,46.085804,554.549600,125.079475,677.70750,27.612118,10.222755,4.091804,18.958097,2024-01-22 22:00:00
1152,1981.5,6.4,9.2,41.9,225.9,25.6,628.0,1026.7,10.3,7.5,...,117.369200,36.114090,867.376000,584.114140,687.21250,27.567614,7.031342,-0.502177,1.388543,2024-01-22 22:00:00


# Change this to your database

In [None]:
import os

# Provide connection details

# SECURITY: never commit a real API token to a shared notebook. The token is
# read from the INFLUXDB_TOKEN environment variable; alternatively replace
# the placeholder below in your private copy. Any token that was previously
# committed to this file should be revoked in the InfluxDB UI.
#Replace with your own
token = os.environ.get("INFLUXDB_TOKEN", "YOUR_INFLUXDB_TOKEN")
#Replace with your own
org = "NCSU"
#Replace with your own
bucket = "API Test2"

#Replace with your own
# InfluxDB Cloud URL
cloud_url = "https://us-east-1-1.aws.cloud2.influxdata.com"

# Fail early with a clear message instead of an opaque authorization error.
if token == "YOUR_INFLUXDB_TOKEN":
    raise ValueError(
        "No InfluxDB token configured: set the INFLUXDB_TOKEN environment "
        "variable or replace the placeholder token with your own."
    )

# Establish InfluxDB connection
client = InfluxDBClient(url=cloud_url, token=token, org=org)

# Every point is timestamped from the 'Timestamp' column, so it must exist.
if 'Timestamp' not in data.columns:
    raise ValueError("Column 'Timestamp' is required in the DataFrame.")

# Appliances that have both an Actual_<name> and a Predicted_<name> column.
APPLIANCES = [
    "Washer",
    "BlowerGH",
    "Lights",
    "BlowerBed",
    "CompGH",
    "CompBed",
    "Dryer",
    "Recs1",
    "Recs2",
    "WaterHeater",
]

# DataFrame columns written as InfluxDB fields; each field keeps its column name.
FIELD_NAMES = ["Total"] + [
    f"{kind}_{appliance}"
    for appliance in APPLIANCES
    for kind in ("Actual", "Predicted")
]


def _row_to_point(row):
    """Build one "NILM" measurement Point from a single DataFrame row."""
    point = Point("NILM")
    for field_name in FIELD_NAMES:
        point = point.field(field_name, row[field_name])
    return point.time(row["Timestamp"])


# Convert DataFrame to InfluxDB Points (an empty DataFrame yields an empty list)
points = [_row_to_point(row) for _, row in data.iterrows()]

# Create a write API instance; SYNCHRONOUS makes write() block until the
# request has completed.
write_api = client.write_api(write_options=SYNCHRONOUS)

# Write Points to InfluxDB
write_api.write(bucket=bucket, record=points)