# Save the new datasets

This notebook removes the rows that have already been used from the `measured_value` and `sensors_data` datasets, then uploads the trimmed datasets, together with the historical one, to the S3 bucket so they can be used the next day to retrain the model.

In [1]:
# Imports for the s3 connection
# ==============================================================================
import pandas as pd
import boto3
import os

In [2]:
# Load the S3 connection settings from the environment
# ==============================================================================
# Each value is None when the corresponding variable is not set.
environment = os.environ
key_id = environment.get("AWS_ACCESS_KEY_ID")
secret_key = environment.get("AWS_SECRET_ACCESS_KEY")
endpoint = environment.get("AWS_S3_ENDPOINT")
bucket_name = environment.get("AWS_S3_BUCKET")

In [3]:
# Open the S3 connection
# ==============================================================================
# Credentials and endpoint are the values read from the environment above;
# SSL is requested explicitly.
connection_options = {
    "aws_access_key_id": key_id,
    "aws_secret_access_key": secret_key,
    "endpoint_url": endpoint,
    "use_ssl": True,
}
s3_client = boto3.client("s3", **connection_options)

In [4]:
# Create a new folder and download the raw datasets
# ==============================================================================
os.makedirs("data", exist_ok=True)

# Object keys are written without a leading slash: S3 keys are literal, so
# "/data/x.csv" and "data/x.csv" can name different objects, and the upload
# cell at the end of this notebook writes to "data/...". Using the same keys
# here ensures the next run reads back exactly what this run uploaded.
s3_client.download_file(bucket_name, 'data/sensors_data.csv', 'data/sensors_data.csv')
s3_client.download_file(bucket_name, 'data/measured_value.csv', 'data/measured_value.csv')

In [5]:
# Select the number of days already used for prediction
# ==============================================================================
# `days` is the number of leading rows removed from each dataset in the cells
# below (presumably one row per day -- confirm against the data).
days = 1

In [6]:
# Delete the sensor data already used
# ==============================================================================
# Remove the first `days` rows and stage the remainder for upload.
# Positional slicing is used instead of drop(index=range(days)) so that a file
# holding fewer than `days` rows produces an empty dataset rather than a
# KeyError. max(days, 0) keeps the previous "drop nothing" behaviour for a
# non-positive `days`.
os.makedirs('upload_data', exist_ok=True)  # to_csv does not create missing folders

sensors_data = pd.read_csv('data/sensors_data.csv')
new_sensors_data = sensors_data.iloc[max(days, 0):].reset_index(drop=True)
new_sensors_data.to_csv('upload_data/sensors_data.csv', sep=',', index=False, header=True)

In [7]:
# Delete the measured data already used
# ==============================================================================
# Remove the first `days` rows and stage the remainder for upload.
# Positional slicing is used instead of drop(index=range(days)) so that a file
# holding fewer than `days` rows produces an empty dataset rather than a
# KeyError. max(days, 0) keeps the previous "drop nothing" behaviour for a
# non-positive `days`.
os.makedirs('upload_data', exist_ok=True)  # to_csv does not create missing folders

measured_value = pd.read_csv('data/measured_value.csv')
new_measured_value = measured_value.iloc[max(days, 0):].reset_index(drop=True)
new_measured_value.to_csv('upload_data/measured_value.csv', sep=',', index=False, header=True)

In [8]:
# Push the staged datasets to the bucket
# ==============================================================================
# (local file, destination key) pairs, uploaded in the order listed.
staged_uploads = (
    ('upload_data/historical.csv', "data/historical.csv"),
    ('upload_data/sensors_data.csv', "data/sensors_data.csv"),
    ('upload_data/measured_value.csv', "data/measured_value.csv"),
)
for local_path, object_key in staged_uploads:
    s3_client.upload_file(local_path, bucket_name, Key=object_key)