In [None]:
import pandas as pd

from datetime import datetime, timedelta
from dateutil import tz

# if you encounter a "year is out of range" error the timestamp
# may be in milliseconds, try `ts /= 1000` in that case
# print(datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'))

## Weather data

Loading the **HI-SEAS** weather dataset into a dataframe. The data was recorded between Missions IV and V. More info at https://www.kaggle.com/dronio/SolarEnergy/version/1.

In [None]:
# Read the weather measurements from a CSV file located next to the notebook.
weatherData = pd.read_csv('SolarPrediction.csv')

# Display the column names of the loaded frame.
weatherData.columns

Cleaning the time measurements by converting each UNIX timestamp into a datetime object.

Converted to the Hawaii timezone, and then manually adjusted by subtracting 3 hours.

Solar radiation is measured in $\frac{Watts}{m^2}$. On the other hand, wind speed is in $\frac{miles}{hour}$, so it will be converted to $\frac{km}{hour}$.

In [None]:
HawaiiZone = tz.gettz('US/Hawaii')

# Interpret UNIXTime as seconds since the epoch in UTC and convert directly to
# Hawaii local time. The previous version built a *naive* UTC datetime and then
# called astimezone() on it; astimezone() treats naive values as the machine's
# local time, so the outcome depended on where the notebook was executed.
# NOTE(review): the former manual 3-hour shift presumably compensated for that
# on a UTC-3 machine -- confirm against the dataset's own time columns.
weatherData['datetime'] = (
    pd.to_datetime(weatherData.UNIXTime, unit='s', utc=True)
    .dt.tz_convert(HawaiiZone)
)

# Sorting dataframe by datetime. sort_values returns a new frame, so the
# result has to be kept (it was previously discarded).
weatherData = weatherData.sort_values('datetime')

# Converting wind speed from miles/hour to km/hour.
KM_PER_MILE = 1.60934
weatherData['windSpeed'] = weatherData.Speed * KM_PER_MILE

# Keeping just selected columns
selectedColumns = ['datetime', 'windSpeed', 'Radiation', 'TimeSunRise', 'TimeSunSet']
weatherData = weatherData[selectedColumns]

In [None]:
# Preview the first rows of the weather frame.
weatherData.head()

In [None]:
# Report the earliest and latest values found in the datetime column.
print(f'datetime values go from \n{weatherData.datetime.min()} to \n{weatherData.datetime.max()}.')

In [None]:
def isOctober(aDateTime):
    """Tell whether ``aDateTime`` falls in October.

    Any object exposing a ``month`` attribute is accepted; the result is
    True exactly when that attribute equals 10.
    """
    octoberMonthNumber = 10
    monthNumber = aDateTime.month
    return monthNumber == octoberMonthNumber
# Keep only the October rows. .copy() makes octoberData an independent frame,
# so columns added to it later do not write into a slice of weatherData
# (which would raise pandas' SettingWithCopyWarning).
octoberData = weatherData[weatherData.datetime.map(isOctober)].copy()

In [None]:
# Express every timestamp as the time elapsed since the first October sample.
minimumDate = octoberData.datetime.min()

# assign() returns a new frame instead of writing through .loc into a frame
# that was produced by boolean filtering, which avoids SettingWithCopyWarning
# and keeps this cell safe to re-run.
octoberData = (
    octoberData
    .assign(normalizeDateTime=octoberData.datetime - minimumDate)
    .sort_values('normalizeDateTime')
)

In [None]:
# Scratch arithmetic: 15603 divided by 3600 (about 4.33).
# NOTE(review): presumably a seconds-to-hours sanity check -- confirm or remove.
15603/3600

In [None]:
import math


def formatDateToDEVSEvent(aTimedelta):
    """Convert a timedelta object to its DEVS time representation.

    The result has the shape ``H:MM:SS:mmm``: total hours (days are folded
    into the hour count, so it is not capped at 23), then zero-padded
    minutes, seconds and milliseconds.

    Parameters
    ----------
    aTimedelta : datetime.timedelta or pandas.Timedelta
        Any object exposing ``days``, ``seconds`` and ``microseconds``.

    Returns
    -------
    str
        The formatted time, e.g. ``'26:03:04:000'`` for 1 day, 2 h, 3 min, 4 s.
    """
    # `seconds` holds only the within-day part; whole days are added as hours.
    hoursInDay, leftoverSeconds = divmod(aTimedelta.seconds, 3600)
    hours = hoursInDay + aTimedelta.days * 24
    minutes, seconds = divmod(leftoverSeconds, 60)
    # Keep the sub-second part instead of always emitting 000.
    milliseconds = aTimedelta.microseconds // 1000
    return '%d:%02d:%02d:%03d' % (hours, minutes, seconds, milliseconds)

In [None]:
# Format every elapsed-time offset as a DEVS time string, stored in a new column.
octoberData.loc[:, 'devsDateTime'] = octoberData.normalizeDateTime.map(formatDateToDEVSEvent)

In [None]:
# Port names: each constant is the identifier written in the port field of an
# event line for the corresponding kind of value.
WIND_SPEED_PORT = 'wind_speed'
RADIATION_PORT = 'radiation'
POWER_CONSUMPTION_PORT = 'power_consumption'

In [None]:
# Display the columns currently present in the October frame.
octoberData.columns

In [None]:
# TODO: Clip radiation data when sun is down
# Print the smallest and largest value of each measured series, one line per series.
seriesByLabel = (
    ('Radiation', octoberData.Radiation),
    ('Wind Speed', octoberData.windSpeed),
)
for label, series in seriesByLabel:
    print(f'{label}\t\tmin: {series.min()}\tmax: {series.max()}')

In [None]:
# 85th percentile of the wind speed over the whole weather frame (not only October).
weatherData.windSpeed.quantile(.85)

In [None]:
def writeEvent(file, devsTime, port, value):
    """Append one event line to ``file``.

    The line consists of the time, the port name and the value separated by
    single spaces and terminated by a newline.
    """
    eventLine = '{} {} {}\n'.format(devsTime, port, value)
    file.write(eventLine)

with open('octoberData.ev', 'w+') as eventsFile:
    # A single constant power-consumption event at time zero
    writeEvent(eventsFile, '00:00:00:000', POWER_CONSUMPTION_PORT, 100)
    # Every measurement row produces two events sharing the same time:
    # the radiation reading first, then the wind speed.
    for _, measurement in octoberData.iterrows():
        eventTime = measurement.devsDateTime
        readings = (
            (RADIATION_PORT, measurement.Radiation),
            (WIND_SPEED_PORT, measurement.windSpeed),
        )
        for port, value in readings:
            writeEvent(eventsFile, eventTime, port, value)

## Power consumption data

In [None]:
# Read the power consumption records; the file uses ';' as field separator.
loadData = pd.read_csv('february2009Consumption.csv', sep=';')

In [None]:
FORMAT_STRING = '%d/%m/%Y %H:%M:%S'
def mergeDateTimeIntoString(row):
    """Combine the 'Date' and 'Time' fields of ``row`` into one datetime.

    'Date' is expected as day/month/year separated by '/'; day and month are
    zero-padded before the combined text is parsed with FORMAT_STRING.
    Despite the function's name, the return value is a datetime object.
    """
    parts = row['Date'].split('/')
    day, month, year = int(parts[0]), int(parts[1]), int(parts[2])
    paddedDate = '{:02d}/{:02d}/{:d}'.format(day, month, year)

    combined = paddedDate + ' ' + '{}'.format(row['Time'])
    return datetime.strptime(combined, FORMAT_STRING)

# Build one datetime per record from its separate Date and Time fields.
loadData['datetime'] = loadData.apply(mergeDateTimeIntoString, axis=1)
# sort_values returns a new frame; the result was previously discarded, so the
# records were never actually ordered by time.
loadData = loadData.sort_values('datetime')

minimumDate = loadData.datetime.min()

# Elapsed time since the first record, rendered as a DEVS time string.
loadData['devsDateTime'] = (loadData.datetime - minimumDate).map(formatDateToDEVSEvent)

In [None]:
# Cleaning unknown data: rows whose reading is the '?' placeholder are dropped.
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the previous frame (SettingWithCopyWarning).
loadData = loadData[loadData.Global_active_power != '?'].copy()

# Convert string kWatt value to float Watt. The vectorised cast gives the
# column a proper float dtype instead of per-element Python conversion.
loadData['Global_active_power'] = loadData.Global_active_power.astype(float) * 1000

In [None]:
# Keep only the two columns that are written to the events file.
loadData = loadData[['devsDateTime', 'Global_active_power']]

## Merge both power consumption and weather dataframes

In [None]:
# NOTE(review): all weather events are written before all power-consumption
# events, so the file is not interleaved chronologically -- confirm that the
# consumer of mergedData.ev accepts that ordering.
with open('mergedData.ev', 'w+') as eventsFile:
    # Weather section: two events per row (radiation, then wind speed)
    for _, measurement in octoberData.iterrows():
        eventTime = measurement.devsDateTime
        readings = (
            (RADIATION_PORT, measurement.Radiation),
            (WIND_SPEED_PORT, measurement.windSpeed),
        )
        for port, value in readings:
            writeEvent(eventsFile, eventTime, port, value)
    # Power consumption section: one event per row
    for _, consumption in loadData.iterrows():
        writeEvent(
            eventsFile,
            consumption.devsDateTime,
            POWER_CONSUMPTION_PORT,
            consumption['Global_active_power'],
        )