In [87]:
import pandas as pd
import numpy as np

import matplotlib
import seaborn
import matplotlib.dates as md
from matplotlib import pyplot as plt

In [88]:
class Datasets:
    """Namespace for reading the CSV datasets used by this notebook."""

    # Directory the CSV files are read from when no ``base_dir`` is supplied.
    DEFAULT_BASE_DIR = '../../datasets/anomaly_detection'

    @staticmethod
    def load_dataset(dataset_name, base_dir=None):
        """Read ``<base_dir>/<dataset_name>.csv`` into a DataFrame.

        Declared as a static method so that it works both as
        ``Datasets.load_dataset(...)`` and on an instance.

        Parameters
        ----------
        dataset_name : str
            File name without the ``.csv`` extension.
        base_dir : str or path-like, optional
            Directory containing the file. Defaults to ``DEFAULT_BASE_DIR``.

        Returns
        -------
        pandas.DataFrame
            Whatever ``pd.read_csv`` parses from the file.
        """
        if base_dir is None:
            base_dir = Datasets.DEFAULT_BASE_DIR
        return pd.read_csv(f'{base_dir}/{dataset_name}.csv')

class ATSFDataset:
    """The 'ambient_temperature_system_failure' series plus derived features.

    ``load()`` reads the data and adds a Celsius column and several calendar
    features; ``plot()`` and ``plot_categories()`` visualise the result.
    """

    # Prepared frame; remains None until load() has completed successfully.
    _data_df = None

    class Columns:
        """Names of the columns present in the prepared frame."""
        TIMESTAMP = 'timestamp'
        VALUE_F = 'value_f'
        VALUE_C = 'value_c'
        HOURS = 'hours'
        DAYLIGHT = 'daylight'
        TIME_EPOCH = 'time_epoch'
        DAY_OF_THE_WEEK = 'day_of_the_week'
        WEEK_DAY = 'week_day'
        CATEGORIES = 'categories'

    # (category code, legend label, bar colour). The code is
    # week_day * 2 + daylight, as computed in load().
    _CATEGORY_STYLES = (
        (0, 'WeekEndNight', 'blue'),
        (1, 'WeekEndLight', 'green'),
        (2, 'WeekDayNight', 'red'),
        (3, 'WeekDayLight', 'black'),
    )

    def load(self, df=None):
        """Load the dataset and derive the feature columns.

        Parameters
        ----------
        df : pandas.DataFrame, optional
            A frame with ``timestamp`` and ``value`` columns to prepare
            instead of reading the CSV. It is copied, never modified.
            When omitted, the CSV is read through ``Datasets.load_dataset``.

        Returns
        -------
        ATSFDataset
            ``self``, so calls can be chained.

        Raises
        ------
        KeyError
            If the input has no ``value`` column (``errors='raise'``).
        """
        cols = ATSFDataset.Columns

        if df is None:
            frame = Datasets.load_dataset('ambient_temperature_system_failure')
        else:
            frame = df.copy()

        frame = frame.rename(columns={'value': cols.VALUE_F}, errors='raise')
        frame[cols.TIMESTAMP] = pd.to_datetime(frame[cols.TIMESTAMP])

        # Fahrenheit -> Celsius.
        frame[cols.VALUE_C] = (frame[cols.VALUE_F] - 32) * 5 / 9

        # 1 for hours 07..22 inclusive, 0 otherwise.
        frame[cols.HOURS] = frame[cols.TIMESTAMP].dt.hour
        frame[cols.DAYLIGHT] = frame[cols.HOURS].between(7, 22).astype(int)

        # dayofweek is 0 for Monday, so values below 5 are Monday..Friday.
        frame[cols.DAY_OF_THE_WEEK] = frame[cols.TIMESTAMP].dt.dayofweek
        frame[cols.WEEK_DAY] = (frame[cols.DAY_OF_THE_WEEK] < 5).astype(int)

        # Integer view of the timestamp scaled down by 1e11.
        # NOTE(review): presumably nanoseconds since the epoch, i.e. units of
        # 100 s -- depends on the datetime resolution pandas picks; confirm.
        frame[cols.TIME_EPOCH] = (
            frame[cols.TIMESTAMP].astype(np.int64) / 100000000000
        ).astype(np.int64)

        # 0: weekend night, 1: weekend day, 2: weekday night, 3: weekday day.
        frame[cols.CATEGORIES] = frame[cols.WEEK_DAY] * 2 + frame[cols.DAYLIGHT]

        # Publish only once every step has succeeded.
        self._data_df = frame
        return self

    def get_df(self):
        """Return the prepared frame (``None`` if ``load()`` was never called)."""
        return self._data_df

    def plot(self):
        """Draw the Celsius temperature against the timestamp as a line plot."""
        cols = ATSFDataset.Columns

        _, axis = plt.subplots(1, 1)

        axis.set_title("Temperature By Date Time")
        axis.set_xlabel("Date And Time")
        axis.set_ylabel("Temp. C")

        axis.tick_params(axis='x', labelcolor='red', labelrotation=90, labelsize=10)
        axis.tick_params(axis='y', labelcolor='blue', labelsize=10)

        axis.plot(self._data_df[cols.TIMESTAMP], self._data_df[cols.VALUE_C])

    def _category_percentages(self, bins=10):
        """Histogram every category over one shared set of bin edges.

        The edges span all Celsius readings, so no category loses samples
        that fall outside another category's range.

        Returns
        -------
        tuple
            ``(edges, shares)`` where ``shares`` maps each category code to
            the percentage of that category's readings in each bin. An empty
            category yields all zeros rather than a division by zero.
        """
        cols = ATSFDataset.Columns
        df = self._data_df

        # NaN readings would break the automatic range detection.
        edges = np.histogram_bin_edges(df[cols.VALUE_C].dropna(), bins=bins)

        shares = {}
        for code, _label, _color in ATSFDataset._CATEGORY_STYLES:
            sample = df.loc[df[cols.CATEGORIES] == code, cols.VALUE_C].dropna()
            counts, _ = np.histogram(sample, bins=edges)
            if len(sample):
                shares[code] = counts * 100 / len(sample)
            else:
                shares[code] = np.zeros(len(counts))
        return edges, shares

    def plot_categories(self):
        """Bar chart of the temperature distribution for each category.

        Each of the four weekday/daylight categories is drawn as the
        percentage of its own readings per temperature bin, with the bars
        placed side by side inside every bin.
        """
        edges, shares = self._category_percentages()

        # Four bars of one sixth of a bin each, leaving a gap between bins.
        width = (edges[1] - edges[0]) / 6

        _, ax = plt.subplots()
        for slot, (code, label, color) in enumerate(ATSFDataset._CATEGORY_STYLES):
            ax.bar(edges[:-1] + width * slot, shares[code],
                   width=width, facecolor=color, label=label)

        ax.set_xlabel("Temp. C")
        ax.set_ylabel("Share of readings (%)")
        ax.legend()
        plt.show()
        

In [89]:
# Example usage (uncomment to run):
# test = ATSFDataset().load()
# test.plot()