In [4]:
'''
Investigates the correlation between frequency and severity of violations
through per-user analyses

1. Find frequency and severity for each user
    - freq = (# hours of violation) / (total # hours recorded)
    - sevr = sum of (required temperature - measured temperature)
                for all violations
2. Plot each user as a datapoint and perform regression
3. Conclude
'''

import csv
import sys
import os
from datetime import datetime

In [11]:
def calculate_sevr(time, measured_temp, outside_temp=None):
    '''
    Calculates (required temperature - measured temperature),
    using the hour of `time` to decide which required temp applies.
    Returns 0 if measured_temp is not a violation.

    Daytime (6 AM - 10 PM): required temp is 68, but only when
    outside_temp is below 55; otherwise it is not a violation.
    Nighttime (10 PM - 6 AM): required temp is 62, outside_temp is ignored.

    :type time: datetime
    :type measured_temp: int
    :type outside_temp: int (optional for nighttime)
    :raises ValueError: if a daytime reading comes without outside_temp
    '''
    day_required_temp = 68
    night_required_temp = 62
    outside_temp_cutoff = 55

    # day: 6 AM - 10 PM; every other hour is night
    is_daytime = 6 <= time.hour < 22

    if not is_daytime:
        # Clamp at 0 so non-violating readings do not yield negative severity
        return max(night_required_temp - measured_temp, 0)

    # Test against None explicitly: an outside temperature of 0 is a valid
    # reading and must not be mistaken for missing data.
    if outside_temp is None:
        raise ValueError('Daytime calculation requires outside_temp information')

    if outside_temp < outside_temp_cutoff:
        return max(day_required_temp - measured_temp, 0)

    # Warm enough outside: not a violation
    return 0
        

In [None]:
class Row:
    '''
    One record of the dataset: a single reading tied to a user.
    Stores the constructor arguments as attributes without conversion.
    '''

    def __init__(self, user_id, sensor_id, temp, created_at, outdoor_temp, violation):
        self.user_id = user_id
        self.sensor_id = sensor_id
        self.temp = temp
        self.created_at = created_at
        self.outdoor_temp = outdoor_temp
        self.violation = violation

    def __repr__(self):
        return f'<Row Object> user_id: {self.user_id}'

    def __str__(self):
        # Must go through the instance: a bare `__repr__` name is not in
        # scope inside a method and raises NameError.
        return self.__repr__()

    def __eq__(self, another_row):
        '''
        Two Row objects are equal if they have the same user_id.
        Objects without a user_id attribute are left to Python's
        default comparison by returning NotImplemented.
        '''
        if not hasattr(another_row, 'user_id'):
            return NotImplemented
        return self.user_id == another_row.user_id
    

In [None]:
class User:
    '''
    Groups all rows that share one user_id and keeps a running count
    of how many of those rows are flagged as violations.
    '''

    def __init__(self, row):
        '''
        Creates the user from its first row; user_id is taken from that row.
        '''
        self.user_id = row.user_id
        self.row_list = []
        self.num_violation = 0
        # Reuse add_row so the violation bookkeeping lives in one place
        self.add_row(row)

    def add_row(self, row):
        '''
        Appends a row to this user and updates the violation count.
        :raises ValueError: if the row's user_id differs from this user's
        '''
        if self.user_id != row.user_id:
            raise ValueError('This row does not belong to this user')
        self.row_list.append(row)
        if row.violation:
            self.num_violation += 1

    def __repr__(self):
        return f'<User Object> user_id: {self.user_id}'

    def __str__(self):
        # Must go through the instance: a bare `__repr__` name is not in
        # scope inside a method and raises NameError.
        return self.__repr__()

    def freq(self):
        '''
        Calculates the frequency of violation for this user
        freq = (# hours of violation) / (total # hours recorded)
        '''
        # row_list is an instance attribute; it always holds at least the
        # row passed to __init__, so the division cannot hit zero.
        return self.num_violation / len(self.row_list)

    def sevr(self):
        '''
        Calculates the severity of violation for this user as the sum of
        calculate_sevr over every row in row_list.
        '''
        return sum(
            calculate_sevr(row.created_at, row.temp, row.outdoor_temp)
            for row in self.row_list
        )
        

In [None]:
def import_file(path):
    '''
    Imports dataset in a standardized manner
    '''
    
    dataset = []

    with open(path, 'r', newline='') as f:
        csv_reader = csv.reader(f)
        next(csv_reader)
        for row in csv_reader:
            temp_dict = {}
            temp_dict['user_id'] = int(row[0])
            temp_dict['sensor_id'] = row[1]
            temp_dict['created_at'] = datetime.strptime(row[3], '%Y-%m-%d %H:%M:%S')
            temp_dict['outdoor_temp'] = int(row[4])
            
        