In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import os
import math
import re

In [2]:
# Switch to the data folder; later cells open their CSV files by bare file name.
# The path is a raw string because its backslashes are literal separators:
# in a normal string '\k', '\O', '\D' and '\E' are invalid escape sequences,
# which newer Python versions warn about. The resulting path is unchanged.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
os.chdir(r'C:/Users\kristian\OneDrive\Desktop\EMD')

In [3]:
def EMD_file(file1, file2):
    """Earth mover's distance between the hourly energy profiles of two CSV files.

    For each file the last two rows are dropped, the 'NORMALIZED ENERGY'
    values at index 0..23 are taken as the 24-hour profile, and each value is
    divided by the sum of the whole remaining column. The distance is the
    cost of a min-cost flow that ships one unit of mass from the first
    profile to the second, where moving mass from hour ``i`` to hour ``j``
    costs ``(j - i) % 24``.

    Parameters
    ----------
    file1, file2 : str or path-like
        CSV files containing a 'NORMALIZED ENERGY' column.

    Returns
    -------
    float
        ``0.0`` when both arguments compare equal (no file is read),
        otherwise the cost returned by ``nx.min_cost_flow_cost``.

    Raises
    ------
    ValueError
        If the 'NORMALIZED ENERGY' column of a file sums to zero.
    """
    # Identical inputs: return right away. Previously the return statement
    # was only reachable from the other branch, so this case yielded None.
    if file1 == file2:
        return 0.0

    hours = 24

    def load_profile(path):
        # Read one file and turn its energy column into a 24-entry profile.
        frame = pd.read_csv(path, delimiter=",")
        frame = frame.drop(frame.index[-2:])
        energy = frame['NORMALIZED ENERGY']
        total = sum(energy)  # computed once instead of once per hour
        if total == 0:
            raise ValueError(f"'NORMALIZED ENERGY' in {path!r} sums to zero")
        return np.array([energy[h] / total for h in range(hours)], dtype=float)

    dist1 = load_profile(file1)
    dist2 = load_profile(file2)

    # Build the transport network. Nodes are labelled by role and hour so a
    # source and a sink can never collapse into one node when their values
    # happen to be equal.
    g = nx.DiGraph()
    g.add_node('s', demand=-1)
    g.add_node('t', demand=1)

    # Tiny zero-cost bypass from 's' to 't'; it appears to exist so the unit
    # demand stays feasible when the float capacities sum to slightly under 1.
    g.add_edge('s', 't', capacity=1e-6, weight=0)

    for h in range(hours):
        g.add_edge('s', ('src', h), capacity=dist1[h])
    for h in range(hours):
        g.add_edge(('dst', h), 't', capacity=dist2[h])

    for i in range(hours):
        for j in range(hours):
            # The modulo result is already non-negative, so no abs() is needed.
            g.add_edge(
                ('src', i),
                ('dst', j),
                weight=(j - i) % hours,
                capacity=max(dist1[i], dist2[j]),
            )

    return nx.min_cost_flow_cost(g)
        
        

In [4]:
def EMD_list(file1, list):
    """Earth mover's distance between a CSV energy profile and a list of weights.

    The file is processed as follows: the last two rows are dropped, the
    'NORMALIZED ENERGY' values at index 0..23 are taken as the 24-hour
    profile, and each value is divided by the sum of the whole remaining
    column. The first 24 entries of ``list`` are divided by the sum of all
    its entries. The distance is the cost of a min-cost flow that ships one
    unit of mass from the file profile to the list profile, where moving mass
    from hour ``i`` to hour ``j`` costs ``(j - i) % 24``.

    Parameters
    ----------
    file1 : str or path-like
        CSV file containing a 'NORMALIZED ENERGY' column.
    list : sequence of numbers
        At least 24 non-negative weights, one per hour. The name shadows the
        builtin; it is kept so existing keyword callers keep working.

    Returns
    -------
    float
        The cost returned by ``nx.min_cost_flow_cost``.

    Raises
    ------
    ValueError
        If ``list`` has fewer than 24 entries, sums to zero, or the file's
        'NORMALIZED ENERGY' column sums to zero.
    """
    hours = 24
    weights = list  # local alias so the builtin name is not used below

    # Validate the cheap in-memory argument before touching the file system.
    if len(weights) < hours:
        raise ValueError(f"expected at least {hours} hourly weights, got {len(weights)}")
    weight_total = sum(weights)
    if weight_total == 0:
        raise ValueError("hourly weights sum to zero; cannot normalise")

    # Load the file profile.
    frame = pd.read_csv(file1, delimiter=",")
    frame = frame.drop(frame.index[-2:])
    energy = frame['NORMALIZED ENERGY']
    energy_total = sum(energy)  # computed once instead of once per hour
    if energy_total == 0:
        raise ValueError(f"'NORMALIZED ENERGY' in {file1!r} sums to zero")

    dist1 = np.array([energy[h] / energy_total for h in range(hours)], dtype=float)
    dist2 = np.array([weights[h] / weight_total for h in range(hours)], dtype=float)

    # Build the transport network. Nodes are labelled by role and hour so a
    # source and a sink can never collapse into one node when their values
    # happen to be equal.
    g = nx.DiGraph()
    g.add_node('s', demand=-1)
    g.add_node('t', demand=1)

    # Tiny zero-cost bypass from 's' to 't'; it appears to exist so the unit
    # demand stays feasible when the float capacities sum to slightly under 1.
    g.add_edge('s', 't', capacity=1e-6, weight=0)

    for h in range(hours):
        g.add_edge('s', ('src', h), capacity=dist1[h])
    for h in range(hours):
        g.add_edge(('dst', h), 't', capacity=dist2[h])

    for i in range(hours):
        for j in range(hours):
            # The modulo result is already non-negative, so no abs() is needed.
            g.add_edge(
                ('src', i),
                ('dst', j),
                weight=(j - i) % hours,
                capacity=max(dist1[i], dist2[j]),
            )

    return nx.min_cost_flow_cost(g)

In [5]:
def sort_files(house, files=None):
    """Rank candidate files by their EMD to ``house``, largest distance first.

    Parameters
    ----------
    house : str
        CSV file of the house profile; passed to ``EMD_file`` as first argument.
    files : iterable of str, optional
        Candidate CSV files to rank. Defaults to
        ``['community1.csv', 'community2.csv']``.

    Returns
    -------
    list
        The candidates ordered by decreasing ``EMD_file(house, candidate)``.
    """
    # Compare against None so an explicitly empty collection stays empty.
    if files is None:
        files = ['community1.csv', 'community2.csv']
    house_name = str(house)
    return sorted(
        files,
        key=lambda candidate: EMD_file(house_name, candidate),
        reverse=True,
    )

In [6]:
# Print the community ranking for house1 .. house4.
for house_number in range(1, 5):
    print(f'house{house_number}:')
    print(sort_files(f'house{house_number}.csv'))

house1:
['community1.csv', 'community2.csv']
house2:
['community2.csv', 'community1.csv']
house3:
['community1.csv', 'community2.csv']
house4:
['community1.csv', 'community2.csv']


In [8]:
# Ask for at-home time slots until the input is valid, then report how the
# resulting hourly profile splits between the two communities.
while True:
    time_slots = input("In what hourly time-slots are you usually at home? (e.g. 00 - 06, 14 - 18, 21 - 24): ")

    # Split the input into individual time slots
    slots = time_slots.split(',')

    # Validate every slot and keep the parsed (start, end) pairs so the
    # text does not have to be parsed a second time below.
    valid_input = True
    intervals = []
    for slot in slots:
        slot = slot.strip()
        if not re.match(r'^\d{2}\s*-\s*\d{2}$', slot):
            valid_input = False
            break
        start, end = map(int, slot.split('-'))
        if start < 0 or start > 24 or end < 0 or end > 24 or start >= end:
            valid_input = False
            break
        intervals.append((start, end))

    if valid_input:
        # Mark every hour covered by at least one slot, then normalise.
        time_list = [0] * 24
        for start, end in intervals:
            for i in range(start, end):
                time_list[i] = 1
        s = sum(time_list)
        time_list = [flag / s for flag in time_list]

        # Each EMD_list call re-reads a CSV and solves a flow problem, so
        # evaluate it once per community and reuse the results.
        emd_com1 = EMD_list('community1.csv', time_list)
        emd_com2 = EMD_list('community2.csv', time_list)
        emd_total = emd_com2 + emd_com1

        if emd_total == 0:
            # Both distances are zero: report an even split instead of
            # dividing by zero.
            share_com1 = share_com2 = 50.0
        else:
            share_com1 = (emd_com1 * 100) / emd_total
            share_com2 = (emd_com2 * 100) / emd_total

        percent_com1 = round(share_com1)
        percent_com2 = round(share_com2)

        # The community with the larger unrounded share is reported as
        # optimal; a tie goes to Community 2.
        if share_com1 > share_com2:
            opt = 'Community 1'
        else:
            opt = 'Community 2'
        print(f'Community 1: {percent_com1}%\nCommunity 2: {percent_com2}%\nYour optimal community is {opt}!')

        break

    else:
        print("Invalid input. Please enter your consumption time slots in the format 'HH - HH', where HH is an integer between 0 and 24, separated by commas. Also, make sure that each time slot has a start time which is earlier than the end time")

In what hourly time-slots are you usually at home? (e.g. 00 - 06, 14 - 18, 21 - 24): 04-15
Community 1: 49%
Community 2: 51%
Your optimal community is Community 2!


In [9]:
def survey_output(survey):
    """Print how a survey profile splits between the two communities.

    Computes ``EMD_file('community1.csv', survey)`` and
    ``EMD_file('community2.csv', survey)``, expresses each as a percentage of
    their sum, and prints both rounded percentages together with the
    community that has the larger share.

    Parameters
    ----------
    survey : str
        CSV file of the survey profile; passed to ``EMD_file``.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # Evaluate each distance once; every EMD_file call reads files and solves
    # a flow problem.
    emd_comm1 = EMD_file('community1.csv', survey)
    emd_comm2 = EMD_file('community2.csv', survey)
    emd_total = emd_comm2 + emd_comm1

    if emd_total == 0:
        # Both distances are zero: report an even split instead of dividing
        # by zero.
        share_comm1 = share_comm2 = 50.0
    else:
        share_comm1 = (emd_comm1 * 100) / emd_total
        share_comm2 = (emd_comm2 * 100) / emd_total

    perc_comm1 = round(share_comm1)
    perc_comm2 = round(share_comm2)

    # Decide on the unrounded shares: rounding first can turn 50.4 vs 49.6
    # into 50 vs 50 and pick the wrong community. A tie goes to Community 2.
    if share_comm1 > share_comm2:
        opt = 'Community 1'
    else:
        opt = 'Community 2'
    print(f'Community 1: {perc_comm1}%\nCommunity 2: {perc_comm2}%\nYour optimal community is {opt}!')