-
Notifications
You must be signed in to change notification settings - Fork 0
/
preperation.py
160 lines (118 loc) · 6.09 KB
/
preperation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Perform data collection and transformation
Author: Peter Kim (vento277)
Version: August 12, 2023
"""
import pandas as pd
import geopandas as gpd
import pathlib as pl
class Collection:
    """
    A class representing the data collection process

    Attribute building_name: Name of the building
    Invariant: building_name is a string

    Attribute folder_path: Relative path of the dataset/building_name
    Invariant: folder_path is a string with r''
    """

    def __init__(self, building_name, folder_path):
        self.building_name = building_name
        self.folder_path = folder_path

    def skyspark(self):
        """
        Returns the five meter CSV files of the building merged on 'Timestamp'

        Reads <folder_path>/<building_name>/<building_name>_<meter>.csv for the
        meters Elec_Energy, Elec_Power, Thrm_Energy, Thrm_Power and Wtr_Cns.
        Every file must have exactly two columns; they are renamed to
        'Timestamp' and the meter name.

        Returns False when the 'Timestamp' columns of the files are not all
        equal to one another.
        Invariant: The value is a 2-dimensional data structure, or False
        """
        meters = ('Elec_Energy', 'Elec_Power', 'Thrm_Energy', 'Thrm_Power', 'Wtr_Cns')
        prefix = f'{self.folder_path}/{self.building_name}/{self.building_name}_'

        # Read each csv and replace its header (which carries the building
        # name) with a uniform pair of column names.
        frames = {
            meter: pd.read_csv(f'{prefix}{meter}.csv').set_axis(['Timestamp', meter], axis='columns')
            for meter in meters
        }

        # Only merge when every file covers the same time range: compare each
        # file's timestamps with those of the next file.
        stamps = [frames[meter]['Timestamp'] for meter in meters]
        if not all(left.equals(right) for left, right in zip(stamps, stamps[1:])):
            return False

        elec = pd.merge(frames['Elec_Energy'], frames['Elec_Power'], on=['Timestamp'], how='left')
        thrm = pd.merge(frames['Thrm_Energy'], frames['Thrm_Power'], on=['Timestamp'], how='left')
        elec_thrm = pd.merge(elec, thrm, on=['Timestamp'], how='left')
        return pd.merge(elec_thrm, frames['Wtr_Cns'], on=['Timestamp'], how='left')

    def geojson(self, dataframe):
        """
        Returns dataframe with geojson data included

        Looks the building up in <folder_path>/ubcv_buildings.geojson by testing
        whether building_name is a substring of each feature's NAME, then adds
        the matched feature's attributes to dataframe as constant columns.
        When several features match, the last one is used.

        Parameter dataframe: Dataframe to extend; it is modified in place and
        also returned.

        Returns False (leaving dataframe untouched) when no feature matches.
        Invariant: The value is a 2-dimensional data structure, or False
        """
        gjson = gpd.read_file(self.folder_path + '/ubcv_buildings.geojson')

        # Track the match explicitly. Starting from a default position of 0
        # would silently attach the first feature's data to an unknown building.
        # NOTE(review): some buildings are listed under SHORTNAME rather than
        # NAME; only NAME is searched here.
        match = None
        for position, name in enumerate(gjson['NAME']):
            # Features without a NAME are read as null values; skip them.
            if isinstance(name, str) and self.building_name in name:
                match = position

        # NOTE(review): "not found" used to be detected by comparing against the
        # hard-coded row 434, which also rejected a real match on that row.
        # Confirm no caller relies on that row being excluded.
        if match is None:
            return False

        # Output column -> GeoJSON property it is copied from.
        attributes = {
            'BLDG_UID': 'BLDG_UID',
            'Occu_Date': 'OCCU_DATE',
            'Condition': 'BLDG_CONDITION',
            'Green_Status': 'GREEN_STATUS',
            'Constr_Type': 'CONSTR_TYPE',
            'MAX_Floors': 'MAX_FLOORS',
            'BLDG_Height': 'BLDG_HEIGHT',
            'GBA': 'GBA',
        }
        for target, source in attributes.items():
            # match is a position, so use positional access.
            dataframe[target] = gjson[source].iloc[match]
        return dataframe

    def eui(self, dataframe):
        """
        Returns dataframe with computed eui

        Adds Elec_EUI, Thrm_EUI and Wtr_WUI (each meter reading divided by the
        'GFA' column) and Total_EUI_excwtr (Thrm_EUI + Elec_EUI).

        Parameter dataframe: Dataframe with the columns Elec_Energy,
        Thrm_Energy, Wtr_Cns and GFA; it is modified in place and also returned.

        Returns False when any 'GFA' value is missing.
        Invariant: The value is a 2-dimensional data structure, or False
        """
        # NOTE(review): geojson() adds a 'GBA' column, not 'GFA'; confirm where
        # 'GFA' is expected to come from.
        area = dataframe['GFA']
        if area.isnull().values.any():
            return False

        dataframe['Elec_EUI'] = dataframe['Elec_Energy'].astype(float) / area
        dataframe['Thrm_EUI'] = dataframe['Thrm_Energy'].astype(float) / area
        dataframe['Wtr_WUI'] = dataframe['Wtr_Cns'].astype(float) / area
        dataframe['Total_EUI_excwtr'] = dataframe['Thrm_EUI'] + dataframe['Elec_EUI']
        return dataframe
class Transformation:
    """
    A class representing the data transformation process

    Attribute dataframe: Dataframe in which the transformation should occur
    Invariant: dataframe is a 2-dimensional data structure
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def parse_arrange(self, col):
        """
        Returns parsed and arranged dataframe

        For every column whose name contains 'Timestamp', adds 'Year', 'Month'
        and 'Day' columns split from the date part and replaces the
        ' Los_Angeles' suffix with 'PST'. Strips the unit text from columns
        whose name contains 'Energy' (kWh), 'Power' (kW) or 'Cns' (m³).
        The result is then reindexed to the columns listed in col.

        The transformation runs on a copy, so self.dataframe is left unchanged
        and the method can be called repeatedly.

        Parameter col: Column labels, in order, of the returned dataframe
        Invariant: The value is a 2-dimensional data structure
        """
        df = self.dataframe.copy()

        # Iterate over a snapshot of the columns: the loop adds new columns.
        for column in list(df.columns):
            if 'Timestamp' in column:
                # Strip the midnight time and timezone so only YYYY-MM-DD is
                # left to split. A local Series is used so that no scratch
                # column is written into the dataframe.
                dates = df[column].replace('T00:00:00-08:00 Los_Angeles', '', regex=True)
                dates = dates.replace('T00:00:00-07:00 Los_Angeles', '', regex=True)
                df[['Year', 'Month', 'Day']] = dates.str.split('-', expand=True)
                # NOTE(review): both the -08:00 and the -07:00 offsets end up
                # labelled 'PST'; confirm that is intended.
                df[column] = df[column].replace(' Los_Angeles', 'PST', regex=True)

            # Remove meter units.
            if 'Energy' in column:
                df[column] = df[column].replace('kWh', '', regex=True)
            if 'Power' in column:
                df[column] = df[column].replace('kW', '', regex=True)
            if 'Cns' in column:
                df[column] = df[column].replace('m³', '', regex=True)

        # Re-arrange the dataframe
        return df.reindex(columns=col)
def csv_output(path, name, dataframe, function):
    """
    Writes dataframe, without its index, to <path>/<name>/_<name>_<function>.csv

    Parameter path: Folder that contains the <name> folder
    Parameter name: Name of the sub-folder; also used in the file name
    Parameter dataframe: Dataframe to write
    Parameter function: Label appended to the file name
    """
    file_name = '_' + '_'.join((name, function)) + '.csv'
    target = '/'.join((path, name, file_name))
    dataframe.to_csv(target, index=False)