In [1]:
import pandas as pd
import numpy as np
import h5py
from pytz import timezone
import re
from datetime import datetime
import random
from time import time

one_sqm_in_sqft = 1562500 / 145161 # source: Wolfram Alpha

meta_dict = {}

  from ._conv import register_converters as _register_converters


In [2]:
def apply_func(f):
    '''Run ``f`` against the metadata HDF5 store and return its result.

    Opens 'meta/meta.hdf5' in read/write mode ('r+'), hands the open file
    to ``f`` and returns whatever ``f`` returns.  The file is closed when
    the call finishes, including when ``f`` raises, so a failing callback
    does not leave the store open in the running kernel.
    '''
    file = 'meta/meta.hdf5'
    with h5py.File(file, 'r+') as buildings_data:
        return f(buildings_data)

### ireland

In [3]:
ireland_meta = pd.read_csv('./slack/IRELAND_SQFT_SQM.csv')

In [4]:
ireland_meta.head()

Unnamed: 0,ID,Question 6103: What is the approximate floor area of your home?,"Question 61031: Is that 2(sqft), 1(sqm)"
0,1002,999999999,
1,1003,999999999,
2,1004,999999999,
3,1005,999999999,
4,1008,999999999,


In [5]:
ireland_meta.iloc[:, 2].value_counts()

     2451
2    1641
1     140
Name: Question 61031: Is that 2(sqft), 1(sqm), dtype: int64

In [6]:
# Map household ID (as str) -> floor area in sqft, or None when the unit is unknown.
# Columns are (ID, floor area, unit code); per the survey question text the
# unit code is '2' for sqft and '1' for sqm.
meta_dict['ireland'] = {}

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
for row in ireland_meta.values:
    if row[2] == '2':
        sqft = float(row[1])
    elif row[2] == '1':
        sqft = float(row[1]) * one_sqm_in_sqft
    else:
        # Any other unit code (e.g. blank): the area cannot be interpreted.
        sqft = None
    meta_dict['ireland'][str(row[0])] = sqft

### MIT

In [16]:
# Translation table: building name as stored in the HDF5 file -> name used in the MIT metadata.
mit_names = pd.read_csv('./slack/MIT_Building_Names_meta_comparison.csv')
mit_names_map = dict(zip(mit_names['name_in_hdf5'].values,
                         mit_names['name_in_metadat'].values))

In [18]:
mit_meta = pd.read_table('./slack/MIT_SQFT_edited.txt', sep=' ', skiprows=2, header=None).iloc[:, :2]

In [53]:
mit_meta.head()

Unnamed: 0,0,1
0,2,133168
1,3,160646
2,4,162822
3,5,80942
4,6,90246


In [20]:
# Map MIT building identifier (as str) -> floor area in sqft.
meta_dict['MIT'] = {}

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
for row in mit_meta.values:
    sqft = float(row[1])
    meta_dict['MIT'][str(row[0])] = sqft

### pecan

In [22]:
pecan_meta = pd.read_csv('./slack/PECAN_SQFT.csv')

In [23]:
pecan_meta.head()

Unnamed: 0,dataid,sqft
0,4495,1087.0
1,8061,1453.0
2,3938,1120.0
3,3036,2100.0
4,26,2075.0


In [24]:
# Map pecan dataid (as str) -> floor area in sqft.
meta_dict['pecan'] = {}

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
for row in pecan_meta.values:
    sqft = float(row[1])
    # int() first so the key has no trailing '.0' if the row was cast to float.
    meta_dict['pecan'][str(int(row[0]))] = sqft

### Consolidate Data

In [71]:
def get_counts(buildings_data, dset_name):
    '''Print how many buildings of ``dset_name`` lack / have an entry in ``meta_dict``.'''
    dataset = buildings_data[dset_name]
    missing = sum(1 for building in dataset if building not in meta_dict[dset_name])
    present = len(dataset.keys()) - missing
    print('not found: ' + str(missing))
    print('found: ' + str(present))

def get_fields(buildings_data, dset_name):
    '''Return the attribute names of the first building listed under ``dset_name``.'''
    dataset = buildings_data[dset_name]
    first_building = list(dataset.keys())[0]
    attribute_names = dataset[first_building].attrs.keys()
    return list(attribute_names)

def random_building_fields(buildings_data, dset_name):
    '''Print the name and every attribute of one randomly chosen building.'''
    dataset = buildings_data[dset_name]
    chosen = random.choice(list(dataset.keys()))
    print('Building: %s' % chosen)
    for key, value in dataset[chosen].attrs.items():
        print('%s: %s' % (key, value))

def count_buildings_with_fields(buildings_data, dset_name, fields):
    '''Print how many buildings carry every attribute in ``fields`` and how many do not.'''
    dataset = buildings_data[dset_name]
    names = dataset.keys()
    num_buildings = len(names)
    # A building is incomplete as soon as one requested attribute is absent.
    incomplete = sum(
        1 for name in names
        if any(field not in dataset[name].attrs for field in fields)
    )
    print('%d have specified fields' % (num_buildings - incomplete))
    print('%d do not have specified fields' % incomplete)

def get_buildings_with_fields(buildings_data, dset_name, fields):
    '''Return the names of the buildings in ``dset_name`` that carry every attribute in ``fields``.'''
    dataset = buildings_data[dset_name]
    return [
        name for name in dataset.keys()
        if all(field in dataset[name].attrs for field in fields)
    ]

def get_building_names(buildings_data, dset_name):
    '''Return a one-column DataFrame (column 'name') listing the buildings in ``dset_name``.'''
    names = list(buildings_data[dset_name].keys())
    return pd.DataFrame(names, columns=['name'])

In [75]:
# random check
apply_func(lambda x: random_building_fields(x, 'genome2'))

Building: Neeb Hall
Timezone: America/Phoenix
Sqft: 9309.0
Industry: Education
PSU: College Classroom
Subindustry: College/University
Climatezone: 2


In [17]:
# Save MIT building names
# get_building_names(buildings_data, 'MIT').to_csv('MIT_Building_Names.csv', index=False)

### Change Meta Data

In [27]:
# See a random building's meta data (Checks)
# random_building_fields(buildings_data, 'ireland')

### Timezone updates

In [18]:
def timezone_updates(buildings_data):
    '''Write a fixed 'Timezone' attribute on every building of four datasets.'''
    # (dataset, timezone) pairs, applied in this order.
    dataset_timezones = (
        ('utexas', 'America/Chicago'),
        ('pecan', 'America/Chicago'),
        ('MIT', 'America/New_York'),
        ('ireland', 'Europe/London'),
    )
    for dset_name, tz_name in dataset_timezones:
        dataset = buildings_data[dset_name]
        for building in dataset.keys():
            dataset[building].attrs['Timezone'] = tz_name

apply_func(timezone_updates)

### Add climate zones
* Generic Updates

In [65]:
def climate_zone_updates(buildings_data):
    '''Write a fixed 'Climatezone' attribute on every building of four datasets.

    NOTE: a later cell of this notebook re-declares ``climate_zone_updates``
    for the genome datasets, shadowing this definition once it has run.
    '''
    # (dataset, climate zone) pairs, applied in this order; zones are stored as strings.
    dataset_zones = (
        ('utexas', '2'),
        ('pecan', '2'),
        ('MIT', '5'),
        ('ireland', '4'),
    )
    for dset_name, zone in dataset_zones:
        dataset = buildings_data[dset_name]
        for building in dataset.keys():
            dataset[building].attrs['Climatezone'] = zone

apply_func(climate_zone_updates)

* Update genome datasets

In [68]:
# Check timezones
def climate_zone_check(buildings_data):
    '''Return the set of distinct 'Timezone' values found in 'genome' and 'genome2'.

    Despite the name, this inspects timezones; the result is what the
    timezone -> climate-zone table has to cover.
    '''
    return {
        buildings_data[dset_name][building].attrs['Timezone']
        for dset_name in ('genome', 'genome2')
        for building in buildings_data[dset_name].keys()
    }

apply_func(climate_zone_check)

{'America/Chicago',
 'America/Denver',
 'America/Los_Angeles',
 'America/New_York',
 'America/Phoenix',
 'Asia/Singapore',
 'Australia/Perth',
 'Europe/London',
 'Europe/Zurich'}

In [70]:
# Timezone -> climate-zone code (string) for the timezones present in the genome datasets.
# NOTE(review): 'America/Chicago' maps to '5' here, while the generic update assigns '2'
# to the utexas/pecan buildings that use the same timezone -- confirm this is intended.
lookup = dict([
    ('America/Los_Angeles', '3'),
    ('America/Denver', '5'),
    ('America/Phoenix', '2'),
    ('America/Chicago', '5'),
    ('America/New_York', '4'),
    ('Europe/London', '4'),
    ('Europe/Zurich', '5'),
    ('Asia/Singapore', '1'),
    ('Australia/Perth', '5'),
])

def get_climatezone(timezone):
    '''Return the climate-zone code for ``timezone``; raises KeyError if it is not in ``lookup``.'''
    zone = lookup[timezone]
    return zone

def climate_zone_updates(buildings_data):
    '''Set 'Climatezone' from each building's 'Timezone' for 'genome' and 'genome2'.

    NOTE: this re-declares ``climate_zone_updates``; an earlier cell defines a
    function of the same name for the other datasets.
    '''
    for dset_name in ('genome', 'genome2'):
        dataset = buildings_data[dset_name]
        for building in dataset.keys():
            attrs = dataset[building].attrs
            attrs['Climatezone'] = get_climatezone(attrs['Timezone'])

apply_func(climate_zone_updates)

### Industry, Subindustry and PSU

In [98]:
def industry_updates(buildings_data):
    '''Write fixed industry-related attributes on every building of four datasets.

    The residential datasets get 'Industry' and 'PSU'; the university
    datasets get 'Industry' and 'Subindustry'.
    '''
    # Attribute pairs are written in the listed order for each dataset.
    assignments = (
        ('ireland', (('Industry', 'Residential'), ('PSU', 'Single_family_house'))),
        ('pecan', (('PSU', 'Single_family_house'), ('Industry', 'Residential'))),
        ('utexas', (('Industry', 'Education'), ('Subindustry', 'College/University'))),
        ('MIT', (('Industry', 'Education'), ('Subindustry', 'College/University'))),
    )
    for dset_name, pairs in assignments:
        dataset = buildings_data[dset_name]
        for building in dataset.keys():
            attrs = dataset[building].attrs
            for key, value in pairs:
                attrs[key] = value

apply_func(industry_updates)

### MIT sqft updates

In [84]:
def update_mit(buildings_data):
    '''Set the 'Sqft' attribute of every MIT building from ``meta_dict['MIT']``.

    HDF5 building names are translated through ``mit_names_map`` first;
    a name missing from either mapping raises KeyError.
    '''
    mit_group = buildings_data['MIT']
    for building in mit_group:
        metadata_name = mit_names_map[building]
        mit_group[building].attrs['Sqft'] = meta_dict['MIT'][metadata_name]

apply_func(update_mit)

### Read genome2

In [None]:
genome2 = pd.read_csv('./genome2/TemporalMaster.csv', )

In [None]:
genome2.shape

In [None]:
genome2.head()

In [None]:
genome2_meta = pd.read_csv('./genome2/MetaMasterModified.csv', encoding='utf8')

In [None]:
genome2_buildings = set(genome2_meta.siteid.tolist())

In [None]:
# Every column after the first is a site id; collect the ones with no metadata row.
not_found = []
for building in genome2.columns[1:]:
    if building in genome2_buildings:
        continue
    not_found.append(building)
count = len(not_found)
print('Found: %d' % (len(genome2.columns) - 1 - count))
print('Not found: %d' % (count))

In [None]:
genome2_meta_less = genome2_meta[['siteid', 'timezone', 'sqft', 'industry', 'subindustry', 'primaryspaceusage']].copy()

In [None]:
# Map siteid -> attribute dict, keyed with the attribute names used in the HDF5 store.
meta_dict['genome2'] = {}

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
# Column order follows genome2_meta_less:
# siteid, timezone, sqft, industry, subindustry, primaryspaceusage.
for row in genome2_meta_less.values:
    siteid = row[0]
    meta_dict['genome2'][siteid] = {
        'Timezone': row[1],
        'Sqft': row[2],
        'Industry': row[3],
        'Subindustry': row[4],
        'PSU': row[5]
    }

In [None]:
genome2_meta_less.head()

In [None]:
genome2_meta.columns

In [None]:
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
genome2_mat = genome2.values

### Store genome2 data in memory

In [None]:
# genome2_data[site_id][year (str)] -> 2-column array of (timestamp, reading) rows.
genome2_data = {}

start = time()

for j in range(1, genome2_mat.shape[1]):
    site_id = genome2.columns[j]
    genome2_data[site_id] = {}
    # Keep only the rows where this site has a reading.
    tf = ~np.isnan(genome2_mat[:, j].astype(float))
    # Fancy indexing returns a copy, so rewriting the timestamps below
    # leaves genome2_mat untouched for the next site.
    building_years = genome2_mat[:, [0, j]][tf, :]
    cur_year = None
    prev_i = 0

    for i in range(building_years.shape[0]):
        row = building_years[i, :]
        row_date = datetime.strptime(row[0], '%m/%d/%y %H:%M')
        building_years[i, 0] = datetime.strftime(row_date, '%Y-%m-%d %H:%M:%S')
        if cur_year is None:
            cur_year = row_date.year
        elif cur_year != row_date.year:
            # Row i is the first row of the new year, so the finished year is
            # rows [prev_i, i) and the new year starts at row i.  (Slicing to
            # i+1 filed that first row under the previous year.)
            genome2_data[site_id][str(cur_year)] = building_years[prev_i:i, :]
            cur_year = row_date.year
            prev_i = i
    if building_years.shape[0] > 0:
        # Flush the last (possibly only) year.
        genome2_data[site_id][str(cur_year)] = building_years[prev_i:, :]

print('Time taken: %.4f' % (time()- start))

### Create new dataset and write to hdf5

In [None]:
# genome2_data['SchoellkopfCrescent.Elec.PowerScout3/kW_System']['2015'].astype('|S32')

In [None]:
def make_genome2(buildings_data):
    '''(Re)create the 'genome2' group from ``genome2_data`` and ``meta_dict['genome2']``.

    Any existing 'genome2' group is deleted first.  Each site becomes a
    sub-group (with '/' in its name replaced by '.') that carries the
    site's metadata as attributes and one '|S32' dataset per year.
    '''
    if 'genome2' in buildings_data.keys():
        del buildings_data['genome2']
    buildings_data.create_group('genome2')
    genome2_group = buildings_data['genome2']
    for building, yearly in genome2_data.items():
        building_name = building.replace('/', '.')
        genome2_group.create_group(building_name)
        building_group = genome2_group[building_name]
        for field, value in meta_dict['genome2'][building].items():
            building_group.attrs[field] = value
        for year, readings in yearly.items():
            building_group.create_dataset(year, shape=readings.shape, dtype='|S32')
            building_group[year][:] = readings.astype('|S32')

apply_func(make_genome2)

In [99]:
datasets_names = ['genome', 'genome2', 'utexas', 'ireland', 'MIT', 'pecan']
fields_to_check = ['Sqft', 'Industry', 'Subindustry', 'Timezone', 'PSU']

def check_field(buildings_data, dset_name, field):
    '''Print, for one attribute, how many buildings lack it and how many hold a null value.

    NOTE: later cells of this notebook re-declare ``check_field`` with a
    different signature.
    '''
    dataset = buildings_data[dset_name]
    total = 0
    missing = 0
    nulls = 0
    for building in dataset:
        total += 1
        attrs = dataset[building].attrs
        if field in attrs:
            if pd.isnull(attrs[field]):
                nulls += 1
        else:
            missing += 1
    print('%s - total: %d, not found: %d, nan: %d' % (field, total, missing, nulls))


def view_genome2(buildings_data):
    '''Print the 'Sqft' coverage summary of the 'genome2' dataset.

    Calls the three-argument ``check_field`` declared in this cell.
    NOTE: later cells re-declare ``check_field`` with the signature
    (buildings_data, dset_name, building, field, skip=None); once one of
    those has run, this call no longer matches.
    '''
    # Ad-hoc inspection snippets, kept disabled:
    #print(list(buildings_data['genome2']['SchoellkopfCrescent.Elec.PowerScout3/kW_System'].attrs.items()))
    #print(buildings_data['genome2']['SchoellkopfCrescent.Elec.PowerScout3/kW_System']['2015'][:])
    #random_building_fields(buildings_data, 'genome2')
    #print(np.isnan(buildings_data['genome2']['178391701e80df305b27965c06b0d977'].attrs['Sqft']))
    check_field(buildings_data, 'genome2', 'Sqft')

def view_data(buildings_data):
    '''Print a per-field coverage summary for every dataset in ``datasets_names``.

    For each dataset a heading is printed, then one ``check_field`` line per
    entry of ``fields_to_check``, then a blank line.
    '''
    for name in datasets_names:
        heading = 'Summary of %s' % name
        print(heading)
        for attribute in fields_to_check:
            check_field(buildings_data, name, attribute)
        print()

apply_func(view_data)

Summary of genome
Sqft - total: 507, not found: 0, nan: 0
Industry - total: 507, not found: 0, nan: 0
Subindustry - total: 507, not found: 0, nan: 0
Timezone - total: 507, not found: 0, nan: 0
PSU - total: 507, not found: 0, nan: 0

Summary of genome2
Sqft - total: 1236, not found: 0, nan: 356
Industry - total: 1236, not found: 0, nan: 0
Subindustry - total: 1236, not found: 0, nan: 0
Timezone - total: 1236, not found: 0, nan: 0
PSU - total: 1236, not found: 0, nan: 0

Summary of utexas
Sqft - total: 112, not found: 1, nan: 0
Industry - total: 112, not found: 0, nan: 0
Subindustry - total: 112, not found: 0, nan: 0
Timezone - total: 112, not found: 0, nan: 0
PSU - total: 112, not found: 1, nan: 0

Summary of ireland
Sqft - total: 6435, not found: 4654, nan: 0
Industry - total: 6435, not found: 0, nan: 0
Subindustry - total: 6435, not found: 6435, nan: 0
Timezone - total: 6435, not found: 0, nan: 0
PSU - total: 6435, not found: 0, nan: 0

Summary of MIT
Sqft - total: 87, not found: 0, n

### Get Start and End Date

In [7]:
min(['2017-05-12','2017-08-12'])

'2017-05-12'

In [23]:
datasets_names = ['genome', 'genome2', 'utexas', 'ireland', 'MIT', 'pecan']
fields_to_check = ['Sqft', 'Industry', 'Timezone', 'PSU']

def get_dates(buildings_data, dset_name, fields):
    '''Return ``(earliest, latest)`` timestamp over the usable buildings of a dataset.

    A building is usable when every attribute in ``fields`` is present and
    non-null; its own range comes from ``helper_get_dates``.

    Raises:
        ValueError: if no building in ``dset_name`` is usable.
    '''
    dataset = buildings_data[dset_name]
    min_list = []
    max_list = []
    for building in dataset:
        attrs = dataset[building].attrs
        # Stop probing at the first missing or null field: one is enough to
        # reject the building.
        if any(field not in attrs or pd.isnull(attrs[field]) for field in fields):
            continue
        min_, max_ = helper_get_dates(buildings_data, dset_name, building)
        min_list.append(min_)
        max_list.append(max_)
    if not min_list:
        # Same exception type min() would raise on an empty list, but it
        # says which dataset and fields were involved.
        raise ValueError('no building in %r has all of %r set' % (dset_name, list(fields)))
    return (min(min_list), max(max_list))
            
def helper_get_dates(buildings_data, dset_name, building):
    '''Return ``(earliest, latest)`` first-column value over all year datasets of one building.

    Year datasets with zero rows are ignored.  Raises ValueError (from
    ``min``) when the building has no non-empty year dataset.
    '''
    building_group = buildings_data[dset_name][building]
    max_list = []
    min_list = []
    for year in building_group:
        year_data = building_group[year]
        if not year_data.shape[0]:
            continue
        # Slice the first column once and reuse it for both extremes instead
        # of reading it from the store twice.
        timestamps = year_data[:, 0]
        max_list.append(max(timestamps))
        min_list.append(min(timestamps))
    return (min(min_list), max(max_list))

def view_data(buildings_data):
    '''Print the overall timestamp range of every dataset in ``datasets_names``.

    Only buildings with all of ``fields_to_check`` set contribute (see ``get_dates``).
    '''
    for name in datasets_names:
        earliest, latest = get_dates(buildings_data, name, fields_to_check)
        line = '%s date range: %s to %s' % (name, earliest, latest)
        print(line)

apply_func(view_data)

genome date range: b'2010-01-01 08:00:00' to b'2015-12-31 23:00:00'
genome2 date range: b'2010-01-01 00:00:00' to b'2015-12-31 23:00:00'
utexas date range: b'2009-01-15 00:00:00' to b'2017-08-20 00:00:00'
ireland date range: b'2009-07-14 00:00:00' to b'2010-12-31 23:00:00'
MIT date range: b'2014-01-01 00:00:00' to b'2016-12-31 23:00:00'
pecan date range: b'2012-03-19 19:00:00' to b'2017-09-16 07:00:00'


### Check Meta

In [50]:
datasets_names = ['genome2', 'utexas', 'ireland', 'MIT', 'pecan']
fields_to_check = ['Sqft', 'Industry', 'Subindustry', 'Timezone', 'PSU']

def check_field(buildings_data, dset_name, building, field, skip=None):
    '''Return ``(ok, value)`` for one attribute of one building.

    ``ok`` is False when the attribute is missing or null, except for the
    field named by ``skip``: that one is optional, so it always reports
    True and passes its stored value through unchecked (None when absent).

    This helper is re-declared in other cells of this notebook.
    '''
    attrs = buildings_data[dset_name][building].attrs
    if field not in attrs:
        # Absence is acceptable only for the optional field.
        return (field == skip, None)
    value = attrs[field]
    if field != skip and pd.isnull(value):
        return (False, None)
    return (True, value)

def inspect_data(buildings_data):
    '''Collect ``[dataset, building, *field values]`` for every fully described building.

    A building is kept when ``check_field`` accepts every entry of
    ``fields_to_check`` ('Subindustry' is treated as optional).
    '''
    results = []
    for dset_name in datasets_names:
        for building in buildings_data[dset_name]:
            outcomes = [
                check_field(buildings_data, dset_name, building, field, skip='Subindustry')
                for field in fields_to_check
            ]
            if all(ok for ok, _ in outcomes):
                results.append([dset_name, building] + [value for _, value in outcomes])
    return results

results = apply_func(inspect_data)

In [51]:
len(results)

2972

In [54]:
results_df = pd.DataFrame(results, columns=['Dataset', 'Building'] + fields_to_check)

In [55]:
results_df.head()

Unnamed: 0,Dataset,Building,Sqft,Industry,Subindustry,Timezone,PSU
0,genome2,03db89c9-28e1-5ac9-8d6d-f889f561ed9f,203787.0,Education,College/University,America/Los_Angeles,College Laboratory
1,genome2,1-canon-row,36704.9,Government,Other Government Buildings,Europe/London,Office
2,genome2,1-parliament-street-1-derby-gate,242930.5,Government,Other Government Buildings,Europe/London,Office
3,genome2,10,1029798.0,Commercial Property,Shopping Center/Shopping Mall,America/New_York,Retail
4,genome2,100,81681.0,Education,Primary/Secondary School,America/New_York,Primary/Secondary Classroom


In [62]:
# Value distribution of every categorical field (everything after 'Sqft'), nulls excluded.
for field in fields_to_check[1:]:
    print('Summary for %s' % field)
    sliced_df = results_df[field].dropna()
    print(sliced_df.value_counts())
    print('Total: %d' % sliced_df.shape[0])
    print()

Summary for Industry
Residential             1894
Education                907
Government                87
Light Industrial          25
Commercial Property       25
Food Sales & Storage      25
Healthcare                 6
Hospitality                2
Public Assembly            1
Name: Industry, dtype: int64
Total: 2972

Summary for Subindustry
College/University                       801
Primary/Secondary School                 115
Public Assembly                           33
Grocer/Market                             25
City, County, State                       22
Food Processing                           19
Other Government Buildings                14
Shopping Center/Shopping Mall             14
Manufacturing                              5
Library                                    5
Hospital                                   5
Commercial Real Estate                     4
Business Services                          3
Hotel                                      2
Corporate Office      

In [64]:
datasets_names = ['genome2', 'utexas', 'ireland', 'MIT', 'pecan']
fields_to_check = ['Sqft', 'Industry', 'Subindustry', 'Timezone', 'PSU']

def check_field(buildings_data, dset_name, building, field, skip=None):
    '''Return ``(ok, value)`` for one attribute of one building.

    ``ok`` is False when the attribute is missing or null, except for the
    field named by ``skip``: that one is optional, so it always reports
    True and passes its stored value through unchecked (None when absent).

    This helper is re-declared in other cells of this notebook.
    '''
    attrs = buildings_data[dset_name][building].attrs
    if field not in attrs:
        # Absence is acceptable only for the optional field.
        return (field == skip, None)
    value = attrs[field]
    if field != skip and pd.isnull(value):
        return (False, None)
    return (True, value)

def count_profiles(buildings_data):
    '''Return the total row count over all year datasets of the fully described buildings.

    A building counts when ``check_field`` accepts every entry of
    ``fields_to_check`` ('Subindustry' is treated as optional).
    '''
    total_rows = 0
    for dset_name in datasets_names:
        dataset = buildings_data[dset_name]
        for building in dataset:
            flags = [
                check_field(buildings_data, dset_name, building, field, skip='Subindustry')[0]
                for field in fields_to_check
            ]
            if not all(flags):
                continue
            group = dataset[building]
            total_rows += sum(group[year].shape[0] for year in group)
    return total_rows

print('The number of profiles for the buildings we are using is %d' %apply_func(count_profiles))

The number of profiles for the buildings we are using is 39334029


### Make 3D table

In [90]:
# Industries that keep their own label; every other value is reported as 'Others'.
industry_filter = {'Residential', 'Education', 'Government'}

# Primary space usages that keep their own label; every other value is reported as 'Others'.
psu_filter = {
    'Single_family_house', 'Office',
    'College Classroom', 'College Laboratory', 'Primary/Secondary Classroom',
    'Dormitory', 'Library', 'Industrial', 'Gymnasium', 'Community Center',
    'Food Sales', 'Sports Stadium', 'Student Union', 'Retail', 'Museum',
    'Fitness Center',
}

def get_industry(industry):
    '''Return ``industry`` if it is in ``industry_filter``, otherwise 'Others'.'''
    if industry in industry_filter:
        return industry
    return 'Others'

def get_psu(psu):
    '''Return ``psu`` if it is in ``psu_filter``, otherwise 'Others'.'''
    if psu in psu_filter:
        return psu
    return 'Others'

def inc_count(dic, values):
    '''Increment the counter stored at the nested key path ``values`` inside ``dic``.

    Intermediate dictionaries and the counter itself are created on first use.
    '''
    path, leaf = values[:-1], values[-1]
    node = dic
    for key in path:
        node = node.setdefault(key, {})
    node[leaf] = node.get(leaf, 0) + 1

In [91]:
datasets_names = ['genome2', 'utexas', 'ireland', 'MIT', 'pecan']
fields_to_check = ['Sqft', 'Subindustry', 'Timezone', 'Climatezone', 'Industry', 'PSU']

def check_field(buildings_data, dset_name, building, field, skip=None):
    '''Return ``(ok, value)`` for one attribute of one building.

    ``ok`` is False when the attribute is missing or null, except for the
    field named by ``skip``: that one is optional, so it always reports
    True and passes its stored value through unchecked (None when absent).

    This helper is re-declared in other cells of this notebook.
    '''
    attrs = buildings_data[dset_name][building].attrs
    if field not in attrs:
        # Absence is acceptable only for the optional field.
        return (field == skip, None)
    value = attrs[field]
    if field != skip and pd.isnull(value):
        return (False, None)
    return (True, value)

def get_three_dim_dict(buildings_data):
    '''Count fully described buildings per (climate zone, industry, PSU) combination.

    The last three entries of ``fields_to_check`` supply the three keys, in
    that order; industry and PSU are first folded through ``get_industry``
    and ``get_psu``.  Counts are accumulated with ``inc_count``.
    '''
    results = {}
    for dset_name in datasets_names:
        for building in buildings_data[dset_name]:
            outcomes = [
                check_field(buildings_data, dset_name, building, field, skip='Subindustry')
                for field in fields_to_check
            ]
            if not all(ok for ok, _ in outcomes):
                continue
            values = [value for _, value in outcomes]
            climate, industry, psu = values[-3], values[-2], values[-1]
            inc_count(results, [climate, get_industry(industry), get_psu(psu)])
    return results

three_dim_dict = apply_func(get_three_dim_dict)

In [101]:
# Axis labels; a label's position in its list is its index along that axis.
Climatezones = ['1', '2', '3', '4', '5']

Industries = ['Residential', 'Education', 'Government', 'Others']

PSUs = [
    'Single_family_house', 'Office',
    'College Classroom', 'College Laboratory', 'Primary/Secondary Classroom',
    'Dormitory', 'Library', 'Industrial', 'Gymnasium', 'Community Center',
    'Food Sales', 'Sports Stadium', 'Student Union', 'Retail', 'Museum',
    'Fitness Center',
    'Others',
]

# def generate_tuple(dic):
#     return tuple([dic[psu] if psu in dic else 0 for psu in PSUs])

In [105]:
# Dense (climate zone x industry x PSU) count array; combinations absent from
# three_dim_dict stay at zero.
three_dim_mat = np.zeros((len(Climatezones), len(Industries), len(PSUs)))

for i in range(len(Climatezones)):
    by_industry = three_dim_dict.get(Climatezones[i], {})
    for j in range(len(Industries)):
        by_psu = by_industry.get(Industries[j], {})
        for k in range(len(PSUs)):
            if PSUs[k] in by_psu:
                three_dim_mat[i, j, k] = by_psu[PSUs[k]]

In [116]:
np.save('./pies/three_dim_pie.npy', three_dim_mat)
# np.load('three_dim_pie.npy')

In [92]:
three_dim_dict

{'1': {'Education': {'Dormitory': 1,
   'Others': 1,
   'Primary/Secondary Classroom': 4}},
 '2': {'Education': {'College Classroom': 69,
   'College Laboratory': 47,
   'Dormitory': 31,
   'Fitness Center': 10,
   'Gymnasium': 1,
   'Library': 8,
   'Museum': 8,
   'Office': 38,
   'Others': 27,
   'Primary/Secondary Classroom': 2,
   'Retail': 1,
   'Sports Stadium': 11,
   'Student Union': 6},
  'Others': {'Industrial': 1},
  'Residential': {'Single_family_house': 113}},
 '3': {'Education': {'College Classroom': 4,
   'College Laboratory': 6,
   'Gymnasium': 2,
   'Library': 4,
   'Museum': 1,
   'Office': 9,
   'Others': 1,
   'Student Union': 1},
  'Others': {'Food Sales': 17, 'Industrial': 3, 'Office': 4}},
 '4': {'Education': {'College Classroom': 51,
   'College Laboratory': 59,
   'Community Center': 2,
   'Dormitory': 60,
   'Fitness Center': 3,
   'Gymnasium': 11,
   'Library': 14,
   'Museum': 2,
   'Office': 71,
   'Others': 19,
   'Primary/Secondary Classroom': 90,
   'Sp