In [188]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
from sklearn import tree
from sklearn.metrics import confusion_matrix, silhouette_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# import graphviz 

In [170]:
beacons_data = pd.read_csv('../data/raw/beacons_dataset.csv', sep=';').drop(columns=['ts_date','ts_time'])
clinical_data = pd.read_csv('../data/preprocessed/preprocessed.csv', sep=';')
clinical_data.info()
beacons_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 51 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Unnamed: 0                       540 non-null    int64  
 1   part_id                          540 non-null    int64  
 2   fried                            540 non-null    int64  
 3   gender                           540 non-null    int64  
 4   age                              540 non-null    int64  
 5   hospitalization_one_year         540 non-null    int64  
 6   hospitalization_three_years      540 non-null    float64
 7   ortho_hypotension                540 non-null    int64  
 8   vision                           540 non-null    int64  
 9   audition                         540 non-null    int64  
 10  raise_chair_time                 540 non-null    float64
 11  balance_single                   540 non-null    int64  
 12  gait_get_up           

In [171]:

for index, row in beacons_data.iterrows(): 
    if (not len(row['part_id']) == 4 or not row['part_id'].isdigit()):
       beacons_data.drop(index, inplace=True)

len(beacons_data['part_id'].unique())
        

291

In [172]:
replacement_dict = { 
       'Kitchen': dict.fromkeys({'Kitcheb', 'Kithen', 'Kitchen2', 'Kitvhen', 'Kichen', 'Kiychen', 'Kitcen', 'Kitch', 'kitchen'}, 'Kitchen'),
       'Bedroom': dict.fromkeys({'Bedroom2', 'Bedroom1', 'bedroom', 'Bedroom-1', 'Bedroom1st', 'Chambre', 'Bed'}, 'Bedroom'), 
       'Outdoor': dict.fromkeys({'Veranda', 'Garden', 'Guard'}, 'Outdoor'),
       'Livingroom': dict.fromkeys({'TV', 'Livingroon', 'Livingroon', 'livingroom', 'LivingRoom2', 'Luvingroom1', 'SeatingRoom', 'LivibgRoom', 'Sitingroom', 'Livingroom1', 'Livingroom2', 'Leavingroom', 'Sittingroom', 'LivingRoom', 'Living', 'Livroom', 'Sittigroom', 'Liningroom', 'LeavingRoom', 'Sittinroom', 'SittingOver', 'SittingRoom'}, 'Livingroom'),
       'Bathroom': dict.fromkeys({'Barhroom', 'Baghroom', 'Bathroom-1', 'Bathroom1', 'Bathroon', 'Bathroim', 'Bsthroom'},'Bathroom'), 
       'Entry': dict.fromkeys({'Entrance', 'ExitHall', 'Hall'},'Entry'),
       'Office': dict.fromkeys({'Desk', 'Office1', 'Library', 'Workroom', 'Office2', 'Office-2', 'Office1st'},'Office'),
       'DiningRoom': dict.fromkeys({'DinningRoom', 'Dinerroom', 'DinerRoom','DinnerRoom' }, 'DiningRoom'),
       'nan': dict.fromkeys({'Right', 'Left', 'Two', 'Three', 'One', '2ndRoom', 'three', 'Four', 'Box', 'Box-1', 'K', 'T'},'nan'), 
       'Storage': dict.fromkeys({'Pantry', 'Garage'},'Storage'), 
       'Entry': dict.fromkeys({'Entrance', 'ExitHall', 'Hall'},'Entry')
}

for column in replacement_dict:
    beacons_data['room'].replace(replacement_dict[column], inplace=True)

 

# New Dataset

In [191]:
def divide(number1, number2):
    return round(float(number1 / number2), 2)

records = {}
bedroom = {}
bathroom = {}
livingroom = {}
kitchen = {}
person_dict = { 'part_id':[], 'Bedroom':[], 'Bathroom':[], 'Livingroom':[],'Kitchen':[]}
for value in beacons_data.part_id.unique():
    records[value] = len(beacons_data[(beacons_data.part_id == value)])
    bedroom[value] = len(beacons_data[(beacons_data.part_id == value) & (beacons_data.room == "Bedroom")]) 
    bathroom[value] = len(beacons_data[(beacons_data.part_id == value) & (beacons_data.room == "Bathroom")]) 
    livingroom[value] = len(beacons_data[(beacons_data.part_id == value) & (beacons_data.room == "Livingroom")]) 
    kitchen[value] = len(beacons_data[(beacons_data.part_id == value) & (beacons_data.room == "Kitchen")]) 
    person_dict['part_id'].append(int(value))
    person_dict['Bedroom'].append(divide(bedroom[value],records[value]))
    person_dict['Bathroom'].append(divide(bathroom[value],records[value]))
    person_dict['Livingroom'].append(divide(livingroom[value],records[value]))
    person_dict['Kitchen'].append(divide(kitchen[value],records[value]))

new_beacons_data = pd.DataFrame(person_dict)
print(len(new_beacons_data))

291


In [174]:
merged_dataset= pd.merge(new_beacons_data, clinical_data)

### Clustering

In [192]:
kmeans = KMeans(n_clusters=3, random_state=0).fit(merged_dataset)
labels = kmeans.labels_
score = silhouette_score(merged_dataset, labels)
print(score)

0.7351198455195658


In [196]:
X = PCA(n_components=5).fit_transform(merged_dataset)
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)
labels = kmeans.labels_
score = silhouette_score(X, labels)
print(score)

0.7418672857603282
