## Machine Learning (involving clustering (K-means) and optimization (Linear Sum Assignment)) Algorithmic approach for solving the Room-mate allocation problem 

### Imports - 

In [9]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment

### Data Loading - 

In [10]:
data = pd.read_excel("C:/Users/DEEPAK/Downloads/Machine-Learning-approach-to-Roommate-Flat-mate-allocation-algorithm-for-BX-students-main/Sample_Data.xlsx")


### Print the labels for debugging.

In [11]:
file_path = 'C:/Users/DEEPAK/Downloads/Machine-Learning-approach-to-Roommate-Flat-mate-allocation-algorithm-for-BX-students-main/Sample_Data.xlsx'
print(data.columns)

Index(['Name, Surname', 'Sex', 'What language(s) do you speak?',
       'What is your sleeptime (weekdays)?',
       'What is your sleeptime (weekend)?', 'Does noise bother you?',
       'If at night from what time. Noise, if night what time',
       'How often are you willing to clean the common area?',
       'How do you rate your sharing habits?',
       'Do you clean your dishes right after using them?',
       'Do you mind if your roommate invites people to your flat?',
       'Would you invite people to your flat?',
       'How do you handle disagreements?', 'Are you a party-person?'],
      dtype='object')


### Additional functions for cleaning data 

In [None]:
''' 

# Normalize Yes/No responses to lowercase - for the 'Selecting the options' response types in the form.
def normalize_yes_no(value):
    return value.strip().lower()

# Normalize languages by removing spaces
def normalize_languages(languages):
    return [lang.strip().lower() for lang in languages.split(',')]
    
    '''

### Label Handling -

In [12]:
data['Cleanliness'] = data['How often are you willing to clean the common area?'].apply(
    lambda x: 5 if x == 'Every day' else (
        4 if x == 'Few days per week' else (
            3 if x == '1 day per week' else 0
        )
    )
)
data['Dishes'] = data['Do you clean your dishes right after using them?'].apply(lambda x: 1 if x == 'Yes' else 0)
def noise_score(row):
    if row['Does noise bother you?'] == 'During the day':
        return 1
    elif row['Does noise bother you?'] == 'At night':
        time = row['If at night from what time. Noise, if night what time']
        return {'9pm': 2, '10pm': 3, '11pm': 4, 'midnight': 5, 'after midnight': 6}.get(time, 0)
    else:
        return 0

data['Noise Tolerance'] = data.apply(noise_score, axis=1)
data['Party Person'] = data['Are you a party-person?'].apply(lambda x: 1 if x == 'yes' else 0)
data['Sleeptime Weekdays'] = data['What is your sleeptime (weekdays)?'].apply(
    lambda x: {'8 to 9pm': 1, '9 to 10pm': 2, '10 to 11pm': 3, '11 to midnight': 4, 'midnight to 2 am': 5, 'after 2am': 6}.get(x, 3)
)
data['Sleeptime Weekends'] = data['What is your sleeptime (weekend)?'].apply(
    lambda x: {'8 to 9pm': 1, '9 to 10pm': 2, '10 to 11pm': 3, '11 to midnight': 4, 'midnight to 2 am': 5, 'after 2am': 6}.get(x, 3)
)
data['Mind Invites'] = data['Do you mind if your roommate invites people to your flat?'].apply(lambda x: 1 if x == 'yes' else 0)
data['Invite People'] = data['Would you invite people to your flat?'].apply(lambda x: 1 if x == 'Yes' else 0)
data['Handle Disagreements'] = data['How do you handle disagreements?'].apply(lambda x: {'Mediated discussion': 1, 'Confrontation': 2}.get(x, 0))
data['Sharing Habits'] = data['How do you rate your sharing habits?'] 

### Weights -

In [13]:
#the more the weight, the more likely those people to be grouped together by the algo.
#Think of it as a grouping together algorithmic structure.
weights = {
    'Cleanliness': 6, #I could have put 7 too here but I felt like it would have been too heavy...
    'Dishes': 3,
    'Sleeptime Weekdays': 4,
    'Sleeptime Weekends': 4,
    'Noise Tolerance': 3,
    'Party Person': 3,
    'Mind Invites': 3,
    'Invite People': 2,
    'Handle Disagreements': 2,
    'Sharing Habits': 3,  
}

### Calculating compatibility scores - 

In [14]:
def calculate_compatibility_score(student1, student2):
    score = 0
    for factor in weights:
        score += weights[factor] * (student1[factor] == student2[factor])
    return score

### Implementing/Evaluating the Compatibility Matrices - 

In [15]:
num_students = len(data)
compatibility_matrix = np.zeros((num_students, num_students))

for i in range(num_students):
    for j in range(num_students):
        if i != j:
            compatibility_matrix[i, j] = calculate_compatibility_score(data.iloc[i], data.iloc[j])

### Clustering - 

In [16]:
kmeans = KMeans(n_clusters=num_students // 4).fit(compatibility_matrix)
data['Cluster'] = kmeans.labels_

### Forming the groups of people (to be filled into the appartments), finally the good part - 

In [18]:
groups = {}
for i in range(0, len(data), 4):
    group = []
    for j in range(4):
        if i + j < len(data):
            group.append(data.iloc[i + j]['Name, Surname'])
    groups[f'Group {i // 4 + 1}'] = group


### Print the groups!

In [19]:
for group, members in groups.items():
    print(f"{group}: {', '.join(members)}")

Group 1: John Doe, Jane Smith, Alice Johnson, Bob Brown
Group 2: Charlie Davis, Eve Wilson, Frank Harris, Grace Lee


### Save data to a newly generated Excel format file with a name of your choice! 

In [20]:
output = pd.DataFrame.from_dict(groups, orient='index').transpose()
output.to_excel('room_allocation_results.xlsx', index=False)