# Group Forming with K-Means and t-SNE



## Authentication

In [1]:
from google.colab import auth
from oauth2client.client import GoogleCredentials
import gspread

# Log the Colab user in before asking for credentials, then hand the
# application-default credentials to gspread to obtain the client `gc`
# used by the data-retrieval cell.
auth.authenticate_user()
credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(credentials)

## Retrieving Data

In [2]:
# Fetch every cell of the first sheet of the form-responses spreadsheet.
spreadsheet = gc.open('Artificial Intelligence for Resilient Urban Planning (Responses)')
worksheet = spreadsheet.sheet1
rows = worksheet.get_all_values()

# The first two rows are left out on purpose: row 0 is skipped (presumably
# the header row -- confirm against the sheet) and row 1 is excluded per the
# original author's note "1 == Efa".
respondents = rows[2:]

# Column 1 holds the participant's name, columns 2-5 are parsed as the
# proficiency answers and every remaining column as the interest answers.
# All answers are converted with int(), so a non-numeric cell raises
# ValueError.
name        = [row[1] for row in respondents]
proficiency = [[int(x) for x in row[2:6]] for row in respondents]
interest    = [[int(x) for x in row[6:]] for row in respondents]

SpreadsheetNotFound: ignored

## t-SNE

In [None]:
!pip install adjustText

In [None]:
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from adjustText import adjust_text

def reduce_dimensions(data, names, perplexity, learning_rate, iterations, cols):
    """Embed *data* in 2-D with t-SNE and show it as a labelled scatter plot.

    Args:
        data: Samples to embed, one row per sample; passed unchanged to
            ``TSNE.fit_transform``.
        names: One text label per sample, drawn next to its point. Labels
            are paired with points via ``zip``, so extra entries on either
            side are silently dropped.
        perplexity: t-SNE ``perplexity`` setting.
        learning_rate: t-SNE ``learning_rate`` setting.
        iterations: Upper bound on the number of t-SNE optimisation steps.
        cols: Per-sample colour values, forwarded to ``plt.scatter(c=...)``.

    Returns:
        None. The function prints the number of iterations t-SNE actually
        ran and displays the figure with ``plt.show()``.
    """
    # Settings shared by both spellings of the iteration keyword below.
    tsne_kwargs = dict(
        n_components=2,
        perplexity=perplexity,
        learning_rate=learning_rate,
    )

    # Instantiate Algorithm Object
    # scikit-learn renamed TSNE's `n_iter` to `max_iter` (the old keyword was
    # deprecated and later removed), so try the current name first and fall
    # back to the legacy one on installs that do not know `max_iter` yet.
    try:
        tsne = TSNE(max_iter=iterations, **tsne_kwargs)
    except TypeError:
        tsne = TSNE(n_iter=iterations, **tsne_kwargs)

    # Train Model
    dim_red = tsne.fit_transform(data)
    print(f't-SNE ran for {tsne.n_iter_} iterations.')

    # Show Dimensionally Reduced Data
    plt.figure(figsize=[6, 6])
    plt.scatter(dim_red[:,0], dim_red[:,1], c=cols)

    # Label every point, then let adjustText nudge the labels apart and
    # connect each one to its point with a thin black line.
    labels = [
        plt.text(x, y, text)
        for x, y, text in zip(dim_red[:,0], dim_red[:,1], names)
    ]
    adjust_text(labels, force_text=0.05, arrowprops=dict(arrowstyle='-', color='black'))

    plt.show()



# PARAMETERS
CLUSTER_COUNT = 6       # number of groups to form
KM_ITS        = 100000  # iteration cap handed to K-Means

PERPLEXITY = 5      # t-SNE perplexity
LR         = 200    # t-SNE learning rate
TSNE_ITS   = 10000  # iteration cap handed to t-SNE

# Groups are formed from the interest answers only; K-Means is fitted on a
# (shallow) copy of that list.
interest_k_means = interest.copy()

# The tolerance is set extremely small -- presumably so that the run is not
# cut short by the tolerance-based convergence test (confirm intent).
kmeans = KMeans(n_clusters=CLUSTER_COUNT, n_init=10, max_iter=KM_ITS, tol=1e-100)
kmeans.fit(interest_k_means)
print(f'K-Means clustering ran for {kmeans.n_iter_} iterations.')

# One cluster label per participant; reused as the point colour in both plots.
cols = kmeans.labels_

# t-SNE for Plots: first the proficiency answers, then the interest answers,
# both coloured by the interest-based cluster labels.
for answers in (proficiency, interest):
    reduce_dimensions(answers, name, PERPLEXITY, LR, TSNE_ITS, cols)

# Printing Groups: list the members of each cluster under its own heading.
for i in range(CLUSTER_COUNT):
    print(f'Group {i}:')
    members = [n for n, group in zip(name, cols) if group == i]
    for member in members:
        print(f'  - {member}')

Developed by the City Intelligence Lab, Austrian Institute of Technology GmbH