In [None]:
import folium
from IPython.display import display

# The three lists below are parallel tables: `locations[i]` is the position of
# `countries[i]` (see the per-line comments), and `elo_ratings` has one entry
# per country (assumed to be in the same order -- TODO confirm the source).
countries = [
    "Brazil",
    "Spain",
    "France",
    "Argentina",
    "Uruguay",
    "Colombia",
    "United Kingdom",
    "Paraguay",
    "Germany",
    "Ecuador",
    "Portugal",
    "Italy",
    "Morocco",
    "Egypt",
    "South Korea",
    "Japan",
    "Mexico",
    "Costa Rica",
    "New Zealand",
    "Australia",
]

elo_ratings = [
    1994,
    2150,
    2031,
    2140,
    1922,
    1953,
    2012,
    1799,
    1988,
    1911,
    1988,
    1914,
    1807,
    1668,
    1745,
    1875,
    1817,
    1653,
    1596,
    1736,
]

# (latitude, longitude) pairs in decimal degrees.
locations = [
    (-14.2350, -51.9253),  # Brazil
    (40.4637, -3.7492),    # Spain
    (46.6034, 1.8883),     # France
    (-38.4161, -63.6167),  # Argentina
    (-32.5228, -55.7659),  # Uruguay
    (4.5709, -74.2973),    # Colombia
    (55.3781, -3.4360),    # United Kingdom
    (-23.4420, -58.4438),  # Paraguay
    (51.1657, 10.4515),    # Germany
    (-1.8312, -78.1834),   # Ecuador
    (39.3999, -8.2245),    # Portugal
    (41.8719, 12.5674),    # Italy
    (31.7915, -7.0926),    # Morocco
    (26.8206, 30.8025),    # Egypt
    (35.9078, 127.7669),   # South Korea
    (36.2048, 138.2529),   # Japan
    (23.6345, -102.5528),  # Mexico
    (9.7489, -83.7534),    # Costa Rica
    (-40.9006, 174.8860),  # New Zealand
    (-25.2744, 133.7751),  # Australia
]

# Fail fast if the parallel tables drift apart; `zip` below would otherwise
# silently drop the trailing entries of the longer list.
assert len(countries) == len(elo_ratings) == len(locations), (
    "countries, elo_ratings and locations must have the same length"
)

# Country name -> (latitude, longitude). Pairing by position with `zip` avoids
# the quadratic `countries.index(...)` scan per country.
locationdict = dict(zip(countries, locations))

# # plot all countries on a map
# m = folium.Map(location=[20, 20], zoom_start=2)
# for name, rating, location in zip(countries, elo_ratings, locations):
#     folium.Marker(
#         location=location,
#         icon=folium.Icon(color="red"),
#         tooltip=f"{name} [{rating}]",
#     ).add_to(m)
# display(m)

In [264]:
import numpy as np
from math import radians, sin, cos, sqrt, atan2

# DISTANCE CALCULATIONS
def haversine(pos1, pos2):
    """Return the great-circle distance between two points, in kilometers.

    Args:
        pos1: ``(latitude, longitude)`` of the first point, in decimal degrees.
        pos2: ``(latitude, longitude)`` of the second point, in decimal degrees.

    Returns:
        The distance along the surface of a sphere of radius 6371 km.
    """
    lat1, lon1 = pos1
    lat2, lon2 = pos2
    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    # Haversine formula
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can leave `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise "math domain error"; clamp to the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    R = 6371.0  # Radius of Earth in kilometers
    return R * c  # Distance in kilometers

# Prepare the data for clustering: one (latitude, longitude) tuple per country,
# in the insertion order of `locationdict`. Built directly from the dict values
# so the coordinates stay plain Python floats (a round trip through np.array
# would turn them into np.float64 scalars, which print as `np.float64(...)` on
# NumPy >= 2).
data = [tuple(location) for location in locationdict.values()]
print(data)

[(-14.235, -51.9253), (40.4637, -3.7492), (46.6034, 1.8883), (-38.4161, -63.6167), (-32.5228, -55.7659), (4.5709, -74.2973), (55.3781, -3.436), (-23.442, -58.4438), (51.1657, 10.4515), (-1.8312, -78.1834), (39.3999, -8.2245), (41.8719, 12.5674), (31.7915, -7.0926), (26.8206, 30.8025), (35.9078, 127.7669), (36.2048, 138.2529), (23.6345, -102.5528), (9.7489, -83.7534), (-40.9006, 174.886), (-25.2744, 133.7751)]


In [265]:
def calculate_centroid(cluster):
    """Return the mean ``(latitude, longitude)`` of the points in ``cluster``.

    The mean is a plain arithmetic average of each coordinate. An empty
    cluster yields the sentinel ``(0, 0)``.
    """
    if not cluster:
        return (0, 0)
    size = len(cluster)
    mean_lat = sum([member[0] for member in cluster]) / size
    mean_lon = sum([member[1] for member in cluster]) / size
    return (mean_lat, mean_lon)

def k_cluster(k, data, max_iter=300, rng=None):
    """Partition ``data`` into ``k`` size-balanced clusters (k-means style).

    Centroids are initialised with the Forgy method (``k`` distinct points
    drawn at random from ``data``). Each pass assigns every point, in input
    order, to the nearest centroid by haversine distance whose cluster still
    holds fewer than ``len(data) / k`` points, then recomputes the centroids
    with ``calculate_centroid``.

    Args:
        k: Number of clusters; must satisfy ``1 <= k <= len(data)``.
        data: Sequence of ``(latitude, longitude)`` points in decimal degrees.
        max_iter: Upper bound on the number of assignment passes. The
            capacity-constrained greedy assignment is not guaranteed to reach
            a fixed point, so this prevents an endless loop.
        rng: Optional object exposing ``choice(n, size=..., replace=...)``,
            e.g. ``np.random.default_rng(seed)``, for reproducible runs.
            Defaults to NumPy's global random state.

    Returns:
        Dict mapping cluster index ``0..k-1`` to the list of points assigned
        to it in the last pass (the converged assignment, unless ``max_iter``
        was reached first).

    Raises:
        ValueError: If ``k`` is outside ``1..len(data)`` or ``max_iter < 1``.
    """
    n_points = len(data)
    if not 1 <= k <= n_points:
        raise ValueError(f"k must be between 1 and len(data)={n_points}, got {k}")
    if max_iter < 1:
        raise ValueError(f"max_iter must be at least 1, got {max_iter}")
    if rng is None:
        rng = np.random

    # Forgy method: seed the centroids with k distinct data points.
    seed_indices = rng.choice(n_points, size=k, replace=False)
    centroids = [data[index] for index in seed_indices]

    # A cluster stops accepting points once it holds at least this many.
    capacity = n_points / k

    clusters = {i: [] for i in range(k)}
    for _ in range(max_iter):
        clusters = {i: [] for i in range(k)}

        for point in data:
            distance_to_centroids = [haversine(point, centroid) for centroid in centroids]
            # Clusters ordered nearest-first; the stable sort keeps the lowest
            # index first on ties.
            nearest_first = sorted(range(k), key=distance_to_centroids.__getitem__)
            # At least one cluster is always below capacity, because fewer
            # than n_points points have been placed so far in this pass.
            idx = next(i for i in nearest_first if len(clusters[i]) < capacity)
            clusters[idx].append(point)

        new_centroids = [calculate_centroid(cluster) for cluster in clusters.values()]
        if new_centroids == centroids:
            break
        centroids = new_centroids

    return clusters
            
# Group the country locations into `num_clusters` clusters.
num_clusters = 5
clusters = k_cluster(num_clusters, data)

# Display clusters on the map
cluster_map = folium.Map(location=[20, 20], zoom_start=2)

# Repeat the base palette often enough to cover every cluster, then keep
# exactly one colour per cluster.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'pink']
colors = (colors * (num_clusters // len(colors) + 1))[:num_clusters]

# One marker per point, coloured by the cluster it belongs to.
for cluster_id, points in clusters.items():
    for point in points:
        folium.Marker(
            location=point,
            tooltip=f"Cluster {cluster_id}",
            icon=folium.Icon(color=colors[cluster_id % len(colors)]),
        ).add_to(cluster_map)

display(cluster_map)
