In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import pandas as pd

# Load the CSV file into a DataFrame
file_path = '/content/drive/MyDrive/node_features_df.csv'
data = pd.read_csv(file_path)

# Extract Cluster IDs from the first column
cluster_ids = data.iloc[:, 0].tolist()

# Extracting longitude and latitude as pairs
cluster_centers = list(zip(data.iloc[:, 3], data.iloc[:, 2]))

print("Cluster IDs:")
print(cluster_ids)

print("Cluster Centers (latitude, longitude):")
print(cluster_centers)

Cluster IDs:
[100, 8, 78, 56, 131, 22, 6, 15, 80, 45, 0, 19, 33, 14, 63, 135, 145, 27, 107, 44, 26, 104, 52, 108, 77, 41, 60, 75, 72, 42, 36, 7, 23, 97, 133, 73, 129, 4, 24, 89, 84, 18, 106, 48, 31, 46, 5, 21, 91, 25, 29, 120, 55, 43, 35, 109, 118, 2, 127, 69, 124, 148, 105, 139, 134, 79, 110, 101, 11, 12, 65, 86, 28, 9, 117, 16, 90, 111, 59, 136, 121, 95, 122, 10, 141, 70, 116, 66, 68, 102, 138, 88, 64, 47, 126, 67, 98, 132, 34, 113, 38, 130, 53, 76, 50, 83, 58, 39, 51, 93, 92, 140, 99, 37, 3, 20, 87, 85, 54, 32, 40, 96, 144, 71, 137, 17, 149, 13, 61, 62, 112, 81, 119, 74, 49, 1, 114, 123, 128, 146, 143, 30, 147, 115, 142, 103, 82, 94, 57, 125]
Cluster Centers (latitude, longitude):
[(77.22234933571428, 28.63248719523809), (77.21414228421052, 28.6413942), (77.22068524888888, 28.63039345111111), (77.2101978904762, 28.640264495238096), (77.212547725, 28.640693075), (77.2214069054054, 28.6346417), (77.22693740789474, 28.60064228421053), (77.22517948888888, 28.58416395555556), (77.2384869

In [8]:
from scipy.spatial.distance import cdist
import numpy as np

num_metro_stations = 20

# Generating random offsets (perturbations) for metro stations
np.random.seed(42)  # For reproducibility
perturbation_range = 0.1  # The max perturbation for latitude and longitude

# Generating metro stations by adding random perturbations to the cluster centers
metro_stations = []
for _ in range(num_metro_stations):
    # Choosing a random cluster center
    cluster_idx = np.random.choice(range(len(cluster_centers)))
    cluster_center = cluster_centers[cluster_idx]

    # Generating a perturbation for the metro station location
    perturbation = np.random.uniform(-perturbation_range, perturbation_range, 2)
    metro_station = (cluster_center[0] + perturbation[0], cluster_center[1] + perturbation[1])

    # Adding the metro station's coordinates to the list
    metro_stations.append(metro_station)

# Converting the list of metro station coordinates into a DataFrame
metro_stations_df = pd.DataFrame(metro_stations, columns=["Latitude", "Longitude"])

# Assigning a cluster ID to each metro station based on the nearest cluster center
def assign_cluster_id(metro_station, cluster_centers, cluster_ids):
    distances = cdist([metro_station], cluster_centers, metric='euclidean')
    nearest_cluster_idx = np.argmin(distances)
    return cluster_ids[nearest_cluster_idx]

# Assigning cluster IDs to metro stations
metro_stations_df["Cluster_ID"] = metro_stations_df.apply(lambda row: assign_cluster_id((row["Latitude"], row["Longitude"]), cluster_centers, cluster_ids), axis=1)

print("Generated Metro Stations with Assigned Cluster IDs:")
print(metro_stations_df)

Generated Metro Stations with Assigned Cluster IDs:
     Latitude  Longitude  Cluster_ID
0   77.233674  28.514172          68
1   77.211763  28.582907          35
2   77.211836  28.530805         117
3   77.207021  28.475765         117
4   77.258542  28.729105          65
5   77.136482  28.556296          67
6   77.252062  28.554474          88
7   77.202033  28.548760          30
8   77.176786  28.697818         146
9   77.216686  28.666085         115
10  77.268604  28.619339          39
11  77.153500  28.691849          74
12  77.220060  28.551632         117
13  77.108058  28.613058         103
14  77.212488  28.587866          35
15  77.132057  28.666028          74
16  77.221439  28.529817         117
17  77.104074  28.676178          74
18  77.216227  28.714755          58
19  77.234176  28.612456         119
