In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder


In [2]:
# Read the coordinate/vibe samples; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("vibe_dataset_lat_lon_vibe.csv")

# Preview the first rows as a quick sanity check of the parsed columns.
print("Dataset Head:", df.head(), sep="\n")

Dataset Head:
   latitude  longitude       vibe
0   22.5726    88.3639       city
1   27.0410    88.2663   mountain
2   21.6278    87.5336      beach
3   21.9497    89.1833     forest
4   26.7271    88.3953  hill-town


In [7]:
# Discard every row that is missing a latitude, a longitude, or a vibe label:
# all three are needed to build a training example.
required_columns = ['latitude', 'longitude', 'vibe']
df = df.dropna(subset=required_columns)


In [8]:
# Features are the two coordinate columns, kept in (latitude, longitude) order;
# the target is the free-text vibe label.
feature_columns = ['latitude', 'longitude']
X = df[feature_columns]
y = df['vibe']

# Map each distinct vibe string to an integer class id. The fitted encoder is
# kept so that predictions can be decoded back to strings later.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
split_parts = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [13]:
# The haversine metric works on angles in radians, with each row laid out as
# [latitude, longitude]. The feature columns are already in that order, so
# only the degree -> radian conversion is needed. (numpy is imported in the
# notebook's import cell rather than here.)
X_train_rad = np.radians(X_train)
X_test_rad = np.radians(X_test)

# k-nearest-neighbours classifier that measures closeness with the haversine
# (great-circle) distance instead of the default Minkowski metric.
knn = KNeighborsClassifier(
    n_neighbors=3,       # number of voting neighbours; you can tune this
    metric='haversine',
)

# Train
knn.fit(X_train_rad, y_train)

In [14]:
# Predict encoded class ids for the held-out coordinates (already in radians).
y_pred = knn.predict(X_test_rad)

print("\nAccuracy:", accuracy_score(y_test, y_pred))

# Report on every class that occurs in either the true or the predicted
# labels. Restricting the report to classes present in y_test would silently
# drop classes the model predicted but that have no test samples, hiding those
# false positives and replacing the "accuracy" row with "micro avg".
# The full encoder.classes_ cannot be used directly because some classes may
# appear in neither array.
labels = np.union1d(y_test, y_pred)
label_names = encoder.inverse_transform(labels)    # convert them back to strings

print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=labels,
    target_names=label_names,
    # Undefined precision/recall (no predictions or no true samples for a
    # class) is reported as 0 without emitting a warning per class.
    zero_division=0,
))


Accuracy: 0.7727272727272727

Classification Report:
              precision    recall  f1-score   support

   backwater       1.00      1.00      1.00         1
       beach       0.50      0.67      0.57         3
        city       0.67      0.33      0.44         6
     coastal       1.00      1.00      1.00         1
coastal-city       0.50      0.50      0.50         2
    cultural       0.33      0.50      0.40         2
      desert       1.00      1.00      1.00         6
      forest       1.00      1.00      1.00         1
    heritage       0.73      0.79      0.76        14
   hill-town       1.00      0.75      0.86         4
    mountain       1.00      0.88      0.93         8
       rural       0.60      0.43      0.50         7
   spiritual       0.50      1.00      0.67         1
 temple-town       0.25      0.50      0.33         2
       urban       0.87      0.90      0.89        30

    accuracy                           0.77        88
   macro avg       0.73   

In [17]:
def predict_vibe(lat, lon):
    """Predict the vibe label for a single location.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.

    Returns:
        The vibe label obtained by decoding the class predicted by the
        module-level ``knn`` model with the module-level ``encoder``.
    """
    # Build a one-row frame with the same column names and order used for
    # training, so the model sees matching feature names.
    point_deg = pd.DataFrame([[lat, lon]], columns=['latitude', 'longitude'])

    # The classifier was fitted on radians, so convert before querying it.
    point_rad = np.radians(point_deg)

    encoded_predictions = knn.predict(point_rad)
    decoded_predictions = encoder.inverse_transform([encoded_predictions[0]])
    return decoded_predictions[0]

example_vibe = predict_vibe(23.6739, 86.9524)
print("Example:", example_vibe)

Example: industrial


In [18]:
# Persist the fitted classifier together with its label encoder so that a
# consumer can both predict class ids and decode them back to vibe names.
# (pickle is imported in the notebook's import cell rather than here.)
#
# NOTE: the classifier in this notebook was fitted on coordinates converted to
# radians in (latitude, longitude) order; code that loads this bundle must
# prepare its inputs the same way before calling predict().
model_bundle = {
    "model": knn,
    "encoder": encoder,
}

# Pickle files can execute arbitrary code when loaded, so this artifact should
# only ever be unpickled from a trusted location.
with open("vibe_knn_model.pkl", "wb") as f:
    pickle.dump(model_bundle, f)
