# import

In [8]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
import numpy as np

# dataset

In [9]:
# Kilometres covered by one degree of arc, used for degree <-> km conversion.
km_per_latitude = 110.574
# Equatorial figure; elsewhere a degree of longitude spans 111.320*cos(latitude) km.
km_per_longitude = 111.320

# Users, stored column-wise: ten users with a position and a preferred category.
user_data = dict(
    UserID=list(range(1, 11)),
    Latitude=[
        25.2849, 4.5970, 5.3302, 10.1234, 15.6789,
        20.4321, 18.5678, 28.9876, 32.1098, 37.2468,
    ],
    Longitude=[
        55.3047, 101.0901, 103.1408, 98.7654, 95.4321,
        88.8765, 77.5432, 66.7890, 59.0123, 50.9876,
    ],
    Preferences=[
        'Fashion', 'Food', 'Electronics', 'Fashion', 'Food',
        'Electronics', 'Fashion', 'Food', 'Electronics', 'Fashion',
    ],
)

# Stores, stored column-wise: position, category and discount as a fraction
# (the report multiplies it by 100 to print a percentage).
discount_data = dict(
    StoreID=list(range(101, 111)),
    Latitude=[
        25.3049, 4.5970, 5.4202, 37.3368, 15.7389,
        20.4321, 18.6478, 28.9876, 32.1908, 37.3368,
    ],
    Longitude=[
        55.3047, 101.1001, 103.2108, 51.1004, 95.4321,
        88.9365, 77.5432, 66.8790, 59.0123, 50.9876,
    ],
    Category=[
        'Fashion', 'Food', 'Electronics', 'Fashion', 'Food',
        'Electronics', 'Fashion', 'Food', 'Electronics', 'Fashion',
    ],
    Discount=[0.2, 0.15, 0.25, 0.18, 0.12, 0.2, 0.3, 0.22, 0.17, 0.25],
)

# Tabular views of the two dictionaries above.
users_df = pd.DataFrame(data=user_data)
discounts_df = pd.DataFrame(data=discount_data)

# train

In [10]:
# distances between users and stores in kilometers
def calculate_distance_km(user_coords, store_coords):
    """Approximate the user-to-store distance in kilometres.

    The figure returned is the north-south separation plus the east-west
    separation (a Manhattan-style sum), not a straight-line distance.

    Args:
        user_coords: (latitude, longitude) of the user, in degrees.
        store_coords: (latitude, longitude) of the store, in degrees.

    Returns:
        North-south kilometres plus east-west kilometres between the points.
    """
    lat_diff_km = abs(user_coords[0] - store_coords[0]) * km_per_latitude

    # Longitude is cyclic: 179.9 and -179.9 are 0.2 degrees apart, not 359.8.
    # Fold the raw difference into [0, 180] so a pair straddling the
    # antimeridian is not reported as being on opposite sides of the globe.
    lon_diff_deg = abs(user_coords[1] - store_coords[1]) % 360.0
    lon_diff_deg = np.minimum(lon_diff_deg, 360.0 - lon_diff_deg)

    # A degree of longitude shrinks with latitude; the user's latitude is
    # used as the reference for the whole east-west leg.
    lon_diff_km = lon_diff_deg * km_per_longitude * np.cos(np.radians(user_coords[0]))
    return lat_diff_km + lon_diff_km

def train_recommendation_model(users_df, discounts_df):
    """Tabulate every user-to-store distance and fit a nearest-neighbour model.

    Side effect: adds (or overwrites) a 'Distance_Store_km' column on the
    users_df that was passed in. Each cell of that column is a list holding
    one distance per row of discounts_df, in discounts_df row order.

    Args:
        users_df: frame with 'Latitude' and 'Longitude' columns, one row per user.
        discounts_df: frame with 'Latitude' and 'Longitude' columns, one row per store.

    Returns:
        A NearestNeighbors instance (n_neighbors=1, ball_tree) fitted on the
        per-user distance lists, one sample per user.
    """
    # Store positions do not depend on the user, so collect them once.
    store_points = [
        (store['Latitude'], store['Longitude'])
        for _, store in discounts_df.iterrows()
    ]

    # One inner list per user, one entry per store.
    distance_table = [
        [
            calculate_distance_km((shopper['Latitude'], shopper['Longitude']), point)
            for point in store_points
        ]
        for _, shopper in users_df.iterrows()
    ]
    users_df['Distance_Store_km'] = distance_table

    neighbours = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
    neighbours.fit(users_df['Distance_Store_km'].to_list())
    return neighbours

# Fit once for the whole notebook. The call also writes each user's list of
# store distances into users_df['Distance_Store_km'], which the report reads.
trained_model = train_recommendation_model(users_df=users_df, discounts_df=discounts_df)

# test

In [11]:
def recommend_stores_within_threshold(users_df, discounts_df, user_id, max_lat_diff_km, max_lon_diff_km, model):
    """Return the stores that fall inside a latitude/longitude box around a user.

    The kilometre limits are converted to degrees (the longitude limit at the
    user's own latitude) and every store whose latitude and longitude offsets
    are both within those limits is kept.

    Args:
        users_df: frame with 'UserID', 'Latitude' and 'Longitude' columns.
        discounts_df: frame with 'StoreID', 'Latitude', 'Longitude',
            'Category' and 'Discount' columns.
        user_id: value to look up in users_df['UserID']; if several rows
            match, the first one is used.
        max_lat_diff_km: largest allowed north-south offset, in km.
        max_lon_diff_km: largest allowed east-west offset, in km.
        model: accepted so existing callers keep working; the filter does
            not use it.

    Returns:
        The matching rows of discounts_df, restricted to the 'StoreID',
        'Category' and 'Discount' columns and keeping discounts_df's index labels.

    Raises:
        IndexError: if user_id does not occur in users_df['UserID'].
    """
    matches = users_df.loc[users_df['UserID'] == user_id]
    if matches.empty:
        # Without this guard .values[0] fails with an anonymous
        # "index 0 is out of bounds" error; keep the exception type callers
        # may already handle, but say which user was missing.
        raise IndexError(f"UserID {user_id!r} not found in users_df")
    user_lat = matches['Latitude'].values[0]
    user_lon = matches['Longitude'].values[0]

    max_lat_diff_deg = max_lat_diff_km / km_per_latitude
    max_lon_diff_deg = max_lon_diff_km / (km_per_longitude * np.cos(np.radians(user_lat)))

    lat_gap_deg = (discounts_df['Latitude'] - user_lat).abs()

    # Longitude is cyclic: fold the raw offset into [0, 180] so a store just
    # across the antimeridian (e.g. 179.95 vs -179.95) counts as nearby.
    lon_gap_deg = (discounts_df['Longitude'] - user_lon).abs() % 360.0
    lon_gap_deg = np.minimum(lon_gap_deg, 360.0 - lon_gap_deg)

    nearby_stores = discounts_df[
        (lat_gap_deg <= max_lat_diff_deg) &
        (lon_gap_deg <= max_lon_diff_deg)
    ]

    return nearby_stores[['StoreID', 'Category', 'Discount']]

def test_recommendation_model(users_df, discounts_df, model, user_ids_to_test, max_lat_diff_km=10, max_lon_diff_km=10):
    """Print the discounts found near each of the listed users.

    For every user id the stores returned by recommend_stores_within_threshold
    are printed with their id, category, discount percentage and the distance
    stored for that user/store pair in users_df['Distance_Store_km'].

    Args:
        users_df: user frame; must already carry the 'Distance_Store_km'
            column, whose cells are lists with one distance per row of
            discounts_df in row order.
        discounts_df: store frame; its index labels are assumed to be unique.
        model: forwarded unchanged to recommend_stores_within_threshold.
        user_ids_to_test: iterable of user ids to report on.
        max_lat_diff_km: north-south limit of the search box, in km.
        max_lon_diff_km: east-west limit of the search box, in km.

    Returns:
        None; the report is written to stdout.
    """
    # The headline figure is the sum of the two box limits (10 + 10 = 20 with
    # the defaults). Deriving it keeps the text truthful for other limits;
    # ':g' prints 20 rather than 20.0.
    reach_km = max_lat_diff_km + max_lon_diff_km

    for user_id in user_ids_to_test:
        recommended_stores = recommend_stores_within_threshold(users_df, discounts_df, user_id, max_lat_diff_km, max_lon_diff_km, model)
        print(f"Discount within {reach_km:g} km for User {user_id}:")
        if recommended_stores.empty:
            print(f"No discount found within the {reach_km:g} km.")
        else:
            # The distance list is the same for every store of this user, so
            # fetch it once instead of once per printed row.
            user_distances_km = users_df.loc[users_df['UserID'] == user_id, 'Distance_Store_km'].iloc[0]
            for index, row in recommended_stores.iterrows():
                print(f"StoreID: {row['StoreID']}")
                print(f"Category: {row['Category']}")
                print(f"Discount: {row['Discount']*100:.0f}%")
                # `index` is a label of discounts_df, while the distance list
                # is positional; translate the label so a non-default index
                # does not pick the wrong entry or run off the list.
                store_position = discounts_df.index.get_loc(index)
                user_distance_km = user_distances_km[store_position]
                print(f"Distance: {user_distance_km:.2f} km")
                print()
        print("=====================================")

# Report on all ten users, allowing 10 km of offset along each axis.
user_ids_to_test = list(range(1, 11))
test_recommendation_model(
    users_df,
    discounts_df,
    trained_model,
    user_ids_to_test,
    max_lat_diff_km=10,
    max_lon_diff_km=10,
)

Discount within 20 km for User 1:
StoreID: 101
Category: Fashion
Discount: 20%
Distance: 2.21 km

Discount within 20 km for User 2:
StoreID: 102
Category: Food
Discount: 15%
Distance: 1.11 km

Discount within 20 km for User 3:
StoreID: 103
Category: Electronics
Discount: 25%
Distance: 17.71 km

Discount within 20 km for User 4:
No discount found within the 20 km.
Discount within 20 km for User 5:
StoreID: 105
Category: Food
Discount: 12%
Distance: 6.63 km

Discount within 20 km for User 6:
StoreID: 106
Category: Electronics
Discount: 20%
Distance: 6.26 km

Discount within 20 km for User 7:
StoreID: 107
Category: Fashion
Discount: 30%
Distance: 8.85 km

Discount within 20 km for User 8:
StoreID: 108
Category: Food
Discount: 22%
Distance: 8.76 km

Discount within 20 km for User 9:
StoreID: 109
Category: Electronics
Discount: 17%
Distance: 8.96 km

Discount within 20 km for User 10:
StoreID: 104
Category: Fashion
Discount: 18%
Distance: 19.95 km

StoreID: 110
Category: Fashion
Discount: 2