In [53]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import joblib
import pandas as pd
import math

# Load the saved model
model = tf.keras.models.load_model('model.h5')

# Load the scaler
scaler_filename = 'scaler.save'
scaler = joblib.load(scaler_filename)

# Load the dataset with pandas
dataset = pd.read_csv('dataset.csv')

In [54]:
def calculate_distance(lat1, lon1, lat2, lon2):
    R = 6371000  # Earth's radius in meters

    # Convert latitude and longitude from degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    # Calculate differences between latitudes and longitudes
    delta_lat = lat2_rad - lat1_rad
    delta_lon = lon2_rad - lon1_rad

    # Apply Haversine formula
    a = math.sin(delta_lat/2) * math.sin(delta_lat/2) + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon/2) * math.sin(delta_lon/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    distance = R * c

    return distance

In [77]:
def predict(dataset, scaler, model, category, domisile, latitude, longitude):

    # Filter the dataset by category and domisile
    dataset = dataset[(dataset['kategori'] == category) & (dataset['kota_administrasi'] == domisile)]

    # Get the features
    features = dataset[['nama_toko', 'latitude', 'longitude', 'rerata_rating', 'jumlah_rating']]

    # Create a new column for distance
    features['jarak'] = features.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)

    # Drop the latitude and longitude columns and move jarak to after nama_toko
    features = features.drop(['latitude', 'longitude'], axis=1)
    features = features[['nama_toko', 'jarak', 'rerata_rating', 'jumlah_rating']]

    # Drop and save the nama_toko column, jarak, rerata_rating, and jumlah_rating columns to a variable
    dropped_features = features[['nama_toko', 'jarak', 'rerata_rating', 'jumlah_rating']]
    features = features.drop(['nama_toko'], axis=1)

    # Rename the columns to distance(meters), rating_overall, rating_count
    features = features.rename(columns={'jarak': 'distance(meters)', 'rerata_rating': 'rating_overall', 'jumlah_rating': 'rating_count'})

    # Normalize the features
    features_scaled = scaler.transform(features)

    # Make predictions, return them as a dataframe with the nama_toko column and the predictions column, avoiding NaN values for nama_toko
    predictions = pd.DataFrame(model.predict(features_scaled), columns=['predictions'])
    predictions = pd.concat([dropped_features.reset_index(drop=True), predictions], axis=1)

    # Reformat the predictions column to 2 decimal places
    predictions['predictions'] = predictions['predictions'].apply(lambda x: round(x, 2))

    # Sort the predictions by the predictions column in descending order
    predictions = predictions.sort_values(by=['predictions'], ascending=False)

    return predictions

    

In [79]:
# Set the category, domisile, latitude, and longitude
category = 'emas'
domisile = 'JAKARTA PUSAT'
latitude = -6.221877
longitude = 106.846261

# Make predictions
predictions = predict(dataset, scaler, model, category, domisile, latitude, longitude)

# Print the dataframe
predictions



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['jarak'] = features.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


Unnamed: 0,nama_toko,jarak,rerata_rating,jumlah_rating,predictions
5,Damung,6118.022591,4.5,20,0.96
6,Rokhani,6108.078807,3.1,9,0.07
0,Tukiyem,6101.022606,1.4,27,0.0
1,Sisrianto,6106.475076,0.1,34,0.0
2,Wawan Hermawan,6099.632814,1.1,32,0.0
3,Parmi,6109.254896,1.5,19,0.0
4,Painem,6109.361814,0.8,32,0.0
7,Dewi Utari,6117.48795,2.1,19,0.0
