In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
import joblib
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
# Load the labelled places dataset. pandas is already imported as `pd` in the
# notebook's import cell, so it is not re-imported here.
df = pd.read_csv('/content/hidden_gem_dataset.csv')
df

Unnamed: 0,name,price_level,rating,types,user_ratings_total,place_id,hidden_gem
0,Cinema Bakery,2.0,4.5,"['bakery', 'food', 'point_of_interest', 'store...",2732,ChIJJ9EYdFj_ei4Rwcr9mZCBmas,1
1,GEMAHRIPAH78 WAROENG JOGJAKARTA,2.0,4.8,"['restaurant', 'food', 'point_of_interest', 'e...",292,ChIJHVFenDJbei4RqP0tu3iSG88,1
2,Warung Tuman BSD,2.0,4.5,"['restaurant', 'point_of_interest', 'food', 'e...",3412,ChIJ3QHJ3A7laS4RnE--2dQVeJ0,1
3,wagokoro Jakal,,4.7,"['restaurant', 'food', 'point_of_interest', 'e...",307,ChIJI_CEzvpZei4ROnW18peB8qY,1
4,MIAMUR Mie Ayam Jamur,,4.5,"['restaurant', 'food', 'point_of_interest', 'e...",198,ChIJ5eVjBsVXei4RyyaBYH8xIvM,1
...,...,...,...,...,...,...,...
186,Royal Steak Citraland Mall,,3.9,"['restaurant', 'food', 'point_of_interest', 'e...",14,ChIJt_wE5ViLcC4RjATI-B32Jyo,0
187,Quest Hotel Simpang Lima Semarang,,4.4,"['lodging', 'restaurant', 'food', 'point_of_in...",9526,ChIJd0gb_VSLcC4RQZ8hdkGgj9M,0
188,Super Penyet,2.0,4.4,"['restaurant', 'food', 'point_of_interest', 'e...",5094,ChIJE8TGGlSLcC4RXfqTxEghLFI,0
189,Platinum Restaurant and Kota Lama Coffee & Ter...,,3.6,"['restaurant', 'food', 'point_of_interest', 'e...",5,ChIJMyUyLar0cC4RUI2MvYcHGtk,0


In [None]:
# Summarise each column's dtype and non-null count to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   name                191 non-null    object 
 1   price_level         78 non-null     float64
 2   rating              191 non-null    float64
 3   types               191 non-null    object 
 4   user_ratings_total  191 non-null    int64  
 5   place_id            191 non-null    object 
 6   hidden_gem          191 non-null    int64  
dtypes: float64(2), int64(2), object(3)
memory usage: 10.6+ KB


In [None]:
# Input columns fed to the classifiers and the name of the label column.
# price_level is not used; per df.info() it has values in only 78 of 191 rows.
features = ['rating', 'user_ratings_total']
target = 'hidden_gem'

In [None]:
# Split the frame into the feature matrix X and the label vector y.
X = df[features]
y = df[target]

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# split is the same on every run.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance of the test split matters. TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
scaler = MinMaxScaler()

# Fit on the raw training rows looked up from X (by index) instead of on
# X_train itself. Re-running this cell therefore never re-fits the scaler on
# already-scaled values, which would silently persist a wrong scaler. Building
# a fresh frame also avoids assigning into the frame returned by
# train_test_split, a pattern that can trigger pandas' SettingWithCopyWarning.
X_train = pd.DataFrame(
    scaler.fit_transform(X.loc[X_train.index, features]),
    columns=features,
    index=X_train.index,
)

# Persist the fitted scaler so the identical transform can be re-applied later.
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [None]:
def train_models(X_train, y_train, models=None, random_state=42):
    """Fit a collection of classifiers on the same training data.

    Parameters
    ----------
    X_train : array-like
        Training features, passed unchanged to each estimator's ``fit``.
    y_train : array-like
        Training labels, passed unchanged to each estimator's ``fit``.
    models : dict, optional
        Mapping of display name -> unfitted estimator exposing ``fit(X, y)``.
        When omitted, the notebook's four default classifiers are used
        (Random Forest, SVM, Logistic Regression, K-Nearest Neighbors).
    random_state : int or None, default 42
        Seed given to the default Random Forest so repeated runs produce the
        same model. Ignored when ``models`` is supplied.

    Returns
    -------
    dict
        Mapping of name -> the same estimator object after ``fit`` was called
        on it, in the iteration order of ``models``.
    """
    if models is None:
        # Built lazily so callers that pass their own estimators never touch
        # the defaults.
        models = {
            # Seeded: an unseeded forest gives different results on every run.
            'Random Forest': RandomForestClassifier(random_state=random_state),
            'SVM': SVC(),
            'Logistic Regression': LogisticRegression(),
            'K-Nearest Neighbors': KNeighborsClassifier()
        }

    trained_models = {}

    for name, model in models.items():
        model.fit(X_train, y_train)
        trained_models[name] = model

    return trained_models

In [None]:
# Fit every candidate classifier on the training split, then display the
# resulting name -> estimator mapping.
trained_models = train_models(X_train, y_train)

trained_models

{'Random Forest': RandomForestClassifier(),
 'SVM': SVC(),
 'Logistic Regression': LogisticRegression(),
 'K-Nearest Neighbors': KNeighborsClassifier()}

In [None]:
# Reload the scaler persisted earlier in the notebook (fitted on the training
# split only).
scaler = joblib.load('scaler.pkl')

# Transform the raw test rows looked up from X (by index) rather than X_test
# itself: re-running this cell then never scales already-scaled values, and
# building a fresh frame avoids assigning into the frame returned by
# train_test_split (a pattern that can trigger SettingWithCopyWarning).
X_test = pd.DataFrame(
    scaler.transform(X.loc[X_test.index, features]),
    columns=features,
    index=X_test.index,
)

In [None]:
def evaluate_models(trained_models, X_test, y_test):
    """Score every fitted model on the given evaluation data.

    Parameters
    ----------
    trained_models : dict
        Mapping of display name -> fitted estimator exposing ``predict``.
    X_test : array-like
        Features to predict on.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    dict
        Mapping of name -> ``{'Accuracy': ..., 'Confusion Matrix': ...}``,
        in the iteration order of ``trained_models``.
    """
    def _score(estimator):
        # Predict once and reuse the predictions for both metrics.
        predictions = estimator.predict(X_test)
        return {
            'Accuracy': accuracy_score(y_test, predictions),
            'Confusion Matrix': confusion_matrix(y_test, predictions),
        }

    return {label: _score(estimator) for label, estimator in trained_models.items()}

In [None]:
# Score each fitted model on the held-out test split (accuracy + confusion matrix).
evaluate_models(trained_models, X_test, y_test)

{'Random Forest': {'Accuracy': 0.9230769230769231,
  'Confusion Matrix': array([[29,  0],
         [ 3,  7]])},
 'SVM': {'Accuracy': 0.9487179487179487,
  'Confusion Matrix': array([[28,  1],
         [ 1,  9]])},
 'Logistic Regression': {'Accuracy': 0.7435897435897436,
  'Confusion Matrix': array([[29,  0],
         [10,  0]])},
 'K-Nearest Neighbors': {'Accuracy': 0.8974358974358975,
  'Confusion Matrix': array([[29,  0],
         [ 4,  6]])}}

In [None]:
# Persist the fitted SVM. A direct key lookup replaces the loop-and-compare
# scan and raises KeyError if the model is missing, instead of silently saving
# nothing. The trailing semicolon suppresses the cell's output in Jupyter.
joblib.dump(trained_models['SVM'], '/content/svm_model.pkl');

In [None]:
# Pick the fitted SVM out of the trained models. A direct key lookup fails
# immediately with KeyError if it is missing, rather than leaving `svm_model`
# undefined for later cells.
svm_model = trained_models['SVM']

In [None]:
# Raw (unscaled) values for one place. The nested list makes this a single row
# with two columns; a flat list would be two rows of one column and fail
# against the two column names.
testing_data = pd.DataFrame([[3.5, 67]], columns=['rating', 'user_ratings_total'])

# The classifiers were trained on MinMax-scaled features, so push the raw row
# through the same fitted scaler before predicting.
testing_data_scaled = pd.DataFrame(
    scaler.transform(testing_data[features]),
    columns=features,
)

# predict() takes a 2-D feature table, not keyword-style 'name': value pairs.
svm_model.predict(testing_data_scaled)

array([0])