## Hotels Challenge II.

given a database of hotels, and a set of input coordinates, price ranges and star ratings, for each coordinate-pricerange-startrating combination, find the hotel closest to the coordinates with the given start rating and within the price range. if no hotel fits in the price range star rating combination, return the dict `{"missing": True}`

a solution is represented the same way as in [challenge one](../../challenge-1-hotels/draft_notebooks/challenge-draft-v0.ipynb), input sizes are the same as well, outputs now must include star-rating and price as well

### install package for data downloading and evaluation

In [3]:
!pip install --upgrade git+https://github.com/endreMBorza/jkg_evaluators

Collecting git+https://github.com/endreMBorza/jkg_evaluators
  Cloning https://github.com/endreMBorza/jkg_evaluators to c:\users\asus\appdata\local\temp\pip-req-build-lnu0wylo
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
Building wheels for collected packages: jkg-evaluators
  Running setup.py bdist_wheel for jkg-evaluators: started
  Running setup.py bdist_wheel for jkg-evaluators: finished with status 'done'
  Stored in directory: C:\Users\asus\AppData\Local\Temp\pip-ephem-wheel-cache-1q9suk_w\wheels\2f\44\31\28bfdabae65bda2f313ba79aaa14214c387f01f77651aaa2ff
Successfully built jkg-evaluators
Installing collected packages: jkg-evaluators
  Found existing installation: jkg-evaluators 0.0.2
    Uninstalling jkg-evaluators-0.0.2:
      Successfully uninstalled jkg-evaluators-0.0.2
Successfully installed jkg-evaluators-0.0.2


  Missing build requirements in pyproject.toml for git+https://github.com/endreMBorza/jkg_evaluators.
  The project does not specify a build backend, and pip cannot fall back to setuptools without 'wheel'.


In [12]:
from jkg_evaluators.challenges.data.hotels import get_hotel_data, dump_hotel_filter_input
import shutil
import os

### download practice data

In [2]:
get_hotel_data()

### select one and move to notebook root

In [3]:
data_size_to_copy = 10000
shutil.copyfile(os.path.join("data", 
                             f"{data_size_to_copy}.csv"), 
                "data.csv")

'data.csv'

### generate some inputs

In [13]:
dump_hotel_filter_input(size=10, path="inputs.json")

## base solution ETL

In [5]:
%%time
import pandas as pd

data_file_path = "data.csv"

df = pd.read_csv(data_file_path)

df.drop_duplicates().assign(
    price=lambda _df: _df["current-price"]
    .str[1:]
    .str.replace(",", "")
    .astype(float)
).loc[:, ["lon", "lat", "name", "stars", "price"]].to_pickle("filtered.pkl")

Wall time: 220 ms


## base solution process

In [6]:
%%time
import pandas as pd
import numpy as np
import json

input_dicts = json.load(open("inputs.json", "r"))

df = pd.read_pickle("filtered.pkl")

min_distances = [np.inf] * len(input_dicts)

answers = [{"missing": True}] * len(input_dicts)

for idx, row in df.iterrows():

    for input_idx, input_dict in enumerate(input_dicts):
        if row["stars"] != input_dict["stars"]:
            continue
        if (row["price"] > input_dict["max_price"]) or (
            row["price"] < input_dict["min_price"]
        ):
            continue
        distance = (
            (input_dict["lon"] - row["lon"]) ** 2
            + (input_dict["lat"] - row["lat"]) ** 2
        ) ** 0.5
        if distance < min_distances[input_idx]:
            min_distances[input_idx] = distance
            answers[input_idx] = row[["lon", "lat", "name", "stars", "price"]].to_dict()

json.dump(answers, open("outputs.json", "w"))


Wall time: 3.88 s


In [6]:
import pandas as pd

data_file_path = "data.csv"

df = pd.read_csv(data_file_path)

df.loc[:, ['lon','lat','name', 'current-price', 'stars']].to_csv('filtered.csv',index=None)

df['current-price'].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')
df['current-price'] = df['current-price'].astype('int64')
df = df.sort_values(by = 'current-price')
df.drop_duplicates(inplace = True)
df.dropna(how = "all")

starunique = sorted(df['stars'].unique())
stardict = {elem : pd.DataFrame() for elem in starunique}

for key in stardict.keys():
    stardict[key] = df[df['stars'] == key]

In [None]:
input_dicts = json.load(open("inputs.json", "r"))

In [43]:
answers = [{"missing": True}] * len(input_dicts)

for input_idx, input_dict in enumerate(input_dicts):
    try:
        stardict_tmp = stardict[input_dict["stars"]]
    except:
        pass
    df_tmp = (stardict_tmp['current-price'] >= input_dict["min_price"]) &\
    (stardict_tmp['current-price'] <= input_dict["max_price"])

# flask próba

In [10]:
from flask import Flask
app = Flask(__name__)

@app.route('/')
def hello_world():
    return 'Hello, World!'

In [None]:
import subprocess
import os
import requests
import time

FNULL = open(os.devnull, 'w')
proc = subprocess.Popen(["python", "flask_prep.py"], stderr=FNULL, stdout=FNULL)

print("STARTING proc pid: ", proc.pid)

while True:
    try:
        time.sleep(1)
        requests.get("http://127.0.0.1:5113/started")
        time.sleep(4)
        break
    except Exception as e:
        print(f"ERROR: ({type(e)}) -  {e}")

In [3]:
import json
import pandas as pd
import numpy as np
from flask import Flask
from flask import request
from flask import current_app
from sklearn.neighbors import KDTree

app = Flask(__name__)


@app.route("/started")
def started():
    return "FING"


@app.route("/")
def solution():
    input_json = json.load(open("inputs.json", "r"))
    result = current_app.tree.query([[r["lon"], r["lat"]] for r in input_json])
    indexes = [x[0] for x in result[1]]
    out = [{"lon": x[0], "lat": x[1], "name": x[2]} for x in current_app.arr[indexes, :]]
    json.dump(out, open("outputs.json", "w"))
    return "FING"


def shutdown_server():
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Not running with the Werkzeug Server")
    func()


@app.route("/shutdown")
def shutdown():
    shutdown_server()
    return "Server shutting down..."


app.dfo = (
    pd.read_csv("data.csv")
    .loc[:, ["lon", "lat", "name"]]
    .dropna(how="any")
    .drop_duplicates()
)
app.dfo = (
    pd.read_csv("data.csv")
    .loc[:, ['lon','lat','name', 'current-price', 'stars']]
    .sort_values(by = 'current-price') #df.sort_values(by = 'current-price')
    .drop_duplicates(inplace = True)
)


app.dfo['stars'].unique().sort() #app.starunique = sorted(app.dfo['stars'].unique())
app.stardict = {elem : pd.DataFrame() for elem in app.starunique}

for key in app.stardict.keys():
    app.stardict[key] = app.dfo[app.dfo['stars'] == key]


app.coords = np.array(app.dfo[["lon", "lat"]])
app.arr = np.array(app.dfo[["lon", "lat", "name"]])
app.tree = KDTree(app.coords, leaf_size=10)


if __name__ == "__main__":
    app.run(debug=True, port=5113)


TypeError: 'NoneType' object is not subscriptable

In [None]:
!python

In [None]:
-------------------------------------------------------

In [8]:
import subprocess
import os
import requests
import time

FNULL = open(os.devnull, 'w')
se = open("se.txt", "w")
so = open("so.txt", "w")

proc = subprocess.Popen(["python", "flask_prep.py"], stderr=se, stdout=so)

print("STARTING proc pid: ", proc.pid)

STARTING proc pid:  2568


In [7]:
requests.get("http://127.0.0.1:5113/started")

<Response [200]>

In [2]:
while True:
    try:
        time.sleep(3)
        requests.get("http://127.0.0.1:5113/started")
        time.sleep(4)
        break
    except Exception as e:
        print(f"ERROR: ({type(e)}) -  {e}")

ERROR: (<class 'requests.exceptions.ConnectionError'>) -  HTTPConnectionPool(host='127.0.0.1', port=5113): Max retries exceeded with url: /started (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000156111B9320>: Failed to establish a new connection: [WinError 10061] Nem hozható létre kapcsolat, mert a célszámítógép már visszautasította a kapcsolatot'))
ERROR: (<class 'requests.exceptions.ConnectionError'>) -  HTTPConnectionPool(host='127.0.0.1', port=5113): Max retries exceeded with url: /started (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000156111B9B38>: Failed to establish a new connection: [WinError 10061] Nem hozható létre kapcsolat, mert a célszámítógép már visszautasította a kapcsolatot'))


KeyboardInterrupt: 

In [56]:
import json
import pandas as pd
import numpy as np
from flask import Flask
from flask import request
from flask import current_app
from sklearn.neighbors import KDTree

app = Flask(__name__)


@app.route("/started") #az eredeti URL mögé localhost:5113 ha beírod a /started-et, akkor kiírja, hogy fing
def started():         #azért kell csak, hogy éecsekkoljuk, hogy működik-e a szerver
    return "FING"


@app.route("/")        #ha megpróbálom a porton lévő szervert elérni localhost:5113, akkor ez fut le 
def solution():        #mivel a proccess.py helyén ez van, szóval a teszt fv. ezt hívja meg, így kapcsolódik össze
    input_dicts = json.load(open("inputs.json", "r"))
    min_distances = [np.inf] * len(input_dicts)
    answers = [{"missing": True}] * len(input_dicts)
    for input_idx, input_dict in enumerate(input_dicts):
        try:
            stardict_tmp = app.stardict[input_dict["stars"]]
        except:
            continue
        for idx, row in stardict_tmp.iterrows():
            if (row["current-price"] < input_dict["max_price"]) and (row["current-price"] > input_dict["min_price"]):
                distance = ((input_dict["lon"] - row["lon"]) ** 2 + (input_dict["lat"] - row["lat"]) ** 2)
                if distance < min_distances[input_idx]:
                    min_distances[input_idx] = distance
                    answers[input_idx] = row[["lon", "lat", "name", "stars", "price"]].to_dict()
    json.dump(answers, open("outputs.json", "w"))


def shutdown_server(): #fv definíció, amit később meghívunk, hogy lője le a szervert
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Not running with the Werkzeug Server")
    func()


@app.route("/shutdown") 
def shutdown():
    shutdown_server()      #itt hívja meg az fv-t, hogy lelője a szerót
    return "Server shutting down..."



app.dfo = pd.read_csv("data.csv")
app.dfo.loc[:, ['lon','lat','name', 'current-price', 'stars']]

app.dfo['current-price'].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')
app.dfo['current-price'] = app.dfo['current-price'].astype('int64')
app.dfo = app.dfo.sort_values(by = 'current-price')
app.dfo.drop_duplicates(inplace = True)
app.dfo.dropna(how = "all")


app.starunique = sorted(app.dfo['stars'].unique())
app.stardict = {elem : pd.DataFrame() for elem in app.starunique}

for key in app.stardict.keys():
    app.stardict[key] = app.dfo[app.dfo['stars'] == key]
    

if __name__ == "__main__":
    app.run(debug=True, port=5113)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
