Skip to content

Commit

Permalink
Merge branch 'christoph' into noah
Browse files Browse the repository at this point in the history
  • Loading branch information
noahluech committed Dec 2, 2023
2 parents ab86fa7 + 3bda211 commit 1061605
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 74 deletions.
123 changes: 113 additions & 10 deletions src/data_acquisition/web_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ def __init__(self, url_list, server_name, server_port, database_name, collection
self.collection_name = collection_name
self.server_name = server_name
self.server_port = server_port
self.query_strings = [
(78, 'Active wheelchair'),
(81, 'No limitations'),
(79, 'E-wheelchair'),
(80, 'Stroller'),
(3951, 'Scewo BRO')
]

def download_and_store(self) -> None:
"""
Expand All @@ -53,8 +60,96 @@ def download_and_store(self) -> None:
for url in self.url_list:
self._download_json(url)

# Define a function to replace grade values with accessibility descriptions
def _replace_accessibility(self, grade):
if grade == 1:
return "Completely accessible"
elif grade == 2:
return "Partially accessible"
elif grade == 3:
return "Not easily accessible"
else:
return "Unknown accessibility"

def _traverse_and_replace(self, obj):
if isinstance(obj, dict):
if "accessibility" in obj and "grade" in obj["accessibility"]:
obj["accessibility"]["grade"] = self._replace_accessibility(obj["accessibility"]["grade"])
for value in obj.values():
self._traverse_and_replace(value)
elif isinstance(obj, list):
for item in obj:
self._traverse_and_replace(item)

def _remove_not_needed_keys(self, obj):
list_of_keys = [
"version",
"createdAt",
"updatedAt",
"url",
"accessUrl",
"approval",
"readyForApproval",
"ratingProfileNotice",
"status",
"webUrl",
"resourceUrl",
"changesUrl",
"attributionUrl",
"isOpenData",
"license",
"position",
"mainImage",
"totalClassifications",
"companyAssignment",
"numberOfComments",
"structure",
"areaClassifications"
]

for key in list_of_keys:
if key in obj:
obj.pop(key)

if 'accessibility' in obj and isinstance(obj['accessibility'], dict):
grade_value = obj['accessibility'].get('grade')
obj['accessibility'] = grade_value

def _remove_property_values(self, obj):
if isinstance(obj, dict):
obj.pop("propertyValues", None) # Remove propertyValues if present
for value in obj.values():
self._remove_property_values(value)
elif isinstance(obj, list):
for item in obj:
self._remove_property_values(item)


def _modify_structure(self, obj):
if isinstance(obj, dict):
if "accessibility" in obj and isinstance(obj["accessibility"], dict):
obj["accessibility"] = obj["accessibility"].get("grade", None)

obj.pop("readyForApproval", None)

obj.pop("images", None)

for value in obj.values():
self._modify_structure(value)
elif isinstance(obj, list):
for item in obj:
self._modify_structure(item)

def _move_criterion_values(self, obj):
if "pathClassifications" in obj and isinstance(obj["pathClassifications"], list):
for item in obj["pathClassifications"]:
if "criterion" in item:
criterion_values = item.pop("criterion")
item.update(criterion_values)



def _download_json(self, url : str) -> None:
def _download_json(self, input_url : str) -> None:
"""
This private function downloads the JSON data from a specific internet resource and initiates the storage in MongoDB
Expand All @@ -63,15 +158,23 @@ def _download_json(self, url : str) -> None:
url : str
url to the JSON-File
"""
try:
response = requests.get(url)
if response.status_code == 200:
data = response.json()
self.store_in_mongodb(data)
else:
print(f"Failed to download JSON. Status code: {response.status_code}")
except requests.RequestException as e:
print(f"An error occurred: {e}")
for id, description in self.query_strings:
url = input_url + "?rating_profile_id="+str(id)
try:
response = requests.get(url, verify=False)
if response.status_code == 200:
data = response.json()
self._traverse_and_replace(data)
self._remove_not_needed_keys(data)
self._remove_property_values(data)
self._move_criterion_values(data)
self._modify_structure(data)
data["category"] = description
self.store_in_mongodb(data)
else:
print(f"Failed to download JSON. Status code: {response.status_code}")
except requests.RequestException as e:
print(f"An error occurred: {e}")

def store_in_mongodb(self, data : dict) -> None:
"""
Expand Down
15 changes: 15 additions & 0 deletions src/data_handler/data_handler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pymongo
from bson import ObjectId

class DataHandler:
"""
Expand Down Expand Up @@ -61,6 +62,20 @@ def get_places(self) -> list:
for result in results:
places.append((result['_id'], result.get('name', 'N/A')))
return places

def get_json_values(self, id : str):
    """
    Fetch a single document from the collection by its ``_id``.

    Parameters
    ----------
    id : str
        String form of the document's ObjectId.

    Returns
    -------
    The document returned by ``self.collection.find_one`` for that ``_id``.

    Raises
    ------
    LookupError
        If no document with the given ``_id`` exists. ``LookupError`` is a
        subclass of ``Exception``, so callers catching ``Exception`` still work.

    Notes
    -----
    ``ObjectId(id)`` performs the string conversion; a malformed id is
    presumably rejected there (bson ``InvalidId``) - confirm against the bson
    documentation.
    """
    document_id = ObjectId(id)

    # Look the document up by its primary key.
    result = self.collection.find_one({"_id": document_id})

    # Compare against None explicitly so that "no match" is the only failure case.
    if result is None:
        raise LookupError("Document not found")
    return result



def close_connection(self) -> None:
"""
Expand Down
145 changes: 96 additions & 49 deletions src/json_query_engine/jsons.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,102 @@
json_value = {
"people": [
{
"name": "John Doe",
"age": 30,
"address": {
"city": "New York",
"state": "NY"
json_schema = {
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/OpenData",
"definitions": {
"OpenData": {
"type": "object",
"additionalProperties": False,
"properties": {
"_id": {
"$ref": "#/definitions/ID"
},
"id": {
"type": "string",
"format": "uuid"
},
"name": {
"type": "string"
},
"category": {
"type": "string"
},
"accessibility": {
"$ref": "#/definitions/Accessibility"
},
"pathClassifications": {
"type": "array",
"items": {
"$ref": "#/definitions/PathClassification"
}
}
},
"is_student": False,
"hobbies": ["reading", "traveling"]
"required": [
"_id",
"accessibility",
"category",
"id",
"name",
"pathClassifications"
],
"title": "OpenData"
},
{
"name": "Anna Smith",
"age": 25,
"address": {
"city": "San Francisco",
"state": "CA"
"ID": {
"type": "object",
"additionalProperties": False,
"properties": {
"$oid": {
"type": "string"
}
},
"is_student": True,
"hobbies": ["painting", "gardening"]
}
]
}

json_schema = {
"type": "object",
"properties": {
"people": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
"address": {
"type": "object",
"properties": {
"city": {"type": "string"},
"state": {"type": "string"}
},
"required": ["city", "state"]
},
"is_student": {"type": "boolean"},
"hobbies": {
"type": "array",
"items": {"type": "string"}
}
"required": [
"$oid"
],
"title": "ID"
},
"PathClassification": {
"type": "object",
"additionalProperties": False,
"properties": {
"id": {
"type": "integer"
},
"presetKey": {
"type": "string"
},
"name": {
"type": "string"
},
"accessibility": {
"$ref": "#/definitions/Accessibility"
},
"required": ["name", "age", "address", "is_student", "hobbies"]
}
"key": {
"type": "string"
},
"description": {
"type": "string"
},
"isActive": {
"type": "boolean"
}
},
"required": [
"accessibility",
"description",
"id",
"isActive",
"key",
"name",
"presetKey"
],
"title": "PathClassification"
},
"Accessibility": {
"type": "string",
"enum": [
"Completely accessible",
"Not easily accessible",
"Partially accessible"
],
"title": "Accessibility"
}
},
"required": ["people"]
}
}
26 changes: 11 additions & 15 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
from settings import ProjectSettings
from json_query_engine import QueryEngine
from json_query_engine import json_value, json_schema
from json_query_engine import json_schema
from data_handler import DataHandler
from dash_app import OpenDataDashApp
if __name__ == '__main__':
print("Project entry point")
# Usage
config = ProjectSettings.get_instance()

# Accessing the variables
print(config.OPENAI_API_KEY)
print(config.DB_SERVER)
print(config.DB_PORT)
print(config.DB_NAME)
print(config.COLLECTION_NAME)

#query_engine = QueryEngine(config.OPENAI_API_KEY, json_value, json_schema)
#print(query_engine.get_nl_response("How old is Anna Smith?"))
"""
query_engine = QueryEngine(config.OPENAI_API_KEY, json_value, json_schema)
print(query_engine.get_nl_response("What is the accessibility of the Foyer?"))
#print(query_engine.get_raw_response("How old is Anna Smith?"))
"""
data_handler = DataHandler(config.DB_SERVER, config.DB_PORT, config.DB_NAME, config.COLLECTION_NAME)
places = data_handler.get_places()

"""dash_places = []
for id, place in places:
print(place)
dash_places.append(place)"""

try:
json_document = data_handler.get_json_values('656b62ad6fada3f2c0b08d5c')
query_engine = QueryEngine(config.OPENAI_API_KEY, json_document, json_schema)
print(query_engine.get_nl_response("Quel est le degré d'accessibilité de l'escalier du Naturmuseum St. Gallen?"))
except Exception as e:
print(e)
data_handler.close_connection()

#open_data_app = OpenDataDashApp(dash_places)
Expand Down

0 comments on commit 1061605

Please sign in to comment.