In [1]:
from pymongo import MongoClient

# Connect to the MongoDB server running on localhost, port 27017
client = MongoClient(host="localhost", port=27017)

# Handle to the project database
db = client["NonRelProj"]

# Handles to the two collections used in this notebook:
# "Reviews" and "MetaData" (the latter holds the business documents)
reviews = db["Reviews"]
meta = db["MetaData"]

# Convert hours <br>
Convert opening and closing times

The following is a document from the MetaData collection (the one containing the business data). We want to convert the format of its 'hours' field.
```python
# Example data
restaurant_data = {
    "name": "Kraken Coffee Kahului",
    "category": "Restaurant",
    "hours": [['Thursday', '11AM–8PM'], ['Friday', '11AM–8PM'], 
    ['Saturday', '11AM–8PM'], ['Sunday', 'Closed'], ['Monday', '11AM–8PM'], 
    ['Tuesday', '11AM–8PM'], ['Wednesday', '11AM–8PM']],
    "location": {"type": "Point", "coordinates": [-156.4506136, 20.8882377]}
}
```

The goal is a more structured format for 'hours': each document should state its opening and closing time explicitly for every day (with '99:99' marking a day on which the business is closed), so that in the end we obtain something like this:

[{'day': 'Thursday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Friday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Saturday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Sunday', 'open_time': '99:99', 'close_time': '99:99'}, {'day': 'Monday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Tuesday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Wednesday', 'open_time': '11:00', 'close_time': '20:00'}]


In [2]:
from datetime import datetime

def _to_24h(clock_text):
    """Convert a 12-hour clock string ('11AM', '6:30PM') to 24-hour 'HH:MM'.

    Raises ValueError when the text matches neither supported layout.
    """
    layout = "%I:%M%p" if ":" in clock_text else "%I%p"
    return datetime.strptime(clock_text, layout).strftime("%H:%M")


def convert_hours(hours_list):
    """Convert raw ``[day, time_range]`` pairs into structured opening hours.

    Args:
        hours_list: list of entries. Each entry is either a two-element list
            ``[day, time_range]`` (e.g. ``['Friday', '11AM–8PM']``) or a dict,
            which is treated as already converted and kept unchanged.

    Returns:
        A list of dicts with the keys ``day``, ``open_time`` and
        ``close_time`` (24-hour ``'HH:MM'`` strings). A closed day is encoded
        as ``'99:99'`` for both times, and an "open 24 hours" day as
        ``'00:00'`` / ``'23:59'``. Entries that cannot be parsed are reported
        with ``print`` and left out. An empty list is returned when
        ``hours_list`` is empty or not a list.
    """
    if not hours_list or not isinstance(hours_list, list):
        return []  # Nothing usable to convert

    formatted_hours = []

    for entry in hours_list:
        if isinstance(entry, dict):
            # Already formatted, keep as is (makes re-running harmless)
            formatted_hours.append(entry)
            continue

        if not isinstance(entry, list) or len(entry) != 2:
            print(f"Skipping invalid entry: {entry}")
            continue

        day, time_range = entry

        if not isinstance(day, str) or not isinstance(time_range, str):
            print(f"Skipping invalid data format: {entry}")
            continue

        label = time_range.strip().lower()

        # 'Closed' -> '99:99' sentinel for both times
        if label == "closed":
            formatted_hours.append({
                "day": day,
                "open_time": "99:99",
                "close_time": "99:99"
            })
            continue

        # 'Open 24 hours' -> whole day
        if "24 hours" in label:
            formatted_hours.append({
                "day": day,
                "open_time": "00:00",
                "close_time": "23:59"
            })
            continue

        # The data uses an en dash as separator; also tolerate a plain
        # hyphen or an em dash by mapping them onto the en dash first.
        normalized_range = time_range.replace("—", "–").replace("-", "–")
        try:
            open_time, close_time = normalized_range.split("–")
        except ValueError:
            # Not exactly one separator -> not a single open/close range
            print(f"Skipping invalid time format: {entry}")
            continue

        # Remove every whitespace character (including non-breaking spaces)
        # and upper-case the text BEFORE inferring AM/PM, so that inputs such
        # as '9 – 11AM' or '11 am–8 pm' do not end up as unparsable '9 AM'.
        open_time = "".join(open_time.split()).upper()
        close_time = "".join(close_time.split()).upper()

        # When only the closing time carries AM/PM (e.g. '1–5PM'), the
        # opening time shares the same indication.
        close_has_meridiem = "AM" in close_time or "PM" in close_time
        open_has_meridiem = "AM" in open_time or "PM" in open_time
        if close_has_meridiem and not open_has_meridiem:
            open_time += "AM" if "AM" in close_time else "PM"

        # Convert open time
        try:
            open_time = _to_24h(open_time)
        except ValueError:
            print(f"Skipping invalid open_time: {open_time} "
                  f"(close_time is {close_time})")
            continue

        # Convert close time
        try:
            close_time = _to_24h(close_time)
        except ValueError:
            print(f"Skipping invalid close_time: {close_time} "
                  f"(open_time is {open_time})")
            continue

        formatted_hours.append({
            "day": day,
            "open_time": open_time,
            "close_time": close_time
        })

    return formatted_hours





# Sample document whose 'hours' field uses the raw [day, time-range] layout;
# it lets us inspect what the conversion produces.
restaurant_data = {
    "name": "Kraken Coffee Kahului",
    "category": "Restaurant",
    "hours": [
        ["Thursday", "11AM–8PM"],
        ["Friday", "11AM–8PM"],
        ["Saturday", "11AM–8PM"],
        ["Sunday", "Closed"],
        ["Monday", "11AM–8PM"],
        ["Tuesday", "11AM–8PM"],
        ["Wednesday", "11AM–8PM"],
    ],
    "location": {
        "type": "Point",
        "coordinates": [-156.4506136, 20.8882377],
    },
}

# Run the conversion on the sample and show the structured result
formatted_hours = convert_hours(restaurant_data["hours"])
print(formatted_hours)



[{'day': 'Thursday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Friday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Saturday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Sunday', 'open_time': '99:99', 'close_time': '99:99'}, {'day': 'Monday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Tuesday', 'open_time': '11:00', 'close_time': '20:00'}, {'day': 'Wednesday', 'open_time': '11:00', 'close_time': '20:00'}]


Let's convert the format of hours for all documents

In [3]:
# Number of documents left untouched, either because their hours were
# empty/invalid or because processing them raised an error
not_updated_count = 0

# Process every document that has an "hours" field. Only "_id" and "hours"
# are read below, so the projection avoids transferring the other fields.
for doc in meta.find({"hours": {"$exists": True}}, {"hours": 1}):
    try:
        # Transform hours into the structured format
        updated_hours = convert_hours(doc["hours"])

        if updated_hours:
            # Only update if the conversion produced at least one entry
            meta.update_one(
                {"_id": doc["_id"]},
                {"$set": {"hours": updated_hours}},
            )
        else:
            not_updated_count += 1
            # Adjacent string literals are used instead of a backslash inside
            # the string, which would embed the next line's indentation in
            # the printed message.
            print(
                f"Skipping update for document ID {doc['_id']} "
                "due to empty or invalid hours."
            )

    except Exception as e:
        # Best effort: report the failing document and carry on with the rest
        print(f"Error processing document ID {doc['_id']}: {e}")
        not_updated_count += 1

print(
    f"All documents processed. {not_updated_count} "
    "documents were not updated."
)


Skipping update for document ID                   67c751ac8c4ae1c3ac1bcf12 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bcf14 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bcf68 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bcf6a due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bd054 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bd106 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bd231 due to empty or                     invalid hours.
Skipping update for document ID                   67c751ac8c4ae1c3ac1bd237 due to empty or                     invalid

Show a restaurant with the new **'hours'** format:

In [4]:
# Pretty print for better readability; imported at the top of the cell
# rather than inside the conditional below
from pprint import pprint

# Fetch a single document from MetaData to inspect its 'hours' field
example = meta.find_one({})

# Print the document if one was found
if example is not None:
    pprint(example)


{'MISC': {'Accessibility': ['Wheelchair accessible entrance'],
          'Amenities': ['Good for kids'],
          'Atmosphere': ['Casual'],
          'Crowd': ['Groups', 'Tourists'],
          'Offerings': ['Comfort food', 'Quick bite'],
          'Payments': ['NFC mobile payments'],
          'Popular for': ['Lunch', 'Solo dining'],
          'Service options': ['Outdoor seating', 'Takeout', 'Delivery']},
 '_id': ObjectId('67c751ac8c4ae1c3ac1bceda'),
 'address': 'Hale Pops, 55-370 Kamehameha Hwy, Laie, HI 96762',
 'avg_rating': 4.4,
 'category': ['Restaurant'],
 'description': None,
 'gmap_id': '0x7c00456eecad3111:0x8217f9600c51f33',
 'hours': [{'close_time': '20:00', 'day': 'Thursday', 'open_time': '11:00'},
           {'close_time': '20:00', 'day': 'Friday', 'open_time': '11:00'},
           {'close_time': '20:00', 'day': 'Saturday', 'open_time': '11:00'},
           {'close_time': '99:99', 'day': 'Sunday', 'open_time': '99:99'},
           {'close_time': '20:00', 'day': 'Monday', 

To make it easier to filter restaurants by time of day, let's convert all opening and closing times into seconds since midnight.

In [5]:
def time_to_seconds(time_str):
    """Convert an 'HH:MM' string into seconds elapsed since midnight.

    Args:
        time_str: an 'HH:MM' string, or an int (treated as already converted).

    Returns:
        The int unchanged when ``time_str`` is an int; otherwise
        ``hours * 3600 + minutes * 60``. ``None`` is returned for any other
        type, for a string that does not split into exactly two parts on
        ':', or when either part is not an integer.
    """
    # Already converted: hand the value back untouched
    if isinstance(time_str, int):
        return time_str

    # Anything that is neither int nor str cannot be interpreted
    if not isinstance(time_str, str):
        return None

    pieces = time_str.split(":")

    # Exactly two parts are required: hours and minutes
    if len(pieces) != 2:
        return None

    try:
        hh, mm = (int(piece) for piece in pieces)
    except ValueError:
        # Hours or minutes are not integers
        return None

    return hh * 3600 + mm * 60
    

# Quick sanity check: should print 84600, 38700 and 53220, one per line
for sample in ("23:30", "10:45", "14:47"):
    print(time_to_seconds(sample))


84600
38700
53220


Loop that converts the opening and closing times of all restaurants:

In [6]:
# Walk over every document in the collection
for doc in meta.find({}):
    # .get() returns None when the document has no 'hours' field
    hours_list = doc.get("hours")

    # Only documents whose 'hours' is a list are processed
    if isinstance(hours_list, list):
        # Rebuild each dict entry with its times expressed in seconds;
        # entries that are not dicts are left out. A missing time falls
        # back to the "99:99" marker before conversion.
        updated_hours = [
            {
                "day": entry.get("day", ""),
                "open_time": time_to_seconds(entry.get("open_time", "99:99")),
                "close_time": time_to_seconds(entry.get("close_time", "99:99")),
            }
            for entry in hours_list
            if isinstance(entry, dict)
        ]

        # Write back only when at least one entry was converted
        if updated_hours:
            meta.update_one(
                {"_id": doc["_id"]},
                {"$set": {"hours": updated_hours}},
            )

print("All documents updated successfully!")


All documents updated successfully!


In [7]:
# Pretty print for better readability; imported at the top of the cell
# rather than inside the conditional below
from pprint import pprint

# Fetch a single document from MetaData to inspect its 'hours' field
# after the conversion to seconds
example = meta.find_one({})

# Print the document if one was found
if example is not None:
    pprint(example)


{'MISC': {'Accessibility': ['Wheelchair accessible entrance'],
          'Amenities': ['Good for kids'],
          'Atmosphere': ['Casual'],
          'Crowd': ['Groups', 'Tourists'],
          'Offerings': ['Comfort food', 'Quick bite'],
          'Payments': ['NFC mobile payments'],
          'Popular for': ['Lunch', 'Solo dining'],
          'Service options': ['Outdoor seating', 'Takeout', 'Delivery']},
 '_id': ObjectId('67c751ac8c4ae1c3ac1bceda'),
 'address': 'Hale Pops, 55-370 Kamehameha Hwy, Laie, HI 96762',
 'avg_rating': 4.4,
 'category': ['Restaurant'],
 'description': None,
 'gmap_id': '0x7c00456eecad3111:0x8217f9600c51f33',
 'hours': [{'close_time': 72000, 'day': 'Thursday', 'open_time': 39600},
           {'close_time': 72000, 'day': 'Friday', 'open_time': 39600},
           {'close_time': 72000, 'day': 'Saturday', 'open_time': 39600},
           {'close_time': 362340, 'day': 'Sunday', 'open_time': 362340},
           {'close_time': 72000, 'day': 'Monday', 'open_time': 396