In [1]:
import queue, json, pandas as pd, itertools, numpy as np, os
from requests_toolbelt.threaded import pool
from datetime import datetime, timedelta
from csv import writer

In [2]:
def collect_live_busy_ness(target_place_ids_and_json_urls):
    """
    Fetch the busy-ness JSON of every target place and store each fetched result.

    params:
        target_place_ids_and_json_urls : pairs of target place id and json url

    Only the successfully fetched results are processed; the list of failed
    pairs returned by get_json_results is not used here.
    """
    fetched_results, _failed = get_json_results(target_place_ids_and_json_urls)
    for fetched_pair in fetched_results:
        place_id, json_result = fetched_pair
        # save() appends a row only when the result contains timing data
        save(place_id, json_result)

def get_json_results(place_ids_and_json_urls, trial=1, max_trials=10):
    """
    Collect the busy-ness json results for the given places.

    All requests of one call are issued in parallel through a requests_toolbelt
    thread pool. Pairs that did not yield a usable result are retried by calling
    this function again with trial+1 until max_trials is reached.
    you can control the number of pairs (place_ids_and_json_urls) up to your CPUs and Proxies

    params:
        place_ids_and_json_urls : pairs of target place id and json url
        trial : number of the current attempt; it is used for retrial and to stop the retrial
        max_trials : attempt number at which retrying stops (default 10)

    returns:
        (json_results, failed_results)
        json_results : list of (place_id, parsed json) for every successful request
        failed_results : list of (place_id, json_url) that still had no usable
                         result when the last attempt finished
    """
    # Materialise once: the pairs are iterated for the jobs, measured with len()
    # and scanned again when looking for requests that never answered.
    place_ids_and_json_urls = list(place_ids_and_json_urls)
    if not place_ids_and_json_urls:
        # nothing to fetch, so do not start a thread pool at all
        return [], []

    jobs = queue.Queue()
    for place_id, json_url in place_ids_and_json_urls:
        #here you can add a proxy argument if you have
        #jobs.put({'method': 'GET', 'url': json_url, "timeout":10, 'params':{'place_id':place_id}, 'proxies':{'http': 'proxy url', 'https': 'proxy url'}})
        # place_id travels in 'params' so it can be read back from request_kwargs below
        jobs.put({'method': 'GET', 'url': json_url, "timeout":10, 'params':{'place_id':place_id}})

    p = pool.Pool(job_queue=jobs)
    p.join_all()

    json_results = []
    place_ids_and_json_urls_to_retry = []
    for response in p.responses():
        place_id = response.request_kwargs['params']['place_id']
        json_url = response.request_kwargs['url']
        status_code = response.status_code if response else None
        if status_code != 200:
            place_ids_and_json_urls_to_retry.append((place_id, json_url))
            continue
        try:
            # The first 4 characters precede the JSON document and are dropped
            # (presumably an anti-hijacking prefix such as )]}' - TODO confirm).
            parsed = json.loads(response.content.decode('utf-8')[4:])
        except ValueError:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError:
            # a 200 with an unusable body is retried instead of aborting the batch.
            place_ids_and_json_urls_to_retry.append((place_id, json_url))
        else:
            json_results.append((place_id, parsed))

    # Some pairs may have produced no response object at all (e.g. the request
    # raised); they are found by comparing against the input and retried too.
    answered_count = len(json_results) + len(place_ids_and_json_urls_to_retry)
    if len(place_ids_and_json_urls) != answered_count:
        answered_place_ids = {pid for pid, _ in json_results}
        answered_place_ids.update(pid for pid, _ in place_ids_and_json_urls_to_retry)
        place_ids_and_json_urls_to_retry = place_ids_and_json_urls_to_retry + [
            (place_id, json_url)
            for place_id, json_url in place_ids_and_json_urls
            if place_id not in answered_place_ids
        ]

    failed_results = []
    if place_ids_and_json_urls_to_retry:
        if trial >= max_trials:
            # >= rather than ==, so a caller starting above the limit still stops
            failed_results = place_ids_and_json_urls_to_retry
        else:
            new_json_results, failed_results = get_json_results(
                place_ids_and_json_urls_to_retry, trial + 1, max_trials)
            json_results = json_results + new_json_results

    return json_results, failed_results
        
    
def get_busy_ness_of_current_hour(timings):
    """
    Extract the busy-ness of the current hour from the timings data.

    params
        timings : sequence (or a falsy value when nothing is available) read as
                  timings[0] -> baseline data passed to get_busy_ness_baseline
                  timings[1] -> current day (0 is remapped to 7)
                  timings[4] -> current hour
                  timings[7][1] -> live busy-ness, read only when len(timings) == 8

    returns a tuple
        (current_hour, original_busy_ness, current_busy_ness, flag,
         busy_ness_baseline, business_hours)
        current_busy_ness is the live value, or -1 (no live information),
        -2 (no baseline value for this hour, flagged as closed) or
        -3 (no timings at all; current_hour and original_busy_ness are -3 too).
        busy_ness_baseline is the baseline table flattened day by day into one
        list, business_hours the matching list of 1 (value present) / 0 (None);
        both are None when there are no timings.
    """
    if not timings:
        return -3, -3, -3, "no timing information", None, None

    baseline_df = get_busy_ness_baseline(timings[0])
    # transpose so that each row is one day, then flatten row after row
    busy_ness_baseline = list(itertools.chain.from_iterable(baseline_df.T.to_numpy()))
    business_hours = [int(value is not None) for value in busy_ness_baseline]

    # the baseline columns are labelled 1..7, so day 0 is looked up as 7
    day_column = 7 if timings[1] == 0 else timings[1]
    current_hour = timings[4]
    original_busy_ness = baseline_df[day_column][current_hour]

    if not original_busy_ness:
        current_busy_ness, flag = -2, 'is closed now'
    elif len(timings) == 8:
        current_busy_ness, flag = timings[7][1], 'is open now'
    else:
        current_busy_ness, flag = -1, 'no live busy-ness information'

    return current_hour, original_busy_ness, current_busy_ness, flag, busy_ness_baseline, business_hours


def get_busy_ness_baseline(busy_ness_baseline_data):
    """
    Build the busy_ness baseline table.

    params:
        busy_ness_baseline_data : busy_ness baseline data, an iterable of
            per-day entries read as entry[0] -> column label of the day and
            entry[1] -> list of per-hour entries (or a falsy value), where
            hour_entry[0] is the hour, hour_entry[1] the busy-ness value and
            hour_entry[2] a text that must be non-empty for the value to count

    returns:
        DataFrame with index 0..23 (hours) and columns 1..7 (days); a cell holds
        the busy-ness value, or None where no value was recorded (value 0 or
        empty text are treated as no value).
    """
    busy_ness_baseline_df = pd.DataFrame(index=range(0,24), columns=range(1,8))
    for busy_ness_of_day in busy_ness_baseline_data:
        hours_of_day = busy_ness_of_day[1]
        if not hours_of_day:
            # no hourly data for this day: leave the column empty
            continue
        busy_ness_list = [None]*24
        for busy_ness_of_hour in hours_of_day:
            if busy_ness_of_hour[1] != 0 and busy_ness_of_hour[2] != '':
                busy_ness_list[busy_ness_of_hour[0]] = busy_ness_of_hour[1]
        busy_ness_baseline_df[busy_ness_of_day[0]] = busy_ness_list
    # np.nan instead of pd.np.nan: the pandas.np alias was deprecated in
    # pandas 1.0 and removed in pandas 2.0.
    return busy_ness_baseline_df.replace({np.nan: None})


def get_timing(json_result):
    """
    return the specific part having the busy-ness information from the json_result
    params:
        json_result : the result from the request to Google Place json url
    returns:
        json_result[6][84], or None when that position does not exist
        (json_result is None, too short, or not indexable that way)
    """
    try:
        return json_result[6][84]
    except (IndexError, KeyError, TypeError):
        # Only lookup failures mean "no timing data"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        return None

    
def append_into_file(collected_place_busy_ness_row):
    """
    Append one busy_ness row as a CSV line to the file named by FILE_NAME.
    The file is opened in append mode, so it works like a log.
    params:
        collected_place_busy_ness_row : [
                            place_id,
                            current_hour,
                            original_busy_ness, #normal busy_ness
                            current_busy_ness, #live busy_ness
                            flag,
                            collected_datatime]
    """
    # newline='' leaves line-ending handling to the csv writer
    with open(FILE_NAME, 'a+', encoding="utf-8", newline='') as log_file:
        writer(log_file).writerow(collected_place_busy_ness_row)

        
def save(place_id, json_result):
    """
    Extract the busy_ness of one place and append it, with a timestamp, to the file.
    params:
        place_id : the place id, this is used to join or connect with the place information (like place name)
        json_result : the result from the request to Google Place json url
    returns:
        True when a row was appended, False when json_result holds no timing part
    """
    timing = get_timing(json_result)
    if timing is None:
        return False

    # only the first four values are written; the baseline lists are not stored
    current_hour, original_busy_ness, current_busy_ness, flag, *_ = get_busy_ness_of_current_hour(timing)
    collected_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    append_into_file([
        place_id,
        current_hour,
        original_busy_ness,
        current_busy_ness,
        flag,
        collected_at,
    ])
    return True

In [3]:
# Name of the file the collected rows are appended to as CSV lines
# (written by append_into_file, read back with pd.read_csv further down).
FILE_NAME = 'test'

#two places near your university
# Each entry is a [place_id, json_url] pair; get_json_results unpacks it in that
# order and sends place_id along as a request parameter.
target_place_ids_and_json_urls = [
                                    ['ftNnZL-SGZ7Zxc8PmOab-Ag', #Trattoria Naccarato
                                   'https://www.google.com/maps/preview/place?authuser=0&hl=en&gl=qa&pb=!1m19!1s0x4795b70248d531db%3A0xfaa8b9f5b2dc4900!2s!3m9!1m3!1d4636.958475911476!2d7.037003617703534!3d49.272517219078736!2m0!3m2!1i1728!2i684!4f13.1!4m2!3d49.27536836362703!4d7.034693956375111!5e4!15m2!1m1!4s%2Fg%2F11b76gw4b0!13m50!2m2!1i408!2i240!3m2!2i10!5b1!7m42!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!14m4!1sSdNnZPX4ComIkdUPnJ69yAk!3b1!7e81!15i10555!15m50!1m10!4e2!18m7!3b0!6b0!14b1!17b1!20b1!27m1!1b0!20e2!2b1!4b1!5m6!2b1!3b1!5b1!6b1!7b1!10b1!10m1!8e3!11m1!3e1!17b1!20m2!1e3!1e6!24b1!25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!107m2!1m1!1e1!22m1!1e81!29m0!30m3!3b1!6m1!2b1!32b1!37i646&q=*&pf=t'],
                                    ['DdVnZNjXK_mVxc8Pof6uCA', #Volkans Bistro
                                     'https://www.google.com/maps/preview/place?authuser=0&hl=en&gl=qa&pb=!1m14!1s0x410b7dd3b7d549e5%3A0x48ef8eb98436f6ba!3m9!1m3!1d4636.687379319532!2d7.037797541308595!3d49.2754012!2m0!3m2!1i1248!2i684!4f13.1!4m2!3d49.2787587524644!4d7.038470506668086!12m4!2m3!1i360!2i120!4i8!13m57!2m2!1i203!2i100!3m2!2i4!5b1!6m6!1m2!1i86!2i86!1m2!1i408!2i240!7m42!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e9!2b1!3e2!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e4!2b1!4b1!9b0!14m5!1sCdVnZMuqJZu_xc8PtcySyAI!4m1!2i5210!7e81!12e3!15m82!1m29!4e2!13m9!2b1!3b1!4b1!6i1!8b1!9b1!14b1!20b1!25b1!18m17!3b1!4b1!5b1!6b1!9b1!12b1!13b1!14b1!15b1!17b1!20b1!21b1!22b0!25b0!27m1!1b0!28b0!2b1!5m6!2b1!3b1!5b1!6b1!7b1!10b1!10m1!8e3!11m1!3e1!14m1!3b1!17b1!20m2!1e3!1e6!24b1!25b1!26b1!29b1!30m1!2b1!36b1!39m3!2m2!2i1!3i1!43b1!52b1!54m1!1b1!55b1!56m2!1b1!3b1!65m5!3m4!1m3!1m2!1i224!2i298!71b1!72m4!1m2!3b1!5b1!4b1!89b1!103b1!113b1!21m28!1m6!1m2!1i0!2i0!2m2!1i530!2i684!1m6!1m2!1i1198!2i0!2m2!1i1248!2i684!1m6!1m2!1i0!2i0!2m2!1i1248!2i20!1m6!1m2!1i0!2i664!2m2!1i1248!2i684!22m2!1e81!8e1!29m0!30m3!3b1!6m1!2b1!34m2!7b1!10b1!37i646!39sVolkans+Bistro&q=Volkans%20Bistro']
                                    ]


In [4]:
# Run the collection: one HTTP GET per target place; every place whose result
# contains timing data gets one row appended to FILE_NAME.
collect_live_busy_ness(target_place_ids_and_json_urls)

  return busy_ness_baseline_df.replace({pd.np.nan: None})
  return busy_ness_baseline_df.replace({pd.np.nan: None})


In [5]:
# Read the appended rows back. The rows are written without a header line,
# so the file is read with header=None and the columns are named here in the
# same order as the row built in save().
df = pd.read_csv(FILE_NAME,header=None)
df.columns = [
                            'place_id',
                            'current_hour',
                            'original_busy_ness', #normal busy_ness
                            'current_busy_ness', #live busy_ness
                            'flag',
                            'collected_datatime']
df

Unnamed: 0,place_id,current_hour,original_busy_ness,current_busy_ness,flag,collected_datatime
0,ftNnZL-SGZ7Zxc8PmOab-Ag,22,,-2,is closed now,2023-05-19 23:19:07
1,DdVnZNjXK_mVxc8Pof6uCA,22,25.0,31,is open now,2023-05-19 23:19:07
