In [10]:
import overpy
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np
import time

def geocode_address(address):
    geolocator = Nominatim(user_agent="your_app_name")
    location = geolocator.geocode(address)
    time.sleep(1)
    return (location.latitude, location.longitude)



def get_nearby_amenities(address):
    try:
        coordinates = geocode_address(address)

        overpass_query = f"""
            node(around:500, {coordinates[0]}, {coordinates[1]})
            ["amenity"];
            out;
        """

        api = overpy.Overpass()
        result = api.query(overpass_query)

        amenities = [node.tags.get('amenity', 'N/A') for node in result.nodes]
        time.sleep(1)
        return ", ".join(amenities)
    
    except Exception as e:
        return str(e)

def process_excel(input_file, output_file):
    df = pd.read_excel(input_file)

    df['Amenities'] = df['Address'].apply(get_nearby_amenities)

    df.to_csv(output_file, index=False)

# Example usage
input_excel_file = "final_input_addresses.xlsx"
output_csv_file = "final_output_amenities.csv"
process_excel(input_excel_file, output_csv_file)


In [11]:
import re
# Create data frame
csv = pd.read_csv("final_output_amenities.csv", encoding='utf-8')
amt = pd.DataFrame(csv)

# Extracting all categories from amenities 
amenities_mentioned = amt["Amenities"].str.split().explode().unique().tolist()
amenities_mentioned = [amenity.rstrip(",") for amenity in amenities_mentioned]

#remove irrelevant ones
remove = ["'HTTPSConnectionPool(host='nominatim.openstreetmap.org'", 'Max', 'retries', 'exceeded', 'with', 'url', 'Caused', 'by', 'Read', 'timed', 'out', 'read', "'NoneType'", 'object', 'has', 'no', 'attribute', "'latitude'", "read", "url", "Caused", "archive", "out", "HTTPSConnectionPool(host='nominatim.openstreetmap.org'"]
filtered_amenities= [amenity for amenity in amenities_mentioned if amenity not in remove]
filtered_amenities = [amenity for amenity in filtered_amenities if not any(error_word in amenity for error_word in ['Error', 'timeout', 'IncompleteRead', '/search?q='])]

# Remove non-alphabetic characters
filtered_amenities = [re.sub(r'[^a-zA-Z_]', '', amenity) for amenity in filtered_amenities]
filtered_amenities = [amenity for amenity in filtered_amenities if amenity]

#drop duplicates
amenities_mentioned = list(set(filtered_amenities))

# Print list of unique amenities
print(amenities_mentioned)

['read', 'bench', 'dental_hygienist', 'casino', 'playground', 'cannabis', 'university', 'restaurant', 'college', 'url', 'brothel', 'bbq', 'clinic', 'post_office', 'pedalo', 'Caused', 'library', 'cafe', 'ferry_terminal', 'meeting_centre', 'nightclub', 'loading_dock', 'parking_entrance', 'bus_station', 'accupuncture', 'music_studio', 'hospital', 'coworking_space', 'boat_rental', 'stripclub', 'bureau_de_change', 'bar', 'fast_food', 'healthcare', 'dentist', 'locker', 'karaoke_box', 'fuel', 'atm', 'compressed_air', 'waste_basket', 'waste_disposal', 'gym', 'balance_beam', 'warehouse', 'parking', 'social_facility', 'events_venue', 'cooking_school', 'pub', 'pharmacy', 'music_school', 'chiropractic', 'school', 'bank', 'charging_station', 'food_court', 'car_rental', 'taxi', 'luggage_locker', 'lockers', 'recycling', 'animal_shelter', 'fountain', 'drinking_water', 'place_of_worship', 'vending_machine', 'theatre', 'car_sharing', 'childcare', 'fire_station', 'station', 'motorcycle_parking', 'post_bo

In [12]:
excel = "3800_reviews_english.xlsx"

# Read Excel file into DataFrame
rating_amenities = pd.read_excel(excel)

# Drop specific columns from the original DataFrame
#keep place id, overall rating, individual rating
rating_amenities = rating_amenities.drop(["name", "review_text", "location_link", "reviews", "review_datetime_utc", "reviews_per_score_1", "reviews_per_score_2", "reviews_per_score_3", "reviews_per_score_4", "reviews_per_score_5", "query", "google_id", "reviews_link", "review_id", "review_pagination_id", "author_id", "review_text_english", "review_img_url", "review_img_url", "review_questions", "review_photo_ids", "owner_answer", "owner_answer_timestamp", "review_img_urls",  "owner_answer_timestamp_datetime_utc", "review_link", "review_timestamp", "review_likes", "reviews_id"], axis=1)

# Display new DataFrame
print(rating_amenities)

                         place_id  rating  review_rating
0     ChIJCwHQg38JxkcRJeVtM9mPCnM     3.2              1
1     ChIJCwHQg38JxkcRJeVtM9mPCnM     3.2              2
2     ChIJCwHQg38JxkcRJeVtM9mPCnM     3.2              5
3     ChIJCwHQg38JxkcRJeVtM9mPCnM     3.2              1
4     ChIJCwHQg38JxkcRJeVtM9mPCnM     3.2              5
...                           ...     ...            ...
3830  ChIJGU-a6R8XxkcR5cxWLxQYpgs     4.0              4
3831  ChIJGU-a6R8XxkcR5cxWLxQYpgs     4.0              5
3832  ChIJGU-a6R8XxkcR5cxWLxQYpgs     4.0              5
3833  ChIJGU-a6R8XxkcR5cxWLxQYpgs     4.0              4
3834  ChIJGU-a6R8XxkcR5cxWLxQYpgs     4.0              5

[3835 rows x 3 columns]


In [13]:
amenity_counts_dict = {amenity: [] for amenity in amenities_mentioned}

# Loop to count the amount of amenities per location
for amenities_str in amt["Amenities"]:
    for amenity in amenities_mentioned:
        count = amenities_str.count(amenity)
        
        amenity_counts_dict[amenity].append(count)

# Add the counts as new columns in amt
for amenity, counts in amenity_counts_dict.items():
    amt[f'{amenity}_counts'] = counts

amt["total_counts"] = amt[[f"{amenity}_counts" for amenity in amenities_mentioned]].sum(axis=1)

# Display
display(amt)



  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts
  amt[f'{amenity}_counts'] = counts


Unnamed: 0,Address,place_id,Amenities,read_counts,bench_counts,dental_hygienist_counts,casino_counts,playground_counts,cannabis_counts,university_counts,...,police_counts,bicycle_repair_station_counts,car_wash_counts,ice_cream_counts,port_counts,mailboxes_counts,parcel_locker_counts,ticket_validator_counts,nursing_home_counts,total_counts
0,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,110
1,"Joan Muyskenweg 22, 1096 CJ Amsterdam",ChIJxZHiajgLxkcRlOo3zmIibOs,"parking, post_box, parking, fuel, charging_sta...",0,4,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,20
2,"Nieuwe Leeuwarderweg, 1021 BZ Amsterdam",ChIJYyOpnFQIxkcRErWFhYVuAeQ,"fuel, post_box, fast_food, pharmacy, pub, pub,...",0,46,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,121
3,"Spoorslag 29, 1082 MM Amsterdam",ChIJ15K9igQKxkcR8A7le6H02hI,"cafe, parking, place_of_worship, cafe, parking...",0,35,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,149
4,"Julianaplein Oost, 1097 DN Amsterdam",ChIJPd2JhH8JxkcRQ8jSHUchci0,"post_box, recycling, fast_food, restaurant, re...",0,38,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,121
5,"Termini 23, Amsterdam",ChIJn0fdmPwJxkcRYe7a-6JHfpI,"townhall, healthcare, social_centre, dentist, ...",0,43,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,136
6,"Stationsplein 39, 1012 AB Amsterdam",ChIJf_z8xbcJxkcRCc-nQwIMAP8,"restaurant, pub, pub, pub, restaurant, bureau_...",0,34,0,2,0,2,0,...,0,1,0,3,0,0,0,0,0,463
7,"Cornelis Lelylaan 35, 1062 HD Amsterdam",ChIJ71V5XiPixUcRSKlH5OYy-iE,"recycling, recycling, recycling, post_box, res...",0,12,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,98
8,"Europaboulevard 4A, 1083 AD Amsterdam",ChIJLWMViCIKxkcRi1ThzDUCzCo,"theatre, conference_centre, parking, parking, ...",0,47,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,97
9,"Kattenburgerstraat 5, 1018 JA Amsterdam",ChIJ8exiz50JxkcRA0dHjOHV59g,"pub, post_box, restaurant, restaurant, restaur...",0,35,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,157


In [14]:
#add place ID
place_ID_df = pd.read_excel("final_input_addresses.xlsx")

# Merge DataFrames based on adress and safe place_id
merged_df = pd.merge(amt, place_ID_df, on = "Address", how = "left")

amt["place_id"] = place_ID_df.groupby("Address")["place_id"].transform("first")
display(amt)

Unnamed: 0,Address,place_id,Amenities,read_counts,bench_counts,dental_hygienist_counts,casino_counts,playground_counts,cannabis_counts,university_counts,...,police_counts,bicycle_repair_station_counts,car_wash_counts,ice_cream_counts,port_counts,mailboxes_counts,parcel_locker_counts,ticket_validator_counts,nursing_home_counts,total_counts
0,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,110
1,"Joan Muyskenweg 22, 1096 CJ Amsterdam",ChIJxZHiajgLxkcRlOo3zmIibOs,"parking, post_box, parking, fuel, charging_sta...",0,4,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,20
2,"Nieuwe Leeuwarderweg, 1021 BZ Amsterdam",ChIJYyOpnFQIxkcRErWFhYVuAeQ,"fuel, post_box, fast_food, pharmacy, pub, pub,...",0,46,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,121
3,"Spoorslag 29, 1082 MM Amsterdam",ChIJ15K9igQKxkcR8A7le6H02hI,"cafe, parking, place_of_worship, cafe, parking...",0,35,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,149
4,"Julianaplein Oost, 1097 DN Amsterdam",ChIJPd2JhH8JxkcRQ8jSHUchci0,"post_box, recycling, fast_food, restaurant, re...",0,38,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,121
5,"Termini 23, Amsterdam",ChIJn0fdmPwJxkcRYe7a-6JHfpI,"townhall, healthcare, social_centre, dentist, ...",0,43,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,136
6,"Stationsplein 39, 1012 AB Amsterdam",ChIJf_z8xbcJxkcRCc-nQwIMAP8,"restaurant, pub, pub, pub, restaurant, bureau_...",0,34,0,2,0,2,0,...,0,1,0,3,0,0,0,0,0,463
7,"Cornelis Lelylaan 35, 1062 HD Amsterdam",ChIJ71V5XiPixUcRSKlH5OYy-iE,"recycling, recycling, recycling, post_box, res...",0,12,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,98
8,"Europaboulevard 4A, 1083 AD Amsterdam",ChIJLWMViCIKxkcRi1ThzDUCzCo,"theatre, conference_centre, parking, parking, ...",0,47,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,97
9,"Kattenburgerstraat 5, 1018 JA Amsterdam",ChIJ8exiz50JxkcRA0dHjOHV59g,"pub, post_box, restaurant, restaurant, restaur...",0,35,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,157


In [15]:
#merge the two to have ratings and amenities in one DataFrame
rating_and_count = pd.merge(amt, rating_amenities, on = "place_id", how = "left")

#turn index column into 
rating_and_count.reset_index(inplace = True)

#Display merged DataFrame
display(rating_and_count)

#Save to csv
rating_and_count.to_csv("rating_and_count.csv", index = False)
for col in rating_and_count.columns:
    print(col)
    
all_columns = rating_and_count.columns.tolist()
print("List of all columns in the DataFrame:")
print(all_columns)


Unnamed: 0,index,Address,place_id,Amenities,read_counts,bench_counts,dental_hygienist_counts,casino_counts,playground_counts,cannabis_counts,...,car_wash_counts,ice_cream_counts,port_counts,mailboxes_counts,parcel_locker_counts,ticket_validator_counts,nursing_home_counts,total_counts,rating,review_rating
0,0,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,...,0,0,0,0,0,0,0,110,3.2,1.0
1,1,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,...,0,0,0,0,0,0,0,110,3.2,2.0
2,2,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,...,0,0,0,0,0,0,0,110,3.2,5.0
3,3,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,...,0,0,0,0,0,0,0,110,3.2,1.0
4,4,"Julianaplein 1, Amsterdam",ChIJCwHQg38JxkcRJeVtM9mPCnM,"post_box, recycling, fast_food, restaurant, re...",0,34,0,0,0,0,...,0,0,0,0,0,0,0,110,3.2,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3839,3839,"Busplein 16, 1315 KR Almere",ChIJGU-a6R8XxkcR5cxWLxQYpgs,"parking, post_box, fast_food, fast_food, fast_...",0,15,0,1,0,0,...,0,1,0,0,0,0,0,95,4.0,4.0
3840,3840,"Busplein 16, 1315 KR Almere",ChIJGU-a6R8XxkcR5cxWLxQYpgs,"parking, post_box, fast_food, fast_food, fast_...",0,15,0,1,0,0,...,0,1,0,0,0,0,0,95,4.0,5.0
3841,3841,"Busplein 16, 1315 KR Almere",ChIJGU-a6R8XxkcR5cxWLxQYpgs,"parking, post_box, fast_food, fast_food, fast_...",0,15,0,1,0,0,...,0,1,0,0,0,0,0,95,4.0,5.0
3842,3842,"Busplein 16, 1315 KR Almere",ChIJGU-a6R8XxkcR5cxWLxQYpgs,"parking, post_box, fast_food, fast_food, fast_...",0,15,0,1,0,0,...,0,1,0,0,0,0,0,95,4.0,4.0


index
Address
place_id
Amenities
read_counts
bench_counts
dental_hygienist_counts
casino_counts
playground_counts
cannabis_counts
university_counts
restaurant_counts
college_counts
url_counts
brothel_counts
bbq_counts
clinic_counts
post_office_counts
pedalo_counts
Caused_counts
library_counts
cafe_counts
ferry_terminal_counts
meeting_centre_counts
nightclub_counts
loading_dock_counts
parking_entrance_counts
bus_station_counts
accupuncture_counts
music_studio_counts
hospital_counts
coworking_space_counts
boat_rental_counts
stripclub_counts
bureau_de_change_counts
bar_counts
fast_food_counts
healthcare_counts
dentist_counts
locker_counts
karaoke_box_counts
fuel_counts
atm_counts
compressed_air_counts
waste_basket_counts
waste_disposal_counts
gym_counts
balance_beam_counts
warehouse_counts
parking_counts
social_facility_counts
events_venue_counts
cooking_school_counts
pub_counts
pharmacy_counts
music_school_counts
chiropractic_counts
school_counts
bank_counts
charging_station_counts
food_

In [16]:
# Clean up column names to try and avoid "rating" not unique error
rating_and_count.head()
rating_and_count.info()
print(rating_and_count.dtypes)

duplicate_columns = rating_and_count.columns[rating_and_count.columns.duplicated()]
print("Duplicate Columns:", duplicate_columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3844 entries, 0 to 3843
Columns: 117 entries, index to review_rating
dtypes: float64(2), int64(112), object(3)
memory usage: 3.4+ MB
index                        int64
Address                     object
place_id                    object
Amenities                   object
read_counts                  int64
                            ...   
ticket_validator_counts      int64
nursing_home_counts          int64
total_counts                 int64
rating                     float64
review_rating              float64
Length: 117, dtype: object
Duplicate Columns: Index([], dtype='object')


In [29]:
# Correlation between features and ratings

# List of features for correlation analysis
selected_features = all_columns
selected_features = [feature for feature in all_columns if feature != 'rating']

# Ensure unique elements in the list
selected_features = list(set(selected_features))

# Check if all selected features are present in the DataFrame
missing_features = [feature for feature in selected_features if feature not in rating_and_count.columns]

# If there are missing features, you can remove them from the selected_features list
if missing_features:
    print("The following features are missing in the DataFrame and will be removed:")
    print(missing_features)
    selected_features = [feature for feature in selected_features if feature in rating_and_count.columns]

# Calculate correlation matrix for selected features
correlation_matrix_selected = rating_and_count[selected_features + ['rating']].corr(numeric_only=True)

# Display the correlation matrix
print("Correlation Matrix:")
print(correlation_matrix_selected)

# Find features with the highest correlation with ratings

# Find features with the highest correlation with ratings
correlation_with_rating = correlation_matrix_selected[['rating']].abs()

# Sort the values in descending order
highest_correlation_selected = correlation_with_rating.sort_values(by='rating', ascending=False)

# Print the features with the highest correlation
print("\nFeatures with the highest correlation with ratings:")
print(highest_correlation_selected)

# Print the first 100 highest correlations with ratings
print("\nTop 100 Features with the Highest Correlation with Ratings:")
highest_correlation_selected_100 = highest_correlation_selected.head(100)
print(highest_correlation_selected_100)


selected_features_100 = highest_correlation_selected_100.index

# Convert the index to a list
selected_features100_list = list(selected_features_100)

# Print the list of features
print("Top 100 in list:")
print(selected_features100_list)

"""
Top 100 Features with the Highest Correlation with Ratings:
                            rating
rating                    1.000000
index                     0.436143
post_office,_counts       0.435711

Under "rating" is actually the correlation
"""

Correlation Matrix:
                         hospital_counts  gym_counts  mailboxes_counts  \
hospital_counts                 1.000000   -0.074255         -0.050805   
gym_counts                     -0.074255    1.000000         -0.013430   
mailboxes_counts               -0.050805   -0.013430          1.000000   
bench_counts                   -0.396517    0.151920          0.174882   
parking_entrance_counts        -0.511620    0.042552          0.069663   
...                                  ...         ...               ...   
arts_centre_counts             -0.156288   -0.041313         -0.028267   
childcare_counts               -0.098768   -0.026108          0.298573   
shelter_counts                 -0.217945    0.007677         -0.039418   
ferry_terminal_counts          -0.132465   -0.035016         -0.023958   
rating                          0.206059    0.054468          0.015425   

                         bench_counts  parking_entrance_counts  \
hospital_counts          

'\nTop 100 Features with the Highest Correlation with Ratings:\n                            rating\nrating                    1.000000\nindex                     0.436143\npost_office,_counts       0.435711\n\nUnder "rating" is actually the correlation\n'

In [31]:
"""
WORK IN PROGRESS: 

Make correlation score between all of them (all possible combinations). 
When highly correlated, drop one of the two. 
Take code from Vanessa?  

Matrix: all possible features x all possible features and correlation scores for all of them

->

import itertools
import pandas as pd

# Assuming 'Amenities' column contains strings with comma-separated amenity names
rating_and_count['Amenities'] = rating_and_count['Amenities'].astype(str)
rating_and_count['Amenities'] = rating_and_count['Amenities'].str.split(',')

# Create a list of all amenities
amenities = list(set(itertools.chain.from_iterable(rating_and_count['Amenities'])))

# Create all possible combinations of amenities
combinations = list(itertools.combinations(amenities, 2))

# Create an empty DataFrame to store correlation scores
correlation_scores = pd.DataFrame(index=amenities, columns=amenities)

# Calculate correlation scores for all combinations
for amenity1, amenity2 in combinations:
    correlation_score = rating_and_count[amenity1].corr(rating_and_count[amenity2])
    correlation_scores.loc[amenity1, amenity2] = correlation_score
    correlation_scores.loc[amenity2, amenity1] = correlation_score

# Identify highly correlated features
highly_correlated_features = set()
for amenity in amenities:
    correlated_with_amenity = correlation_scores[amenity][abs(correlation_scores[amenity]) > 0.8].index.tolist()
    highly_correlated_features.update(correlated_with_amenity)

# Display highly correlated features
print("\nHighly Correlated Features to be Dropped:")
print(highly_correlated_features)

# Drop the highly correlated features from the dataframe
rating_and_count_filtered = rating_and_count.drop(columns=highly_correlated_features)

# Display the filtered dataframe
print("\nFiltered DataFrame after dropping highly correlated features:")
print(rating_and_count_filtered)


ORIGINAL WORKING CODE BELLOW: 
"""

# Keep only the selected features in the DataFrame
df_regr = rating_and_count[selected_features100_list + ['review_rating']]

# # Calculate correlation matrix for all columns
correlation_matrix = df_regr.corr()

# Identify highly correlated features
highly_correlated_features = set()
for i in range(len(correlation_matrix.columns)):
    for j in range(i):
        if abs(correlation_matrix.iloc[i, j]) > 0.8:
            colname_i = correlation_matrix.columns[i]
            colname_j = correlation_matrix.columns[j]

            # Drop the first feature in the pair
            highly_correlated_features.add(colname_i)

# Display highly correlated features to be dropped
print("\nHighly Correlated Features to be Dropped:")
print(highly_correlated_features)

# Drop the highly correlated features from the dataframe
df_regr_filtered = df_regr.drop(columns=highly_correlated_features)


# Drop the highly correlated features from the dataframe
rating_and_count_filtered = rating_and_count.drop(columns=highly_correlated_features)

# Display the filtered dataframe
print("\nFiltered DataFrame after dropping highly correlated features:")
print(rating_and_count_filtered)

#Drops 71 columns, 62 amenities remaining



Highly Correlated Features to be Dropped:
{'school_counts', 'ice_cream_counts', 'parcel_locker_counts', 'cannabis_counts', 'music_studio_counts', 'bureau_de_change_counts', 'bicycle_parking_counts', 'theatre_counts', 'recycling_counts', 'veterinary_counts', 'parking_counts', 'meeting_centre_counts', 'townhall_counts', 'animal_shelter_counts', 'health_centre_counts', 'accupuncture_counts', 'clinic_counts', 'read_counts', 'lockers_counts', 'ticket_validator_counts', 'karaoke_box_counts', 'community_centre_counts', 'photo_booth_counts', 'taxi_counts', 'fire_station_counts', 'post_box_counts', 'playground_counts', 'Caused_counts', 'warehouse_counts', 'university_counts', 'vending_machine_counts', 'atm_counts', 'luggage_locker_counts', 'restaurant_counts', 'toilets_counts', 'out_counts', 'cafe_counts', 'locker_counts', 'port_counts', 'review_rating', 'bicycle_repair_station_counts', 'bank_counts', 'loading_dock_counts', 'charging_station_counts', 'total_counts', 'nightclub_counts', 'arts_c