In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import folium

Webscraped Data of coffee shops in riyadh, which will need alot of cleaning

In [3]:
cafes = pd.read_csv('riyadh_cafes.csv')
cafes = cafes.drop('index', axis=1)
cafes = cafes.replace("Null", np.nan)
cafes = cafes.replace("null", np.nan)
cafes = cafes.rename(columns={
    'lan': 'latitude',
    'lon': 'longitude'})
print(cafes.shape)
cafes['latitude'] = pd.to_numeric(cafes['latitude'], errors='coerce')
cafes['longitude'] = pd.to_numeric(cafes['longitude'], errors='coerce')

cafes.head()

(2609, 7)


Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
0,Cacti Cafe,4.2,2212,https://www.google.com/maps/place/Cacti+Cafe/d...,True,46.735613,24.833986
1,فازا قهوة مختصة,4.3,889,https://www.google.com/maps/place/%D9%81%D8%A7...,False,46.697337,24.772458
2,ناريز,3.8,39,https://www.google.com/maps/place/%D9%86%D8%A7...,False,46.852109,24.807277
3,Fc Lounge - اف سي لاونج,3.5,539,https://www.google.com/maps/place/Fc+Lounge+-+...,True,46.768059,24.813115
4,PEAKS,4.6,25,https://www.google.com/maps/place/PEAKS/data=!...,False,46.634809,24.742045


In [4]:
cleaned_words = []

functions

In [5]:
import re

#Basic String Cleaning:
def clean_str(text):
    search  = ["آ","إ","أ","ة","ى","  "]
    replace = ["ا","ا","ا","ه","ي"," "]
    
    text = text.replace('وو', 'و')
    text = text.replace('يي', 'ي')
    text = text.replace('اا', 'ا')

    #remove tashkeel
    p_tashkeel = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
    text = re.sub(p_tashkeel,"", text)
    
    for i in range(0, len(search)):
        text = text.replace(search[i], replace[i])
        
    # Remove unwanted symbols, keeping / and % and.
    text = re.sub(r'[^\w\s/٪.]', '', text) 
    
    return text.strip() 

import unicodedata

def remove_accents(text):
    if isinstance(text, str):
        return ''.join(
            c for c in unicodedata.normalize('NFKD', text)
            if not unicodedata.combining(c)
        )
    return text


def normalize_arabic(text):
    if not isinstance(text, str):
        return text
    text = re.sub(r'[\u064B-\u0652\u0640\u200f\u200e\u00A0]', '', text)  # remove diacritics, tatweel, NBSP
    return text.strip()

def fix_words(contained_word, true_word):
    mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
    cafes.loc[mask, 'coffeeName'] = true_word

    cleaned_words.append(true_word)

Cleaning the coffeeName column

lowercase, strip, remove extra spaces

In [6]:
cafes['coffeeName']  = cafes['coffeeName'].str.lower().str.strip().str.replace(r'\s+', ' ', regex=True).apply(clean_str).apply(remove_accents).apply(normalize_arabic).str.strip('.').str.strip()
# Remove multiple spaces and strip
cafes['coffeeName'] = cafes['coffeeName'].str.replace(r'\s+', ' ', regex=True).str.strip()

In [7]:
cafes[cafes.coffeeName.str.contains("مزاج ")].coffeeName.unique().tolist()

['مزاج مغربي', 'استكانه المزاج فرع اليرموك', 'ديوانيه مزاج الراحه']

In [8]:
cafes[cafes.coffeeName.str.contains("توتي")].coffeeName.unique().tolist()

['كافيه توتي سفرجل']

In [9]:
pd.set_option('display.max_colwidth', None)
cafes[cafes['coffeeName']=='ديوانيه مزاج الراحه']


Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
2361,ديوانيه مزاج الراحه,3.9,133,https://www.google.com/maps/place/%D8%AF%D9%8A%D9%88%D8%A7%D9%86%D9%8A%D8%A9+%D9%85%D8%B2%D8%A7%D8%AC+%D8%A7%D9%84%D8%B1%D8%A7%D8%AD%D8%A9%E2%80%AD/data=!4m7!3m6!1s0x3e2f1f61ca2d0791:0xd324cf45df912b38!8m2!3d24.6333332!4d46.5309647!16s%2Fg%2F11h3_c4bq0!19sChIJkQctymEfLz4ROCuR30XPJNM?authuser=0&hl=en&rclk=1,True,46.530965,24.633333


Some data cleaning for first few coffee places

In [10]:
## starbucks

arabic_starbucks_variants = [
    'ستار بكس',
    'ستار بوكس',
    'كافي استار بوكس',
    'استار بوكس',
    'استاربوكس'
]
mask_star = cafes['coffeeName'].str.contains('starbucks', case=False, na=False) & ~cafes['coffeeName'].str.contains('reserve', case=False, na=False)
cafes.loc[mask_star, 'coffeeName'] = 'starbucks'
cafes.loc[cafes['coffeeName'].isin(arabic_starbucks_variants), 'coffeeName'] = 'starbucks'

cleaned_words.append('starbucks')


# mccafe

mask_mccafe = (
    cafes['coffeeName'].str.contains(r'\bmccafe\b', case=False, na=False) |
    cafes['coffeeName'].str.contains('ماك كافيه', na=False)
)
cafes.loc[mask_mccafe, 'coffeeName'] = 'mccafe'

cleaned_words.append('mccafe')

# coffee address

fix_words(r'(address|عنوان)', 'coffee address')


## dunkin donuts

fix_words(r'dunkin|دانكن', 'dunkin donuts')

## barns

fix_words(r'\bbarns\b|بارنز', 'barns')



## dr.cafe  ( after next cell we remove coffee)

mask_drcafe = cafes['coffeeName'].str.contains(r'dr\.?\s*cafe|د\.?كيف', case=False, na=False)
cafes.loc[mask_drcafe, 'coffeeName'] = 'dr.cafe coffee'
cleaned_words.append('dr.cafe')


## java ( after next cell we remove cafe)
mask_java_cafe = (
    cafes['coffeeName'].str.contains(r'java\s*cafe|جافا\s*كافيه', case=False, na=False) &
    ~cafes['coffeeName'].str.contains(r'java\s*time|جافا\s*تايم', case=False, na=False)
)

cafes.loc[mask_java_cafe, 'coffeeName'] = 'java cafe'
cleaned_words.append('java')

## java time

fix_words(r'java\s*time|جافا\s*تايم', 'java time')


## dan (removed cafe after next cell is executed)

mask_dan = cafes['coffeeName'].str.contains(r'\bdan\s*cafe\b|دان\s*كافيه', case=False, na=False)
cafes.loc[mask_dan, 'coffeeName'] = 'dan cafe'
cleaned_words.append('dan')



## dana (removed cafe after next cell is executed)

mask_dana = cafes['coffeeName'].str.contains(r'\bdana\s+(cafe|coffee)\b|دانه\s*كافيه', case=False, na=False)
cafes.loc[mask_dana, 'coffeeName'] = 'dana cafe'

cleaned_words.append('dana')


## mammabunz (removed cafe after next cell is executed)

mask_mammabunz = cafes['coffeeName'].str.contains(r'mammabunz|ماما\s*بنز', case=False, na=False)
cafes.loc[mask_mammabunz, 'coffeeName'] = 'mammabunz cafe'
cleaned_words.append('mammabunz')


  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask_dana = cafes['coffeeName'].str.contains(r'\bdana\s+(cafe|coffee)\b|دانه\s*كافيه', case=False, na=False)


removing cafe or coffee only if its at the end as a standalone word

In [11]:
cafes['coffeeName'] = cafes['coffeeName'].str.replace(r'\b(coffee|cofee|cafe|كافيه|كيف|الكيف|الكافيه|قهوة|القهوة|القهوه|قهوه|كافي)$', '', regex=True).str.strip()
cafes['coffeeName'] = cafes['coffeeName'].str.replace(r'^كافيه\s*', '', regex=True)
cafes['coffeeName'] = cafes['coffeeName'].str.strip() ## do not run twice

In [12]:
# removing coffee places that are not actually coffee

cafes = cafes.drop(index=[982])

# removing places that dont exist / closed?
cafes = cafes.drop(index=[1818,1958,1348,2371,648,18,189,318])

# warehouse? then it's not a shop

cafes = cafes.drop(index=[1198])

In [13]:
# renaming unnamed cafes
# Example mapping from index to correct coffee shop name
manual_renames = {
    109: 'actual name here',           # e.g., 'sandwich lab'
    369: 'kif',
    1368: 'cofen',
    2597: 'وادي الفوارس',
    1092: 'artista',
    554: "&",
    1374: "&",
    1927: "coffee boutique"
    
}

# Apply the renaming
for idx, true_name in manual_renames.items():
    cafes.loc[idx, 'coffeeName'] = true_name

more data cleaning

In [14]:
## kyan

fix_words(r'^(kyan\s*كيان|كيان\s*kyan|kyanكيان|كيانkyan|kyan|كيان)(.*)?$', 'kyan')
## arabica 
cleaned_words.append('arabica')

## tutti

cleaned_words.append('tutti')

## mezaj

fix_words(r'مزاج\s+مغربي|mezaj', 'mezaj')


# taj

fix_words(r'\b(taj|تاج)\b', 'taj')

# veloce
fix_words(r'\b(veloce|فيلوتشي)\b', 'veloce')

# costa
fix_words(r'\b(costa|كوستا)\b', 'costa')


# half million
fix_words(r'\b(half\s+million|هاف\s+مليون)\b', 'half million')

# tim hortons

fix_words(r'\b(tim\s*hortons|تيم\s+هور(?:تنز|تونز)?)\b', 'tim hortons')

# sors

fix_words(r'\b(sors|سورس)\b', 'sors')

# arabica
fix_words(r'^(arabica|ارابيكا)$', 'arabica')

# arabica star
fix_words(r'^(arabica star|ارابيكا ستار|اربيكا استار)$', 'arabica star')

# key

fix_words(r'^(key|كي)$', 'key')

# draft
fix_words(r'\b(draft|درافت)\b', 'draft')

# moroccan taste 

fix_words(r'^(المذاق المغربي|moroccan taste)$', 'moroccan taste')

# dose

cleaned_words.append("dose")

# 4twins

mask_4twins = (
    cafes['coffeeName'].str.contains(r'4twins|فورتوينز', case=False, na=False)
    & ~cafes['coffeeName'].str.contains('sweet', case=False, na=False)
)
cafes.loc[mask_4twins, 'coffeeName'] = '4twins'

cleaned_words.append('4twins')

# kif
cleaned_words.append('kif')

# barkif
fix_words(r'باركيف\s*barkif', 'barkif')

# star sky
fix_words(r'^(ستار سكاي\s*starskycoffee|ستار سكاي\s*star sky)$', 'star sky')

# stark lounge

fix_words(r'^ستارك\s*لاونج$', 'stark lounge')

# tday
cleaned_words.append('tday')

# caffeine day

cleaned_words.append('caffeine day')

# coffee day
fix_words(r'^(coffee\s*day.*)$', 'coffee day')

# joffreys

fix_words(r'^(joffreys|جوفريز|joffreys\s*جوفريز)$', 'joffreys')

# &
cleaned_words.append("&")

# coffee boutique

cleaned_words.append("coffee boutique")

# pianolla
fix_words(r'(pianolla cafe|بيانولا)', 'pianolla')

# hi 
cleaned_words.append("hi")

# shine
fix_words(r'^(بريق القهوه shine|بريق)$', 'shine')

# molten chocolate
cleaned_words.append('molten chocolate')

# molten

fix_words(r'^molten مولتن$', 'molten')

# cofen

fix_words(r'^(كوفن\s*cofen|كوفن)$', 'cofen')


  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contains(contained_word, case=False, na=False)
  mask = cafes['coffeeName'].str.contain

In [15]:

for i in cafes[cafes.coffeeName.str.contains("مولتن")].coffeeName.unique().tolist():
    print(i)

In [16]:
for i in cafes[cafes.coffeeName.str.contains("كوفن")].coffeeName.unique().tolist():
    print(i)


In [17]:
for i in cafes[cafes.coffeeName.str.contains("شوب")].coffeeName.unique().tolist():
    print(i)


كوفي شوب
كافي شوب ناريز


In [18]:
cafes[cafes['coffeeName']=='كوفي شوب']

Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
322,كوفي شوب,3.5,15,https://www.google.com/maps/place/%D9%83%D9%88%D9%81%D9%8A+%D8%B4%D9%88%D8%A8%E2%80%AD/data=!4m7!3m6!1s0x3e2f03fd4076ae99:0x12b7ea65e4ff8403!8m2!3d24.7039656!4d46.745765!16s%2Fg%2F11c57d5c8p!19sChIJma52QP0DLz4RA4T_5GXqtxI?authuser=0&hl=en&rclk=1,False,46.745765,24.703966
1109,كوفي شوب,3.5,11,https://www.google.com/maps/place/%D9%83%D9%88%D9%81%D9%8A+%D8%B4%D9%88%D8%A8%E2%80%AD/data=!4m7!3m6!1s0x3e2f1b6f1d8210e1:0xb314f794d4e28051!8m2!3d24.595879!4d46.6027369!16s%2Fg%2F11h033h2s2!19sChIJ4RCCHW8bLz4RUYDi1JT3FLM?authuser=0&hl=en&rclk=1,False,46.602737,24.595879
1865,كوفي شوب,5.0,4,https://www.google.com/maps/place/%D9%83%D9%88%D9%81%D9%8A+%D8%B4%D9%88%D8%A8%E2%80%AD/data=!4m7!3m6!1s0x3e2f1a9b20814d93:0xae734367d87dba54!8m2!3d24.5874424!4d46.6426109!16s%2Fg%2F11gfnh2bxc!19sChIJk02BIJsaLz4RVLp92GdDc64?authuser=0&hl=en&rclk=1,False,46.642611,24.587442
1870,كوفي شوب,3.1,8,https://www.google.com/maps/place/%D9%83%D9%88%D9%81%D9%8A+%D8%B4%D9%88%D8%A8%E2%80%AD/data=!4m7!3m6!1s0x3e2f0b8d5c072af5:0x1395c8a50b1ebc2b!8m2!3d24.5367453!4d46.7721515!16s%2Fg%2F11f653z81p!19sChIJ9SoHXI0LLz4RK7weC6XIlRM?authuser=0&hl=en&rclk=1,True,46.772151,24.536745


In [19]:
cafes[~cafes['coffeeName'].isin(cleaned_words)]['coffeeName'].value_counts()

coffeeName
نيفانا                        4
ounce speciality              4
مون كافيه moon                4
كوفي شوب                      4
كيوان كافيه kiwan             3
                             ..
ريشيو ratio                   1
ديوانيه ومقهي الوتين          1
quill                         1
شاي السرور                    1
مقهي سوداني maqaa sudaniun    1
Name: count, Length: 1851, dtype: int64

In [20]:
cafes.coffeeName.value_counts()

coffeeName
dunkin donuts                 54
dr.cafe                       53
barns                         51
mccafe                        47
starbucks                     44
                              ..
ديوانيه ومقهي الوتين           1
quill                          1
شاي السرور                     1
45 degrees                     1
مقهي سوداني maqaa sudaniun     1
Name: count, Length: 1894, dtype: int64

Removing some rows that have duplicate lat and long(not all tho)

In [21]:
cafes = cafes.drop(index=[1047, 521, 453, 209,352, 1209, 1370, 2580,1813])

In [22]:
mask = cafes.duplicated(subset=['latitude', 'longitude'], keep=False)
duplicates = cafes[mask].sort_values(['longitude','latitude'])
duplicates

Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
417,diwaniyat,5.0,1,https://www.google.com/maps/place/Diwaniyat+Cafe/data=!4m7!3m6!1s0x3e2f02d22ab8de1b:0xee848e054e6d560a!8m2!3d24.7135517!4d46.6752957!16s%2Fg%2F11c2l15lb0!19sChIJG964KtICLz4RClZtTgWOhO4?authuser=0&hl=en&rclk=1,False,46.675296,24.713552
598,coffee talk arabia,3.6,10,https://www.google.com/maps/place/Coffee+Talk+Arabia/data=!4m7!3m6!1s0x3e2f02d3dd4dd745:0xd94ea18ec5a4d47d!8m2!3d24.7135517!4d46.6752957!16s%2Fg%2F11f3_4p_g7!19sChIJRddN3dMCLz4RfdSkxY6hTtk?authuser=0&hl=en&rclk=1,False,46.675296,24.713552
231,alnakheel,5.0,1,https://www.google.com/maps/place/Alnakheel+Cafe/data=!4m7!3m6!1s0x3e2f059e26a1d6b1:0xf15a7ce062944751!8m2!3d24.6200862!4d46.7095332!16s%2Fg%2F11rklb3sd3!19sChIJsdahJp4FLz4RUUeUYuB8WvE?authuser=0&hl=en&rclk=1,False,46.709533,24.620086
647,la caverna,1.0,1,https://www.google.com/maps/place/La+Caverna+Cafe/data=!4m7!3m6!1s0x3e2f059e26a1d6b1:0x2b84ab02c4c36ca5!8m2!3d24.6200862!4d46.7095332!16s%2Fg%2F11rkl9z5tr!19sChIJsdahJp4FLz4RpWzDxAKrhCs?authuser=0&hl=en&rclk=1,False,46.709533,24.620086
1950,my way,3.6,7,https://www.google.com/maps/place/My+Way+Cafe/data=!4m7!3m6!1s0x3e2f059e26a1d6b1:0x68d467c6d2478326!8m2!3d24.6200862!4d46.7095332!16s%2Fg%2F11pkhr9r8l!19sChIJsdahJp4FLz4RJoNH0sZn1Gg?authuser=0&hl=en&rclk=1,False,46.709533,24.620086
363,استراحه الامير,4.6,8,https://www.google.com/maps/place/%D8%A7%D8%B3%D8%AA%D8%B1%D8%A7%D8%AD%D8%A9+%D8%A7%D9%84%D8%A7%D9%85%D9%8A%D8%B1%E2%80%AD/data=!4m7!3m6!1s0x3e2effb3ee940f6b:0x9d98c2d3272180d!8m2!3d24.8144107!4d46.7653716!16s%2Fg%2F11h3wybjw5!19sChIJaw-U7rP_Lj4RDRhyMi2M2Qk?authuser=0&hl=en&rclk=1,False,,
427,veloce,4.5,22,https://www.google.com/maps/place/Veloce+Cafe/data=!4m7!3m6!1s0x3e2efdf26aa8c68b:0x9b8711b1e87fa144!8m2!3d24.7622419!4d46.7227265!16s%2Fg%2F11t2f4_t24!19sChIJi8aoavL9Lj4RRKF_6LERh5s?authuser=0&hl=en&rclk=1,False,,
1452,الايمان,4.1,16,https://www.google.com/maps/place/%D8%A7%D9%84%D8%A7%D9%8A%D9%85%D8%A7%D9%86%E2%80%AD/data=!4m7!3m6!1s0x3e2f117dc77cdca5:0xe3a38d2147aa4b06!8m2!3d24.566332!4d46.6676809!16s%2Fg%2F11gtsw4d2j!19sChIJpdx8x30RLz4RBkuqRyGNo-M?authuser=0&hl=en&rclk=1,False,,


fixing some errors

In [23]:
cafes.loc[231 ,['longitude','latitude']] =  46.70953304818867,24.620092326545336
cafes.loc[647 ,['longitude','latitude']] =  46.70764972712889,24.62060317492925
cafes.loc[1950 ,['longitude','latitude']] = 46.70831194957225,24.620874906611203

missing coordinates cleaning

In [24]:
missing_coords = cafes[cafes['longitude'].isnull() | cafes['latitude'].isnull()]
missing_coords

Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
363,استراحه الامير,4.6,8,https://www.google.com/maps/place/%D8%A7%D8%B3%D8%AA%D8%B1%D8%A7%D8%AD%D8%A9+%D8%A7%D9%84%D8%A7%D9%85%D9%8A%D8%B1%E2%80%AD/data=!4m7!3m6!1s0x3e2effb3ee940f6b:0x9d98c2d3272180d!8m2!3d24.8144107!4d46.7653716!16s%2Fg%2F11h3wybjw5!19sChIJaw-U7rP_Lj4RDRhyMi2M2Qk?authuser=0&hl=en&rclk=1,False,,
427,veloce,4.5,22,https://www.google.com/maps/place/Veloce+Cafe/data=!4m7!3m6!1s0x3e2efdf26aa8c68b:0x9b8711b1e87fa144!8m2!3d24.7622419!4d46.7227265!16s%2Fg%2F11t2f4_t24!19sChIJi8aoavL9Lj4RRKF_6LERh5s?authuser=0&hl=en&rclk=1,False,,
1452,الايمان,4.1,16,https://www.google.com/maps/place/%D8%A7%D9%84%D8%A7%D9%8A%D9%85%D8%A7%D9%86%E2%80%AD/data=!4m7!3m6!1s0x3e2f117dc77cdca5:0xe3a38d2147aa4b06!8m2!3d24.566332!4d46.6676809!16s%2Fg%2F11gtsw4d2j!19sChIJpdx8x30RLz4RBkuqRyGNo-M?authuser=0&hl=en&rclk=1,False,,


In [25]:
print(missing_coords[['coffeeName', 'url']])
pd.set_option('display.max_colwidth', None)


          coffeeName  \
363   استراحه الامير   
427           veloce   
1452         الايمان   

                                                                                                                                                                                                                                                                                            url  
363   https://www.google.com/maps/place/%D8%A7%D8%B3%D8%AA%D8%B1%D8%A7%D8%AD%D8%A9+%D8%A7%D9%84%D8%A7%D9%85%D9%8A%D8%B1%E2%80%AD/data=!4m7!3m6!1s0x3e2effb3ee940f6b:0x9d98c2d3272180d!8m2!3d24.8144107!4d46.7653716!16s%2Fg%2F11h3wybjw5!19sChIJaw-U7rP_Lj4RDRhyMi2M2Qk?authuser=0&hl=en&rclk=1  
427                                                                               https://www.google.com/maps/place/Veloce+Cafe/data=!4m7!3m6!1s0x3e2efdf26aa8c68b:0x9b8711b1e87fa144!8m2!3d24.7622419!4d46.7227265!16s%2Fg%2F11t2f4_t24!19sChIJi8aoavL9Lj4RRKF_6LERh5s?authuser=0&hl=en&rclk=1  
1452                             

Removing bad data --> .. is not a place and url doesnt work

In [26]:
cafes = cafes[cafes['coffeeName'] != '..']
cafes.head()

Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
0,cacti,4.2,2212,https://www.google.com/maps/place/Cacti+Cafe/data=!4m7!3m6!1s0x3e2efde0d2059f1d:0xfca400b51ca140cc!8m2!3d24.8339855!4d46.7356133!16s%2Fg%2F11mwzvn0zx!19sChIJHZ8F0uD9Lj4RzEChHLUApPw?authuser=0&hl=en&rclk=1,True,46.735613,24.833986
1,فازا قهوه مختصه,4.3,889,https://www.google.com/maps/place/%D9%81%D8%A7%D8%B2%D8%A7+%D9%82%D9%87%D9%88%D8%A9+%D9%85%D8%AE%D8%AA%D8%B5%D8%A9%E2%80%AD/data=!4m7!3m6!1s0x3e2efdc6c840b3ed:0x792ad260e10b7e!8m2!3d24.7724577!4d46.6973369!16s%2Fg%2F11mwkdclc9!19sChIJ7bNAyMb9Lj4RfgvhYNIqeQA?authuser=0&hl=en&rclk=1,False,46.697337,24.772458
2,ناريز,3.8,39,https://www.google.com/maps/place/%D9%86%D8%A7%D8%B1%D9%8A%D8%B2%E2%80%AD/data=!4m7!3m6!1s0x3e2faacd291e6591:0x3e908558d2f0f63f!8m2!3d24.8072766!4d46.8521092!16s%2Fg%2F11c5_5k_zz!19sChIJkWUeKc2qLz4RP_bw0liFkD4?authuser=0&hl=en&rclk=1,False,46.852109,24.807277
3,fc lounge اف سي لاونج,3.5,539,https://www.google.com/maps/place/Fc+Lounge+-+%D8%A7%D9%81+%D8%B3%D9%8A+%D9%84%D8%A7%D9%88%D9%86%D8%AC%E2%80%AD/data=!4m7!3m6!1s0x3e2eff79d7dd770d:0xbd5029a7e12e0c2b!8m2!3d24.8131149!4d46.7680585!16s%2Fg%2F11lgkw1ffk!19sChIJDXfd13n_Lj4RKwwu4acpUL0?authuser=0&hl=en&rclk=1,True,46.768059,24.813115
4,peaks,4.6,25,https://www.google.com/maps/place/PEAKS/data=!4m7!3m6!1s0x3e2ee380585f0151:0xab784cd32a1e3d85!8m2!3d24.742045!4d46.6348092!16s%2Fg%2F11s0qh89n0!19sChIJUQFfWIDjLj4RhT0eKtNMeKs?authuser=0&hl=en&rclk=1,False,46.634809,24.742045


clicking url in google maps, filling in correct coordinates for null values

In [27]:
cafes.loc[363 ,['longitude','latitude']] =  46.76535014232869, 24.814398561155688
cafes.loc[427 ,['longitude','latitude']] =  46.7227265, 24.762256513537807 
cafes.loc[1452 ,['longitude','latitude']] = 46.66768090000001, 24.566332000000113

In [28]:
cafes

Unnamed: 0,coffeeName,rating,rating_count,url,24_hours,longitude,latitude
0,cacti,4.2,2212,https://www.google.com/maps/place/Cacti+Cafe/data=!4m7!3m6!1s0x3e2efde0d2059f1d:0xfca400b51ca140cc!8m2!3d24.8339855!4d46.7356133!16s%2Fg%2F11mwzvn0zx!19sChIJHZ8F0uD9Lj4RzEChHLUApPw?authuser=0&hl=en&rclk=1,True,46.735613,24.833986
1,فازا قهوه مختصه,4.3,889,https://www.google.com/maps/place/%D9%81%D8%A7%D8%B2%D8%A7+%D9%82%D9%87%D9%88%D8%A9+%D9%85%D8%AE%D8%AA%D8%B5%D8%A9%E2%80%AD/data=!4m7!3m6!1s0x3e2efdc6c840b3ed:0x792ad260e10b7e!8m2!3d24.7724577!4d46.6973369!16s%2Fg%2F11mwkdclc9!19sChIJ7bNAyMb9Lj4RfgvhYNIqeQA?authuser=0&hl=en&rclk=1,False,46.697337,24.772458
2,ناريز,3.8,39,https://www.google.com/maps/place/%D9%86%D8%A7%D8%B1%D9%8A%D8%B2%E2%80%AD/data=!4m7!3m6!1s0x3e2faacd291e6591:0x3e908558d2f0f63f!8m2!3d24.8072766!4d46.8521092!16s%2Fg%2F11c5_5k_zz!19sChIJkWUeKc2qLz4RP_bw0liFkD4?authuser=0&hl=en&rclk=1,False,46.852109,24.807277
3,fc lounge اف سي لاونج,3.5,539,https://www.google.com/maps/place/Fc+Lounge+-+%D8%A7%D9%81+%D8%B3%D9%8A+%D9%84%D8%A7%D9%88%D9%86%D8%AC%E2%80%AD/data=!4m7!3m6!1s0x3e2eff79d7dd770d:0xbd5029a7e12e0c2b!8m2!3d24.8131149!4d46.7680585!16s%2Fg%2F11lgkw1ffk!19sChIJDXfd13n_Lj4RKwwu4acpUL0?authuser=0&hl=en&rclk=1,True,46.768059,24.813115
4,peaks,4.6,25,https://www.google.com/maps/place/PEAKS/data=!4m7!3m6!1s0x3e2ee380585f0151:0xab784cd32a1e3d85!8m2!3d24.742045!4d46.6348092!16s%2Fg%2F11s0qh89n0!19sChIJUQFfWIDjLj4RhT0eKtNMeKs?authuser=0&hl=en&rclk=1,False,46.634809,24.742045
...,...,...,...,...,...,...,...
2604,double one,4.1,319,https://www.google.com/maps/place/Double+one/data=!4m7!3m6!1s0x3e2efdc09016a0f3:0x39d25db1f2f226de!8m2!3d24.8274338!4d46.7279354!16s%2Fg%2F11nmzvjbb1!19sChIJ86AWkMD9Lj4R3iby8rFd0jk?authuser=0&hl=en&rclk=1,False,46.727935,24.827434
2605,saws speciality coffee ساوس للقهوه المختصه,4.5,214,https://www.google.com/maps/place/Saws+speciality+coffee+%7C+%D8%B3%D8%A7%D9%88%D8%B3+%D9%84%D9%84%D9%82%D9%87%D9%88%D8%A9+%D8%A7%D9%84%D9%85%D8%AE%D8%AA%D8%B5%D8%A9%E2%80%AD/data=!4m7!3m6!1s0x3e2f1d615042e92b:0xdf1ff4bd6f4575fa!8m2!3d24.7223206!4d46.6524094!16s%2Fg%2F11rxpsmxg2!19sChIJK-lCUGEdLz4R-nVFb730H98?authuser=0&hl=en&rclk=1,False,46.652409,24.722321
2606,coffee address,3.9,121,https://www.google.com/maps/place/%D8%B9%D9%86%D9%88%D8%A7%D9%86+%D8%A7%D9%84%D9%82%D9%87%D9%88%D8%A9%E2%80%AD/data=!4m7!3m6!1s0x3e2f017495adf78d:0x6a9048b2a09ae824!8m2!3d24.7496334!4d46.7647296!16s%2Fg%2F11s_0g4ljg!19sChIJjfetlXQBLz4RJOiaoLJIkGo?authuser=0&hl=en&rclk=1,False,46.764730,24.749633
2607,مقهي سوداني maqaa sudaniun,4.8,19,https://www.google.com/maps/place/%D9%85%D9%82%D9%87%D9%89+%D8%B3%D9%88%D8%AF%D8%A7%D9%86%D9%8A+maqaa+sudaniun%E2%80%AD/data=!4m7!3m6!1s0x3e2ee3f121fabcd7:0x492e9f9519d6edc!8m2!3d24.7522761!4d46.6666399!16s%2Fg%2F11t5ljs9nj!19sChIJ17z6IfHjLj4R3G6dUfnpkgQ?authuser=0&hl=en&rclk=1,False,46.666640,24.752276


scoring formula

In [33]:
# Normalize rating and rating_count
cafes['rating_norm'] = (cafes['rating'] - cafes['rating'].min()) / (cafes['rating'].max() - cafes['rating'].min())
cafes['popularity_norm'] = (cafes['rating_count'] - cafes['rating_count'].min()) / (cafes['rating_count'].max() - cafes['rating_count'].min())

# Weight for rating 
w = 0.8

# Final score out of 5
cafes['score(out_of_5)'] = 5 * (w * cafes['rating_norm'] + (1 - w) * cafes['popularity_norm'])

In [34]:
cafes_gpd = gpd.GeoDataFrame(cafes, geometry = gpd.points_from_xy(
    cafes['longitude'],
    cafes['latitude']
))
cafes_gpd= cafes_gpd.set_crs(epsg=4326)

In [35]:
cafes_gpd["location"] = cafes_gpd.apply(
    lambda row: f"""<a href="{row['url']}" target="_blank">Open in Google Maps</a>""", axis=1
)




In [38]:
m = cafes_gpd.explore(
    column="score(out_of_5)",
    cmap="viridis",
    legend=True,
    tooltip=["coffeeName", "rating", "rating_count", "score(out_of_5)"],
)

    
m

In [39]:
import plotly.express as px

fig = px.scatter_mapbox(
    cafes_gpd,
    lat="latitude",
    lon="longitude",
    color="score(out_of_5)",  # Color markers by score
    color_continuous_scale="viridis",  # Match cmap
    hover_data=["coffeeName", "rating", "rating_count", "score(out_of_5)"],  # Tooltips
    zoom=11,
    height=600
)

fig.update_layout(
    mapbox_style="open-street-map",  # No token required
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    coloraxis_colorbar=dict(title="Coffee Score")
)

fig.show()