# Descriptive Analysis for ATMs in Saudi Arabia
### Asrar AlJuhani, Nouf AlZahrani, Zahra'a Hamwi

## 1- Import libraries and dataset:

In [1]:
import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz,process
import gmaps
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
%matplotlib inline



In [2]:
#Google Maps API Key
gmaps.configure(api_key="ENTER YOUR API KEY HERE")

In [3]:
#Import ATM dataset
atms = pd.read_excel('dataset.xlsx',sep='\t')

## 2- Data Exploration:

In [4]:
atms.head()

Unnamed: 0,Site,الموقع,City English,City Arabic,Reg,Brn,Site Type,X GIS Coordinates,Y GIS Coordinates
0,CA-AKIK BRANCH BAHA 2,فرع العقيق الباحة 2,AL-AQIQ- BAHA,العقيق - الباحة,AL-BAHA,Brn,Room-Window,41.6538,20.2704
1,CA-AKIK BRANCH BAHA 1,فرع العقيق الباحة 1,AL-AQIQ- BAHA,العقيق - الباحة,AL-BAHA,Brn,Room-Window,41.6538,20.2704
2,AKIK BRANCH BAHA 3,فرع العقيق الباحة 3,AL-AQIQ- BAHA,العقيق - الباحة,AL-BAHA,Brn,Room-Window,41.6538,20.2704
3,AL-WAFA GAS STATION,محطة الوفاء - العقيق,AL-AQIQ- BAHA,العقيق - الباحة,AL-BAHA,Of Site,drive up - Island,41.6327,20.2356
4,ALTAYAR GAS STATION KARA,محطة الطيار - مركز كرا,AL-AQIQ- BAHA,العقيق - الباحة,AL-BAHA,Of Site,drive up - Island,41.5977,20.3888


In [5]:
atms.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15779 entries, 0 to 15778
Data columns (total 9 columns):
Site                 11172 non-null object
الموقع               13419 non-null object
City English         11178 non-null object
City Arabic          7320 non-null object
 Reg                 8768 non-null object
Brn                  4898 non-null object
Site Type            8768 non-null object
X GIS Coordinates    15778 non-null object
Y GIS Coordinates    15778 non-null object
dtypes: object(9)
memory usage: 1.1+ MB


In [6]:
atms.isnull().sum()

Site                  4607
الموقع                2360
City English          4601
City Arabic           8459
 Reg                  7011
Brn                  10881
Site Type             7011
X GIS Coordinates        1
Y GIS Coordinates        1
dtype: int64

In [7]:
atms.describe()

Unnamed: 0,Site,الموقع,City English,City Arabic,Reg,Brn,Site Type,X GIS Coordinates,Y GIS Coordinates
count,11172,13419,11178,7320,8768,4898,8768,15778.0,15778.0
unique,10619,11903,625,325,19,3,26,12953.0,12948.0
top,VEGETABLE MARKET,King Abdul Aziz Road,Riyadh,الرياض,Eastern,Of Site,Room-Window,50.170639,26.305472
freq,7,44,1427,1888,1441,3128,2226,15.0,15.0


- Rename columns for easier access: 

In [8]:
atms.rename(columns=lambda x:x.replace(' ','_').lower(), inplace=True)

In [9]:
atms.rename(columns={'x_gis_coordinates':'lon','y_gis_coordinates':'lat','الموقع':'site_ar','_reg':'reg'},inplace=True);

In [10]:
atms['city_english'].unique()

array(['AL-AQIQ- BAHA', 'AL-BAHA', 'AL-MANDIQ', 'BEADAH', 'BILGORSHI',
       'JARAB AQIQ', 'QULWA', 'AL-BOSAYTA', 'AL-EISAWAYAH', 'AL-HADIASAH',
       'AL-JOUF', 'ALNABK ABU QASR', 'AL-QORYAT', 'DAWMAT AL-JANDL',
       'MAYQOOH', 'TABARJAL', 'ABHA', 'AFRAA-SABIT AL-ALYAH', 'AL-Amwah',
       'ALAREEN', 'AL-BASHAYIR', 'AL-BERK', 'AL-FARASHA', 'AL-HAFAYER',
       'AL-HAFEERAH', 'AL-HARJA', 'AL-HEMA', 'AL-HOREADAH',
       'AL-JEABAH- Bisha', 'AL-MAGARDAH', 'AL-MAUTHA', 'AL-NAMAS',
       'AL-NAQEA', 'AL-QAHMA', 'AL-SOBEAKHAH', 'AL-WADEIAN', 'BALAHMAR',
       'BALASMER', 'BANI AMRO', 'BARIQ', 'BISHA', 'JASH-TATHLETH',
       'K. MUSHEET', 'K.AL-BAHR', 'KHAYBR AL-GANOUB', 'MAHIL', 'MURBA',
       'RIJAL-ALMA', 'SABIT AL-ALYAH', 'SAMKH', 'SARAT OBEADAH',
       'TANDAHA', 'TANOMA', 'TATHLETH', 'THALOTH AL-MANDHER',
       'THARAN AL-GANOUB', 'TUREEB', 'UHID ROFYADA', 'WADY HASBAL',
       'YAARA', 'ABQIQ', 'AIN DAR', 'AL-BATHA Burder  ', 'AL-JUBEAL',
       'AL-JUBEAL Indust.', 'AL-KHA

## 3- Data Cleaning:

- Drop duplicated records

In [11]:
#Drop duplicated records that are  identical in all colounm values
atms.drop_duplicates(keep='first',inplace=True)
atms.shape

(15440, 9)

In [12]:
atms.duplicated(subset=['lon','lat']).sum()

2372

In [13]:
#Drop duplicated records that have the same X and Y Coordinates
atms.drop_duplicates(subset=['lon','lat'],keep='first',inplace=True);

In [14]:
atms.shape

(13068, 9)

- Cleaning Cities and Region Columns:
   - a.  Upper case all cities
   - b.  Remove white spaces
   - c.  Uniform naming style: convert all (AL-) to (AL)
   - d.  Check similarity using FuzzyWuzzy library
   - e.  Manual replacement for incorrect spellings 

In [15]:
# For city_english column:

In [16]:
# Upper case all cities
atms['city_english'] = atms['city_english'].str.upper()

In [17]:
# Checking the change in number of cities in city_english column
atms['city_english'].nunique()

576

In [18]:
# Remove white spaces
atms['city_english']=atms['city_english'].str.strip();
atms['city_english'].nunique()

566

In [19]:
# Uniform naming style: convert all (AL-) to (AL)
atms.replace(to_replace ='AL-', value = 'AL', regex = True,inplace=True) 
atms.replace(to_replace ='AL ', value = 'AL', regex = True,inplace=True)
atms['city_english'].nunique()

538

In [20]:
def similar(column,score,limit=0):
    
    """ Print cities with similar spellings by specifying the ratio of similarity between each two cities 
        using FuzzyWuzzy library by looping through unique cities list 
        to ease handling different spelling of cities names  
    """
    
    unique_list=atms[column].unique()
    unique_list2=np.asarray(unique_list)
    if limit==0:
        for i in range(len(unique_list)):
            for j in range(len(unique_list2)-1,-1,-1):
                if(isinstance(unique_list[i],str) and isinstance(unique_list2[j],str) and i!=j):
                    if (fuzz.ratio(unique_list[i],unique_list2[j]) > score ):
                        print(unique_list[i],"is similar to",unique_list2[j]);
    else:
        for i in range(len(unique_list)):
            for j in range(len(unique_list2)-1,-1,-1):
                if(isinstance(unique_list[i],str) and isinstance(unique_list2[j],str) and i!=j):
                    if (fuzz.ratio(unique_list[i],unique_list2[j]) >= score and fuzz.ratio(unique_list[i],unique_list2[j]) < limit ):
                        print(unique_list[i],"is similar to",unique_list2[j])
                        

In [21]:
# List of cities with similar spellings, similarity ratio= 95
similar('city_english',95)

HAFR ALBATIN is similar to HAFAR ALBATIN
HAFR ALBATIN is similar to HAFER ALBATIN
RAS TANOURAH is similar to RAS TANNOURAH
RIYAD ALKHABRA is similar to RIYADH ALKHABRA
OYOUN ALJWA is similar to OYOUN ALJIWA
HAFER ALBATIN is similar to HAFR ALBATIN
RIYADH ALKABRA is similar to RIYADH ALKHABRA
HAFR ALBATEN is similar to HAFER ALBATEN
HAFER ALBATEN is similar to HAFR ALBATEN
ALQUWAYIYAH is similar to ALQUWAY'IYAH
RAS TANNURAH is similar to RAS TANNOURAH
HAFAR ALBATIN is similar to HAFR ALBATIN
ALQUWAY'IYAH is similar to ALQUWAYIYAH
OYOUN ALJIWA is similar to OYOUN ALJWA
RIYADH ALKHABRA is similar to RIYADH ALKABRA
RIYADH ALKHABRA is similar to RIYAD ALKHABRA
RAS TANNOURAH is similar to RAS TANNURAH
RAS TANNOURAH is similar to RAS TANOURAH


In [22]:
# Choose one spelling for each city by replacing bad spelling with the chosen one
atms.replace(to_replace =['HAFAR ALBATIN','RAS TANOURAH','WADI AL DAWASIR','OYOUN AL JWA','RIYADH ALKABRA '], 
             value =['HAFR ALBATIN','RAS TANNOURAH','WADI ALDAWASIR','OYOUN AL JIWA','RIYADH ALKHABRA'], 
             regex = False,inplace=True) 

In [23]:
similar('city_english',95)

HAFR ALBATIN is similar to HAFER ALBATIN
RAS TANNOURAH is similar to RAS TANNURAH
RIYAD ALKHABRA is similar to RIYADH ALKHABRA
OYOUN ALJWA is similar to OYOUN ALJIWA
HAFER ALBATIN is similar to HAFR ALBATIN
RIYADH ALKABRA is similar to RIYADH ALKHABRA
HAFR ALBATEN is similar to HAFER ALBATEN
HAFER ALBATEN is similar to HAFR ALBATEN
ALQUWAYIYAH is similar to ALQUWAY'IYAH
RAS TANNURAH is similar to RAS TANNOURAH
ALQUWAY'IYAH is similar to ALQUWAYIYAH
OYOUN ALJIWA is similar to OYOUN ALJWA
RIYADH ALKHABRA is similar to RIYADH ALKABRA
RIYADH ALKHABRA is similar to RIYAD ALKHABRA


In [24]:
atms.replace(to_replace =['RAS TANNURAH','RIYAD ALKHABRA','RIYADH ALKABRA','RIYADH AL KHABRA'], 
             value =['RAS TANNOURAH','RIYADH ALKHABRA','RIYADH ALKHABRA','RIYADH ALKHABRA'], 
             regex = False,inplace=True) 

In [25]:
similar('city_english',94)

RIJALALMA is similar to RIJALALMAE
HAFR ALBATIN is similar to HAFER ALBATIN
ALQUNFUDHA is similar to ALQUNFUDHAH
HALAT AMAR is similar to HALAT AMMAR
OYOUN ALJWA is similar to OYOUN ALJIWA
HAFER ALBATIN is similar to HAFR ALBATIN
HAFR ALBATEN is similar to HAFER ALBATEN
HAFER ALBATEN is similar to HAFR ALBATEN
ALQUWAYIYAH is similar to ALQUWAY'IYAH
ALMAJMAAH is similar to ALMAJMA'AH
ALQUWAY'IYAH is similar to ALQUWAYIYAH
ALMAJMA'AH is similar to ALMAJMAAH
HALAT AMMAR is similar to HALAT AMAR
ALARIDHAH is similar to ALHARIDHAH
ALQUNFUDHAH is similar to ALQUNFUDHA
RIJALALMAE is similar to RIJALALMA
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [26]:
atms.replace(to_replace =['RIJALALMA','RIJALALMAE','HAFER ALBATIN','ALQUNFUDHA','HALAT AMAR',"ALQUWAY'IYAH","ALMAJMA'AH",'HAFER ALBATEN'], 
             value =['RIJAL ALMA','RIJAL ALMA','HAFR ALBATIN','ALQUNFUDHAH','HALAT AMMAR','ALQUWAYIYAH','ALMAJMAAH','HAFR ALBATEN'], 
             regex = False,inplace=True) 

In [27]:
similar('city_english',93)

ALMAJMAH is similar to ALMAJMAAH
OYOUN ALJWA is similar to OYOUN ALJIWA
AHAD ALMUSARIHAH is similar to AHAD ALMASARIHAH
ALARIDAH is similar to ALARIDHAH
ALMAJMAAH is similar to ALMAJMAH
ALDHHRAN is similar to ALDHAHRAN
ALDHAHRAN is similar to ALDHHRAN
AHAD ALMASARIHAH is similar to AHAD ALMUSARIHAH
ALARIDHAH is similar to ALHARIDHAH
ALARIDHAH is similar to ALARIDAH
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [28]:
atms.replace(to_replace =['ALMAJMAH','BURAIDAH ','AHAD ALMUSARIHAH','ALDHHRAN','ALBADAYA ','ALARIDAH'], 
             value = ['ALMAJMAAH','BURAIDAH','AHAD ALMASARIHAH','ALDHAHRAN','ALBADAYA','ALARIDHAH'], 
             regex = False,inplace=True) 

In [29]:
similar('city_english',92)

ALMANDIQ is similar to ALMANDQ
ALKHARJ is similar to ALKHARAJ
WADI ALDAWASIR is similar to WADI ADDAWASIR
KHAMIS MUSHAIT is similar to KHAMIS MUSHIET
KHAMIS MUSHAIT is similar to KHAMIS MUSHAYT
MUBARAZ is similar to MUBARRAZ
OYOUN ALJWA is similar to OYOUN ALJIWA
DOMAT ALJANDAL is similar to DUMAT ALJANDAL
ALMANDAQ is similar to ALMANDQ
KHAMIS MUSHAYT is similar to KHAMIS MUSHAIT
ALARIDHAH is similar to ALHARIDHAH
WADI ADDAWASIR is similar to WADI ALDAWASIR
DUMAT ALJANDAL is similar to DOMAT ALJANDAL
ALMANDQ is similar to ALMANDAQ
ALMANDQ is similar to ALMANDIQ
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA
ALKHARAJ is similar to ALKHARJ
KHAMIS MUSHIET is similar to KHAMIS MUSHAIT
MUBARRAZ is similar to MUBARAZ


In [30]:
atms.replace(to_replace =['ALMANDQ','ALKHARAJ','WADI ADDAWASIR','KHAMIS MUSHIET','KHAMIS MUSHAYT','MADINAH ','MUBARAZ','DOMAT ALJANDAL'], 
             value = ['ALMANDIQ','ALKHARJ','WADI ALDAWASIR','KHAMIS MUSHAIT','KHAMIS MUSHAIT','MADINAH','MUBARRAZ','DUMAT ALJANDAL'], 
             regex = False,inplace=True) 

In [31]:
similar('city_english',91)

SABIT ALALYAH is similar to SABT ALALAYAH
SABIT ALALYAH is similar to SABT ALALYA
TANOMA is similar to TANOMAH
DHAHRAN is similar to DAHRAN
HAFR ALBATIN is similar to HAFR ALBATEN
ALRAYIS is similar to ALAYIS
MADINA is similar to MADINAH
SHAQRA is similar to SHAQRAA
MADINAH is similar to MADINA
ONAIZAH is similar to ONAIZA
ONAIZAH is similar to ONIZAH
OYOUN ALJWA is similar to OYOUN ALJIWA
HAFR ALBATEN is similar to HAFR ALBATIN
BIESHAH is similar to BISHAH
ALAHASA is similar to ALAHSA
ONIZAH is similar to ONAIZAH
ALARIDHAH is similar to ALHARIDHAH
BISHAH is similar to BIESHAH
ALAHSA is similar to ALAHASA
TANOMAH is similar to TANOMA
SABT ALALYA is similar to SABT ALALAYAH
SABT ALALYA is similar to SABIT ALALYAH
SARAT ABIEDAH is similar to SARAT ABEEDAH
ALJOMOM is similar to ALJMOM
ALJMOM is similar to ALJOMOM
SHAQRAA is similar to SHAQRA
ALAYIS is similar to ALRAYIS
SABT ALALAYAH is similar to SABT ALALYA
SABT ALALAYAH is similar to SABIT ALALYAH
ALHARIDHAH is similar to ALARIDHAH
SAR

In [32]:
atms.replace(to_replace =['DAHRAN','ONAIZA','SARAT ABEEDAH','SABT ALALYA','SABIT ALALYAH','SHAQRA','ALJMOM','TANOMA','ALAHASA'], 
                 value = ['DHAHRAN','ONAIZAH','SARAT ABIEDAH','SABT ALALAYAH','SABT ALALAYAH','SHAQRAA','ALJOMOM','TANOMAH','ALAHSA'], 
             regex = False,inplace=True) 

In [33]:
similar('city_english',91)

HAFR ALBATIN is similar to HAFR ALBATEN
ALRAYIS is similar to ALAYIS
MADINA is similar to MADINAH
MADINAH is similar to MADINA
ONAIZAH is similar to ONIZAH
OYOUN ALJWA is similar to OYOUN ALJIWA
HAFR ALBATEN is similar to HAFR ALBATIN
BIESHAH is similar to BISHAH
ONIZAH is similar to ONAIZAH
ALARIDHAH is similar to ALHARIDHAH
BISHAH is similar to BIESHAH
ALAYIS is similar to ALRAYIS
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [34]:
atms.replace(to_replace =['BIESHAH','ONIZAH','JUBAIL ',' RIYADH','HAFR ALBATEN','RIYADH '], 
                 value = ['BISHAH','ONAIZAH','JUBAIL','RIYADH','HAFR ALBATIN','RIYADH'], 
             regex = False,inplace=True) 

In [35]:
similar('city_english',90)

ALBERK is similar to ALBRK
BISHA is similar to BISHAH
SIEHAT is similar to SEHAT
BAQAA is similar to BAQA'A
DAMAD is similar to DHAMAD
SAMTAH is similar to SAMTA
ALAIS is similar to ALAYIS
ALRAYIS is similar to ALAYIS
MADINA is similar to MADINAH
ALQUNFUDHAH is similar to ALQUNFIDHAH
ALQUWAYAYAH is similar to ALQUWAYIYAH
DURMA is similar to DHURMA
TABOUK is similar to TABUK
TAYMAA is similar to TAYMA
MADINAH is similar to MADINA
OYOUN ALJWA is similar to OYOUN ALJIWA
SAKAKA is similar to SKAKA
SKAKA is similar to SAKAKA
SAMTA is similar to SAMTAH
DHAMAD is similar to DAMAD
BISHAH is similar to BISHA
MARAT is similar to MARRAT
TABUK is similar to TABOUK
ALARIDHAH is similar to ALHARIDHAH
ALBRK is similar to ALBARK
ALBRK is similar to ALBERK
ALQUNFIDHAH is similar to ALQUNFUDHAH
ALQUWAYIYAH is similar to ALQUWAYAYAH
SEYHAT is similar to SEHAT
TAYMA is similar to TAYMAA
DHURMA is similar to DURMA
MARRAT is similar to MARAT
ALAYIS is similar to ALRAYIS
ALAYIS is similar to ALAIS
ALBARK is 

In [36]:
atms.replace(to_replace =['SIEHAT','SEHAT','ALAYIS','MARAT','DURMA','UMLUJ ','RAMAH ','TAYMA','ALQUWAYAYAH','ALQUNFIDHAH','TABUK','JAZAN '], 
                 value = ['SEYHAT','SEYHAT','ALAIS','MARRAT','DHURMA','UMLUJ','RAMAH','TAYMAA','ALQUWAYIYAH','ALQUNFUDHAH','TABOUK','JAZAN'], 
             regex = False,inplace=True) 

In [37]:
similar('city_english',90)

ALBERK is similar to ALBRK
BISHA is similar to BISHAH
BAQAA is similar to BAQA'A
DAMAD is similar to DHAMAD
SAMTAH is similar to SAMTA
MADINA is similar to MADINAH
MADINAH is similar to MADINA
OYOUN ALJWA is similar to OYOUN ALJIWA
SAKAKA is similar to SKAKA
SKAKA is similar to SAKAKA
SAMTA is similar to SAMTAH
DHAMAD is similar to DAMAD
BISHAH is similar to BISHA
ALARIDHAH is similar to ALHARIDHAH
ALBRK is similar to ALBARK
ALBRK is similar to ALBERK
ALBARK is similar to ALBRK
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA
BAQA'A is similar to BAQAA


In [38]:
atms.replace(to_replace =['BISHA',"BAQA'A",'DAMAD','SAMTA','MADINA','SAKAKA','ALBERK','ALBARK'], 
                 value = ['BISHAH','BAQAA','DHAMAD','SAMTAH','MADINAH','SKAKA','ALBRK','ALBRK'], 
             regex = False,inplace=True) 

In [39]:
similar('city_english',89)

ALMAGARDAH is similar to ALMAJARDAH
ALQAYSOMAH is similar to ALKAYSOMAH
UM ALSAHIK is similar to UM ALSAHEK
BEER BIN HERMAS is similar to BIR BIN HERMAS
OYOUN ALJWA is similar to OYOUN ALJIWA
ALARIDHAH is similar to ALHARIDHAH
ZAHRAN ALJANUB is similar to DHAHRAN ALJANUB
ALMAJARDAH is similar to ALMAGARDAH
BILJURASHI is similar to BALJURASHI
ALNUAYRIYAH is similar to ALNAYRIAH
ALNAYRIAH is similar to ALNUAYRIYAH
UM ALSAHEK is similar to UM ALSAHIK
ALKAYSOMAH is similar to ALQAYSOMAH
BIR BIN HERMAS is similar to BEER BIN HERMAS
BALJURASHI is similar to BILJURASHI
ALHARIDHAH is similar to ALARIDHAH
DHAHRAN ALJANUB is similar to ZAHRAN ALJANUB
OYOUN ALJIWA is similar to OYOUN ALJWA


In [40]:
atms.replace(to_replace =['ALMAGARDAH','ALKAYSOMAH','UM ALSAHEK','BIR BIN HERMAS','ZAHRAN ALJANUB','BILJURASHI','ALNAYRIAH'], 
                 value = ['ALMAJARDAH','ALQAYSOMAH','UM ALSAHIK','BEER BIN HERMAS','DHAHRAN ALJANUB','BALJURASHI','ALNUAYRIYAH'], 
             regex = False,inplace=True) 

In [41]:
similar('city_english',88)

ALMAJARDAH is similar to ALMJARDH
ABQIQ is similar to AQIQ
BAQAA is similar to BAQA
ABU ARISH is similar to ABO ARISH
BADR is similar to BADER
ALMEKHWAH is similar to ALMAKHWAH
ALMEKHWAH is similar to ALMUKHWAH
ALDIRIYAH is similar to ALDORIYAH
RAMAH is similar to RMAH
MUBARRAZ is similar to ALMUBARRAZ
OYOUN ALJWA is similar to OYOUN ALJIWA
RMAH is similar to RIMAH
RMAH is similar to RAMAH
ABO ARISH is similar to ABU ARISH
ALARIDHAH is similar to ALHARIDHAH
ALMUKHWAH is similar to ALMAKHWAH
ALMUKHWAH is similar to ALMEKHWAH
ASSULAYYIL is similar to SULAYYIL
BAQA is similar to BAQAA
ALQURYYAT is similar to ALQURAYAT
ALMIDHNAB is similar to ALMITHNAB
ALDORIYAH is similar to ALDIRIYAH
ALMUBARRAZ is similar to MUBARRAZ
BADER is similar to BADR
RIMAH is similar to RMAH
SULAYYIL is similar to ASSULAYYIL
ALQURAYAT is similar to ALQURYYAT
AQIQ is similar to AQAIQ
AQIQ is similar to ABQIQ
AQAIQ is similar to AQIQ
ALMAKHWAH is similar to ALMUKHWAH
ALMAKHWAH is similar to ALMEKHWAH
ALHARIDHAH is 

In [42]:
atms.replace(to_replace =['ALMJARDAH','ALMIDHNAB','ALMEKHWAH','AQAIQ','ALQURYYAT','ASSULAYYIL','RIMAH','BADR','MUBARRAZ'], 
                 value = ['ALMAJARDAH','ALMITHNAB','ALMAKHWAH','AQIQ','ALQURAYAT','SULAYYIL','RMAH','BADER','ALMUBARRAZ'], 
             regex = False,inplace=True) 

In [43]:
similar('city_english',88)

ALMAJARDAH is similar to ALMJARDH
ABQIQ is similar to AQIQ
BAQAA is similar to BAQA
ABU ARISH is similar to ABO ARISH
ALMAKHWAH is similar to ALMUKHWAH
ALDIRIYAH is similar to ALDORIYAH
RAMAH is similar to RMAH
OYOUN ALJWA is similar to OYOUN ALJIWA
RMAH is similar to RAMAH
ABO ARISH is similar to ABU ARISH
ALARIDHAH is similar to ALHARIDHAH
ALMUKHWAH is similar to ALMAKHWAH
BAQA is similar to BAQAA
ALDORIYAH is similar to ALDIRIYAH
AQIQ is similar to ABQIQ
ALHARIDHAH is similar to ALARIDHAH
ALMJARDH is similar to ALMAJARDAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [44]:
atms.replace(to_replace =['ALMJARDH','BAQA','ABO ARISH','ALMUKHWAH','ALDORIYAH','RMAH','RAMAH'], 
                 value = ['ALMAJARDAH','BAQAA','ABU ARISH','ALMAKHWAH','ALDIRIYAH','RUMAH','RUMAH'], 
             regex = False,inplace=True) 

In [45]:
similar('city_english',87)

ALMANDIQ is similar to ALMANDAQ
ABQIQ is similar to AQIQ
ALJUBEAL is similar to ALJUBAIL
ALJUBEAL is similar to ALJUBAYL
ALKHUBAR is similar to ALKHOBAR
DHAHRAN is similar to ALDHAHRAN
ALSHAMLY is similar to ALSHAMLI
ALARDAH is similar to ALARIDHAH
MADINAH is similar to ALMADINAH
ALBADAYIE is similar to ALBADIE
ALKHOBRA is similar to ALKHOBAR
BUREIDAH is similar to BURAIDAH
ALBADIE is similar to ALBADAYIE
HOTIT BANI TAMIM is similar to HOTAT - BANI TAMIM
BURAIDAH is similar to BURAYDAH
BURAIDAH is similar to BUREIDAH
OYOUN ALJWA is similar to OYOUN ALJIWA
ALSHAMLI is similar to ALSHAMLY
ALHAFOUF is similar to ALHAFOOF
ALMANDAQ is similar to ALMANDIQ
ALKHOBAR is similar to ALKHOBRA
ALKHOBAR is similar to ALKHUBAR
ALARIDHAH is similar to ALHARIDHAH
ALARIDHAH is similar to ALARDAH
ALMUZAHIMIYAH is similar to ALMUZAHMIYYA
ALHAFOOF is similar to ALHAFOUF
ALDHAHRAN is similar to DHAHRAN
BURAYDAH is similar to BURAIDAH
ALJUBAYL is similar to ALJUBAIL
ALJUBAYL is similar to ALJUBEAL
ALMADINAH 

In [46]:
atms.replace(to_replace =['ALJUBEAL','ALJUBAYL','BURAYDAH','DHAHRAN','ALHAFOUF','ALMUZAHMIYYA','ALKHOBAR','ALMANDIQ','ALSHAMLY','BUREIDAH','HOTAT - BANI TAMIM','HOTIT BANI TAMIM'], 
                 value = ['ALJUBAIL','ALJUBAIL','BURAIDAH','ALDHAHRAN','ALHAFOOF','ALMUZAHIMIYAH','ALKHUBAR','ALMANDAQ','ALSHAMLI','BURAIDAH','HOTAT BANI TAMIM','HOTAT BANI TAMIM'], 
             regex = False,inplace=True) 

In [47]:
similar('city_english',87)

ABQIQ is similar to AQIQ
ALARDAH is similar to ALARIDHAH
MADINAH is similar to ALMADINAH
ALBADAYIE is similar to ALBADIE
ALBADIE is similar to ALBADAYIE
HOTAT BANI TAMIM is similar to HAWTAT BANI TAMIM
OYOUN ALJWA is similar to OYOUN ALJIWA
ALARIDHAH is similar to ALHARIDHAH
ALARIDHAH is similar to ALARDAH
HAWTAT BANI TAMIM is similar to HOTAT BANI TAMIM
ALMADINAH is similar to MADINAH
AQIQ is similar to ABQIQ
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [48]:
atms.replace(to_replace =['MADINAH','HAWTAT BANI TAMIM'], 
                 value = ['ALMADINAH','HOTAT BANI TAMIM'], 
             regex = False,inplace=True) 

In [49]:
similar('city_english',86)

ABQIQ is similar to AQIQ
RAS TANNOURAH is similar to RAS TANURA
ALARDAH is similar to ALARIDHAH
ALBADAYIE is similar to ALBADIE
ALNABHANYAH is similar to ALNABHABIYAH
ALBADIE is similar to ALBADAYIE
OYOUN ALJWA is similar to OYOUN ALJIWA
AHAD ALMESARHA is similar to AHAD ALMASARIHAH
AHAD ALMASARIHAH is similar to AHAD ALMESARHA
ALARIDHAH is similar to ALHARIDHAH
ALARIDHAH is similar to ALARDAH
ALNABHABIYAH is similar to ALNABHANYAH
RAS TANURA is similar to RAS TANNOURAH
AQIQ is similar to ABQIQ
ALHARIDHAH is similar to ALARIDHAH
OYOUN ALJIWA is similar to OYOUN ALJWA


In [50]:
atms.replace(to_replace =['RAS TANURA','ALARDAH','AHAD ALMESARHA','ALNABHABIYAH'], 
                 value = ['RAS TANNOURAH','ALARIDHAH','AHAD ALMASARIHAH','ALNABHANYAH'], 
             regex = False,inplace=True) 

In [51]:
similar('city_english',86,87)

DAWMAT ALJANDL is similar to DUMAT ALJANDAL
ALAMWAH is similar to ALADWAH
ALHARJA is similar to ALKHARJ
ALNAMAS is similar to ANNAMAS
ALNAQEA is similar to ALNAQRA
ALJUBAIL is similar to JUBAIL
ALKHUBAR is similar to KHUBAR
ALADWAH is similar to ALAMWAH
ALEDABI is similar to ALIDABI
ALHANAKYAH is similar to ALHINAKIYAH
ALATAWALAH is similar to ALARTAWAYAH
ALKAMEL is similar to ALKAMIL
ALQARIN is similar to BALQARN
ALNAQRA is similar to ALNAQEA
ALAFLAG is similar to ALAFLAJ
ALARTAWAYAH is similar to ALARTAWIAH
ALARTAWAYAH is similar to ALATAWALAH
ALBEJADYAH is similar to ALBJADEYYAH
ALKHARJ is similar to ALHARJA
ALBADA is similar to ALBADAYA
KHUBAR is similar to ALKHUBAR
DUMAT ALJANDAL is similar to DAWMAT ALJANDL
JUBAIL is similar to ALJUBAIL
ALIDABI is similar to ALEDABI
ANNAMAS is similar to ALNAMAS
ALAFLAJ is similar to ALAFLAG
KHULAYS is similar to KHULAIS
ALHINAKIYAH is similar to ALHANAKYAH
BALQARN is similar to ALQARIN
ALBADAYA is similar to ALBADA
KHULAIS is similar to KHULAYS


In [52]:
atms.replace(to_replace =['ALKAMEL','ALBEJADYAH','ALARTAWIAH','KHULAYS','ALHINAKIYAH','ALAFLAG','ANNAMAS','ALEDABI','JUBAIL','DUMAT ALJANDAL','KHUBAR'], 
                 value = ['ALKAMIL','ALBJADEYYAH','ALARTAWAYAH','KHULAIS','ALHANAKYAH','ALAFLAJ','ALNAMAS','ALIDABI','ALJUBAIL','DAWMAT ALJANDL','ALKHUBAR'], 
             regex = False,inplace=True) 

In [53]:
similar('city_english',84,86)

ALMAJARDAH is similar to ALMAJMAAH
RIJAL ALMA is similar to RAJALALMA
SARAT OBEADAH is similar to SARAT ABIEDAH
ABU ARISH is similar to ABU AREESH
ALMAJMAAH is similar to ALMAJARDAH
RAJALALMA is similar to RIJAL ALMA
ABU AREESH is similar to ABU ARISH


In [54]:
atms.replace(to_replace =['SARAT ABIEDAH','RAJALALMA','ABU ARISH'], 
                 value = ['SARAT OBEADAH','RIJAL ALMA','ABU AREESH'], 
             regex = False,inplace=True) 

In [55]:
similar('city_english',83,86)

ALBAHA is similar to ALAHSA
ALBAHA is similar to ALBADA
ALMAJARDAH is similar to ALMAJMAAH
TUREEB is similar to TAREEB
TUREEB is similar to TUREEF
ALQATIF is similar to QATIF
QARYAH is similar to DARYAH
SALWAH is similar to ALWAJH
ALHAIT is similar to ALGHAT
ALDARB is similar to ALHDAR
ALAIS is similar to ALRAYIS
ALRAYIS is similar to ALAIS
RABEGH is similar to RABIGH
THEWAL is similar to THUWAL
TUREEF is similar to TUREEB
ALRASS is similar to ARRASS
DARYAH is similar to QARYAH
ALGHAT is similar to ALHAIT
ALHDAR is similar to ALDARB
ALKHARJ is similar to KHARJ
ALMAJMAAH is similar to ALMAJARDAH
ALRAIN is similar to ALRAYN
ALZULFI is similar to ZULFI
THADIK is similar to THADIQ
TUMAIR is similar to TUMAER
ALBADA is similar to ALBAHA
ALWAJH is similar to SALWAH
KHARJ is similar to ALKHARJ
THADIQ is similar to THADIK
ZULFI is similar to ALZULFI
ALAHSA is similar to ALBAHA
QATIF is similar to ALQATIF
RABIGH is similar to RABEGH
ALRAYN is similar to ALRAIN
ARRASS is similar to ALRASS
HOFUF 

In [56]:
atms.replace(to_replace =['TAREEB','TUMAER','THEWAL','HOFUF','ARRASS','ALRAIN','RABEGH','QATIF','ZULFI','THADIK','KHARJ'], 
                 value = ['TUREEB','TUMAIR','THUWAL','ALHOFUF','ALRASS','ALRAYN','RABIGH','ALQATIF','ALZULFI','THADIQ','ALKHARJ'], 
             regex = False,inplace=True) 

In [57]:
similar('city_english',82,83)

ALQORYAT is similar to ALQURAYAT
ALRAFEAH is similar to ALRABEAIH
ALTHOQBA is similar to ALTHUQBAH
ALGHAZALAH is similar to GAZALAH
ALBADAYIE is similar to ALBADAYA
ALBEAKIRYAH is similar to ALBUKIRIYAH
ALBEAKIRYAH is similar to ALBEKERIYAH
ALRABEAIH is similar to ALRAFEAH
ALMAJMAAH is similar to MAJMA'AH
ALOUUANAH is similar to ALUYANAH
ALQUWAYIYAH is similar to ALNUAYRIYAH
MAJMA'AH is similar to ALMAJMAAH
GAZALAH is similar to ALGHAZALAH
ALUYANAH is similar to ALOUUANAH
ALQURAYAT is similar to ALQORYAT
ALNUAYRIYAH is similar to ALQUWAYIYAH
ALBEKERIYAH is similar to ALBUKIRIYAH
ALBEKERIYAH is similar to ALBEAKIRYAH
ALBADAYA is similar to ALBADAYIE
ALTHUQBAH is similar to ALTHOQBA
ALBUKIRIYAH is similar to ALBEKERIYAH
ALBUKIRIYAH is similar to ALBEAKIRYAH


In [58]:
atms.replace(to_replace =['ALBEKERIYAH','ALBEAKIRYAH','ALTHOQBA','ALBADAYA','ALQORYAT','ALOUUANAH','GHAZALAH','GAZALAH',"MAJMA'AH",'ALBADIE'], 
                 value = ['ALBUKIRIYAH','ALBUKIRIYAH','ALTHUQBAH','ALBADAYIE','ALQURAYAT','ALUYANAH','ALGHAZALAH','ALGHAZALAH','ALMAJMAAH','ALBADAYIE'], 
             regex = False,inplace=True) 

In [59]:
similar('city_english',80,82)

ALBAHA is similar to ABHA
ALEISAWAYAH is similar to AISAWIYAH
ABHA is similar to ALBAHA
ALAMWAH is similar to ALUTAMAH
ALQAHMA is similar to BALAHMAR
BALAHMAR is similar to ALQAHMA
BARIQ is similar to BQAIQ
BARIQ is similar to BAREQ
KHAYBR ALGANOUB is similar to THARAN ALGANOUB
THARAN ALGANOUB is similar to DHAHRAN ALJANUB
THARAN ALGANOUB is similar to KHAYBR ALGANOUB
ABQIQ is similar to BQAIQ
ALKHAFJI is similar to ALKHARJ
ALKHUBAR is similar to ALKHARJ
ALNEARYAH is similar to ALNUAYRIYAH
ALROQAIY is similar to ALRUQAI
ALHAIT is similar to ALHADITHA
ALSHANAN is similar to ALSAHNA
HAIL is similar to MHAYIL
JAZAN is similar to JEZAN
JAZAN is similar to JIZAN
SABYA is similar to SABIA
UHID ALMASARHA is similar to AHAD ALMASARIHAH
ALOLA is similar to ALULA
ALUTAMAH is similar to ALAMWAH
YANBU ALBAHR is similar to YANBU ALNAKHL
YANBU ALNAKHL is similar to YANBU ALBAHR
ALJAMOUM is similar to ALJOMOM
TAIF is similar to TURAIF
TAIF is similar to ALTAIF
SHARORA is similar to SHARURAH
ARAR is s

In [60]:
atms.replace(to_replace =['JEZAN','AISAWIYAH','DOUBA',"ARA'AR",'ALRUQAI','ALJOMOM','ALJILAH','SHARORA','ALNEARYAH','ALOLA'], 
                 value = ['JIZAN','ALEISAWAYAH','DHUBA','ARAR','ALROQAIY','ALJAMOUM','ALJELLAH','SHARURAH','ALNUAYRIYAH','ALULA'], 
             regex = False,inplace=True) 

In [61]:
similar('city_english',80,82)

ALBAHA is similar to ABHA
ABHA is similar to ALBAHA
ALAMWAH is similar to ALUTAMAH
ALQAHMA is similar to BALAHMAR
BALAHMAR is similar to ALQAHMA
BARIQ is similar to BQAIQ
BARIQ is similar to BAREQ
KHAYBR ALGANOUB is similar to THARAN ALGANOUB
THARAN ALGANOUB is similar to DHAHRAN ALJANUB
THARAN ALGANOUB is similar to KHAYBR ALGANOUB
ABQIQ is similar to BQAIQ
ALKHAFJI is similar to ALKHARJ
ALKHUBAR is similar to ALKHARJ
ALHAIT is similar to ALHADITHA
ALSHANAN is similar to ALSAHNA
HAIL is similar to MHAYIL
JAZAN is similar to JIZAN
SABYA is similar to SABIA
UHID ALMASARHA is similar to AHAD ALMASARIHAH
ALUTAMAH is similar to ALAMWAH
YANBU ALBAHR is similar to YANBU ALNAKHL
YANBU ALNAKHL is similar to YANBU ALBAHR
TAIF is similar to TURAIF
TAIF is similar to ALTAIF
ALDIRIYAH is similar to ALBUKIRIYAH
ALBADAYIE is similar to BADAIE
ALBADAYIE is similar to ALBADA
ALBUKIRIYAH is similar to BUKAIRIAH
ALBUKIRIYAH is similar to ALDIRIYAH
ALKHOBRA is similar to ALKABRA
ALRASS is similar to RASS

In [62]:
atms.replace(to_replace =['ABQIQ','ALMEAZAHMYAH','TAIF','BAREQ','AHAD RAFIDAH','THARAN ALGANOUB','DAWDMI','SABIA','ALKABRA','UHID ALMASARHA','BADAIE','RASS','BUKAIRIAH'], 
                 value = ['BQAIQ','ALMUZAHIMIYAH','ALTAIF','BARIQ','AHAD RIFAYDAH','DHAHRAN ALJANUB','ALDAWADMI','SABYA','ALKHOBRA','AHAD ALMASARIHAH','ALBADAYIE','ALRASS','ALBUKIRIYAH'], 
             regex = False,inplace=True) 

In [63]:
similar('city_english',80,82)

ALBAHA is similar to ABHA
ABHA is similar to ALBAHA
ALAMWAH is similar to ALUTAMAH
ALQAHMA is similar to BALAHMAR
BALAHMAR is similar to ALQAHMA
BARIQ is similar to BQAIQ
BQAIQ is similar to BARIQ
ALKHAFJI is similar to ALKHARJ
ALKHUBAR is similar to ALKHARJ
ALHAIT is similar to ALHADITHA
ALSHANAN is similar to ALSAHNA
HAIL is similar to MHAYIL
JAZAN is similar to JIZAN
ALUTAMAH is similar to ALAMWAH
YANBU ALBAHR is similar to YANBU ALNAKHL
YANBU ALNAKHL is similar to YANBU ALBAHR
ALDIRIYAH is similar to ALBUKIRIYAH
ALBADAYIE is similar to ALBADA
ALBUKIRIYAH is similar to ALDIRIYAH
ALKHARJ is similar to ALKHUBAR
ALKHARJ is similar to ALKHAFJI
ALSAHNA is similar to ALSHANAN
ALBADA is similar to ALBADAYIE
MHAYIL is similar to HAIL
BLLHAMR is similar to BLLSAMAR
BLLSAMAR is similar to BLLHAMR
ALHADITHA is similar to ALHAIT
JIZAN is similar to JAZAN


In [64]:
atms.replace(to_replace =['YANBU ALNAKHL','YANBU ALBAHR'], 
                 value = ['YANBU','YANBU'], 
             regex = False,inplace=True) 

In [65]:
similar('city_english',79,80)

HAFR ALBATIN is similar to HAFER ALBATEN M.
HAFER ALBATEN M. is similar to HAFR ALBATIN


In [66]:
atms.replace(to_replace =['HAFER ALBATEN M.'], 
                 value = ['HAFR ALBATIN'], 
             regex = False,inplace=True) 

In [67]:
similar('city_english',78,80)

ALFARASHA is similar to ALFAWARAH
ALHAFEERAH is similar to ALRAFEAH
ALRAFEAH is similar to ALHAFEERAH
BEECH JAZAN is similar to BIESH -JAZAN
ALMADINAH is similar to ALMELADAH
ALFAWARAH is similar to ALQOWARAH
ALFAWARAH is similar to ALFARASHA
ALMELADAH is similar to ALMADINAH
ALQOWARAH is similar to ALFAWARAH
ALMUZAHIMIYAH is similar to MUZAHMEYAH
MUZAHMEYAH is similar to ALMUZAHIMIYAH
BIESH -JAZAN is similar to BEECH JAZAN
MHAIL ASEER is similar to MAHAYIL ASIR
MAHAYIL ASIR is similar to MHAIL ASEER


In [68]:
atms.replace(to_replace =['MAHAYIL ASIR','MHAIL ASEER','MHAIL','BIESH -JAZAN','BEECH JAZAN','MUZAHMEYAH'], 
                 value = ['MAHAYIL','MAHAYIL','MAHAYIL','BEESH','BEESH','ALMUZAHIMIYAH'], 
             regex = False,inplace=True) 

In [69]:
similar('city_english',77,80)

ALBAHA is similar to ALSAHNA
ALBAHA is similar to ALKAHFA
ALBAHA is similar to ALQAHMA
ALJOUF is similar to ALHOFUF
ALAMWAH is similar to ALWAJH
ALAMWAH is similar to ALMAHD
ALAMWAH is similar to SALWAH
ALFARASHA is similar to ALFAWARAH
ALHAFEERAH is similar to ALRAFEAH
ALHARJA is similar to ALHDAR
ALHEMA is similar to ALQAHMA
ALQAHMA is similar to ALAHSA
ALQAHMA is similar to ALHEMA
ALQAHMA is similar to ALBAHA
WADY HASBAL is similar to WADI BN HASHBAL
ALQATIF is similar to ALTAIF
ALRAFEAH is similar to ALHAFEERAH
SALWAH is similar to ALADWAH
SALWAH is similar to ALAMWAH
ALADWAH is similar to ALWAJH
ALADWAH is similar to ALBADA
ALADWAH is similar to SALWAH
ALAJFUR is similar to ALJAFR
ALHAIT is similar to ALHARIQ
ALKAHFA is similar to ALAHSA
ALKAHFA is similar to ALBAHA
SAMEERA is similar to NAMERA
ALDARB is similar to AD-DARB
ALDARB is similar to ALIDABI
ALIDABI is similar to ALDARB
ALMAHD is similar to ALAMWAH
ALRAYIS is similar to ALRAYN
ALRAYIS is similar to ALRASS
KHAYBAR is simi

In [70]:
atms.replace(to_replace =['ALDELAM','ALLITH','AQLAT ALSQOR','WADY HASBAL','AD-DARB','ALHASSA','SHAGRA','ONAYZA'], 
                 value = ['ALDALM','ALLEATH','OQLIT ALSOUQOR','WADI BN HASHBAL','ALDARB','ALAHSA','SHAQRAA','ONAIZAH'], 
             regex = False,inplace=True) 

In [71]:
similar('city_english',76,77)

JASH-TATHLETH is similar to TATHLETH
TATHLETH is similar to JASH-TATHLETH
ALHANAKYAH is similar to ALNABHANYAH
ALNABHANYAH is similar to ALHANAKYAH


In [72]:
atms.replace(to_replace =['JASH-TATHLETH'], 
                 value = ['JASH'], 
             regex = False,inplace=True) 

In [73]:
similar('city_english',75,76)

ALBAHA is similar to ALBASHAYIR
AFRAA-SABIT ALALYAH is similar to SABT ALALAYAH
ALAMWAH is similar to ALMAJMAAH
ALAMWAH is similar to ALFAWARAH
ALAMWAH is similar to ALMAKHWAH
ALBASHAYIR is similar to ALBAHA
ALHARJA is similar to ALDHAHRAN
ALMAUTHA is similar to ALMAHANI
BALAHMAR is similar to BALASMER
BALASMER is similar to BALAHMAR
SABT ALALAYAH is similar to AFRAA-SABIT ALALYAH
ALKHUBAR is similar to ALKHOBRA
ALKHUBAR is similar to ALKHURMA
ALRAFEAH is similar to ALRAFAEE
ALDHAHRAN is similar to ALHARJA
MELEAJAH is similar to MULEEJAH
ALADWAH is similar to ALMELADAH
ALADWAH is similar to ALFAWARAH
ALADWAH is similar to ALMAKHWAH
ALADWAH is similar to ALMADINAH
ALADWAH is similar to ALARIDHAH
ALKHOTTA is similar to ALKHOBRA
HAIL is similar to HAQL
ALARIDHAH is similar to ALADWAH
ALFAQEER is similar to ALRAFAEE
ALUTAMAH is similar to ALUYANAH
ALMADINAH is similar to ALADWAH
ALKHURMA is similar to ALKHOBRA
ALKHURMA is similar to ALKHUBAR
ALMAHANI is similar to ALMAUTHA
ALMAKHWAH is sim

In [74]:
atms.replace(to_replace =['DERYIAH','METHNAB','SAR-NAJRAN','AFRAA-SABIT ALALYAH'], 
                 value = ['ALDAREEYAH','ALMITHNAB','SAR','AFRAA'], 
             regex = False,inplace=True) 

In [75]:
similar('city_english',73,74)

QULWA is similar to QILWAH
ALEISAWAYAH is similar to ALARTAWAYAH
BISHAH is similar to BIESH
MAHIL is similar to MHAYIL
SAMKH is similar to SAMTAH
BQAIQ is similar to BUQAYQ
ALNUAYRIYAH is similar to ALBUKIRIYAH
ALSARAR is similar to ARAR
DAMMAM is similar to DALAM
ORAYRA is similar to RANYA
SAFUAH is similar to SAFWA
SALWAH is similar to SAFWA
ALHAIT is similar to ALAIS
ALSOULEMI HAIL is similar to ALSOULIL
HAIL is similar to MAHAYIL
SAMTAH is similar to SAMKH
ALAIS is similar to ALAHSA
ALAIS is similar to ALQASB
ALAIS is similar to ALRASS
ALAIS is similar to ALTAIF
ALAIS is similar to ALQOIS
ALAIS is similar to ALHAIT
ALQOIS is similar to ALQOZ
ALQOIS is similar to ALAIS
NAMERA is similar to MERAT
RANYA is similar to ORAYRA
ALTAIF is similar to ALAIS
ARAR is similar to ALSARAR
ALBUKIRIYAH is similar to ALNUAYRIYAH
ALRASS is similar to ALAIS
ALARTAWAYAH is similar to ALROWAYADAH
ALARTAWAYAH is similar to ALEISAWAYAH
ALDALM is similar to DALAM
ALQASB is similar to ALAIS
ALROWAYADAH is s

In [76]:
atms.replace(to_replace =['ALHADIASAH','ALDAREEYAH','MAHIL','BISHAH','BIESH','SAFUAH','BUQAYQ','ULWA','ALQOIS','MERAT','DALM'], 
                 value = ['ALHADITHA','ALDIRIYAH','MHAYIL','BEESHAH','BEESH','SAFWA','BQAIQ','QILWAH','ALQOZ','MARRAT','ALDALM'], 
             regex = False,inplace=True) 

In [77]:
atms.city_english.nunique()

367

In [78]:
# For city_arabic column:

In [79]:
# Remove white spaces
atms['city_arabic']=atms['city_arabic'].str.strip();

In [80]:
similar('city_arabic',95)

المدينة المنورة is similar to المدينة النورة
المدينة النورة is similar to المدينة المنورة
وادي الدواسر is similar to وادي الدوسر
وادي الدوسر is similar to وادي الدواسر


In [81]:
atms.replace(to_replace =['المدينة النورة','وادي الدوسر'], 
             value =['المدينة المنورة','وادي الدواسر'], regex = False,inplace=True) 

In [82]:
similar('city_arabic',94)

ينبع النخل is similar to ينبع النخيل
ينبع النخيل is similar to ينبع النخل


In [83]:
atms.replace(to_replace =['ينبع النخل'], value =['ينبع النخيل'], regex = False,inplace=True) 

In [84]:
similar('city_arabic',90)

العرين is similar to الرين
الجبيل is similar to الجبيلة
احد المسارحة is similar to أحد المسارحة
احد المسارحة is similar to احد المسارحه
القرين is similar to الرين
الرين is similar to القرين
الرين is similar to العرين
وادى الدواسر is similar to وادي الدواسر
الجبيلة is similar to الجبيل
احد المسارحه is similar to احد المسارحة
وادي الدواسر is similar to وادى الدواسر
أحد المسارحة is similar to احد المسارحة


In [85]:
atms.replace(to_replace =['احد المسارحة','احد المسارحه','وادى الدواسر'], 
                value =['أحد المسارحة','أحد المسارحة','وادي الدواسر'], regex = False,inplace=True) 

In [86]:
similar('city_arabic',85)

العرين is similar to الرين
الحفاير is similar to الحفيرة
الحفيرة is similar to الحفاير
الحريضة is similar to الرويضة
بيشة is similar to بيش
محايل is similar to حايل
سراة عبيدة is similar to سرات عبيدة
سراة عبيدة is similar to سراة عبيده
احد رفيدة is similar to أحد رفيدة
ابقيق is similar to بقيق
الجبيل is similar to الجبيلة
الاحساء is similar to الأحساء
ابو عريش is similar to أبو عريش
بيش is similar to بيشة
صبيا is similar to صبياء
المدينة المنورة is similar to المدينه المنوره
قيا - الطائف is similar to ثقيف - الطائف
ثقيف - الطائف is similar to قيا - الطائف
الديرية is similar to الدرعية
القرين is similar to الرين
البدائع is similar to البدايع
الفوارة is similar to القوارة
القوارة is similar to الفوارة
الافلاج is similar to الأفلاج
الدوادمى is similar to الدوادمي
الرين is similar to القرين
الرين is similar to العرين
الرويضة is similar to الحريضة
أحد رفيدة is similar to احد رفيدة
الجبيلة is similar to الجبيل
المدينه المنوره is similar to المدينة المنورة
الأحساء is similar to الاحساء
بقيق 

In [87]:
atms.replace(to_replace =['سرات عبيدة','سراة عبيده','احد رفيدة','ابقيق','الاحساء','ابو عريش','الدوادمى',' الدوادمي'], 
                  value =['سراة عبيدة','سراة عبيدة','أحد رفيدة','بقيق','الأحساء','أبو عريش','الدوادمي','الدوادمي'], regex = False,inplace=True) 

In [88]:
atms.replace(to_replace =['المدينه المنوره',  'البدايع', 'الافلاج','الدوادمى','صبيا'], value =['المدينة المنورة', 'البدائع', 'الأفلاج', 'الداودمي','صبياء'], regex = False,inplace=True) 

In [89]:
similar('city_arabic',80)

العرين is similar to الرين
العرين is similar to القرين
الحفاير is similar to الحفيرة
الحفيرة is similar to الحفاير
الحريضة is similar to الرويضة
بالحمر is similar to بالسمر
بالسمر is similar to بالحمر
بيشة is similar to بيش
محايل is similar to حايل
الجبيل is similar to الجبيلة
الخبر is similar to الخبراء
القطيف is similar to العطيف
راس تنورة is similar to راس تنوه
العظيم - حائل is similar to السليمى - حائل
السليمى - حائل is similar to العظيم - حائل
بيش is similar to بيشة
العطيف is similar to القطيف
مكة المكرمة is similar to مكه المكرمه
قيا - الطائف is similar to ثقيف - الطائف
قيا - الطائف is similar to شقصان - الطائف
شقصان - الطائف is similar to قيا - الطائف
ثقيف - الطائف is similar to قيا - الطائف
الديرية is similar to الدرعية
القرين is similar to الرين
القرين is similar to العرين
الفوارة is similar to القوارة
الخبراء is similar to الخبر
القوارة is similar to الفوارة
الجلة is similar to الجبيلة
الرين is similar to القرين
الرين is similar to العرين
الرويضة is similar to الحريضة
الزلفى 

In [90]:
atms.replace(to_replace =['بالحمر', 'راس تنوه','مكه المكرمه','الزلفى'], value =['بالسمر', 'راس تنورة', 'مكة المكرمة','الزلفي'], regex = False,inplace=True) 

In [91]:
similar('city_arabic',75)

القريات is similar to الرياض
القريات is similar to القرين
الأمواه is similar to المويه
العرين is similar to الرين
العرين is similar to العيينة
العرين is similar to القرين
البرك is similar to الخبر
الفرشة is similar to الفوارة
الفرشة is similar to الحفيرة
الحفاير is similar to الفقير
الحفاير is similar to الحفيرة
الحفيرة is similar to الفقير
الحفيرة is similar to الحرجة
الحفيرة is similar to الحفاير
الحفيرة is similar to الفرشة
الحرجة is similar to الحريضة
الحرجة is similar to الحفيرة
الحريضة is similar to الرياض
الحريضة is similar to الرويضة
الحريضة is similar to الحريق
الحريضة is similar to الحرجة
المظه is similar to المهد
بيشة is similar to بيش
محايل is similar to حايل
يعرة is similar to عريعرة
منفذ البطحاء is similar to منفذ الطوال
الجبيل is similar to الجبيلة
الخبر is similar to الخرج
الخبر is similar to الخبراء
الخبر is similar to البرك
النعيرية is similar to العيينة
النعيرية is similar to الديرية
النعيرية is similar to السعيرة
القطيف is similar to العطيف
الرفيعة is similar to الر

In [92]:
atms.replace(to_replace =['صامطه','يعرة' , 'بريده','عنيزه'], value =['صامطة','عريعرة', 'بريدة','عنيزة'], regex = False,inplace=True) 

In [93]:
similar('city_arabic',70)

العقيق - الباحة is similar to بيدة - الباحة
الباحة is similar to البجادية
المندق is similar to المهد
بيدة - الباحة is similar to العقيق - الباحة
العيساوية is similar to العيينة
العيساوية is similar to النعيرية
الحديثة is similar to الدرعية
الحديثة is similar to الشيحية
الحديثة is similar to الديرية
الحديثة is similar to الحريضة
الحديثة is similar to الحفيرة
الجوف is similar to الهفوف
الجوف is similar to الجموم
الجوف is similar to الاجفر
القريات is similar to الرياض
القريات is similar to القرين
ابها is similar to أبها
عفراء - سبت العلايا is similar to سبت العلايا
الأمواه is similar to المويه
الأمواه is similar to المخواة
العرين is similar to الرين
العرين is similar to العيينة
العرين is similar to القرين
العرين is similar to العيص
العرين is similar to النعيرية
البشائر is similar to البدائع
البرك is similar to المبرز
البرك is similar to الخبر
الفرشة is similar to الفوارة
الفرشة is similar to الحفيرة
الحفاير is similar to الفوارة
الحفاير is similar to المحاني
الحفاير is similar to الفقير
ا

In [94]:
atms.replace(to_replace =['عفراء - سبت العلايا','وادى بن هشبل' , 'الحيمة - وادى بن هشبل','ابها', 'الحوطة ','رياض الخبراء','جده', 'جاش - تثليث','محايل'], value =['سبت العلايا','وادي بن هشبل', 'وادي بن هشبل','أبها','الحوطة ' , 'الخبراء', 'جدة','تثليث','محايل عسير'], regex = False,inplace=True) 

In [95]:
# For reg column:

In [96]:
#Upper case all regions
atms['reg'] = atms['reg'].str.upper()

In [97]:
atms['reg'].unique()

array(['ALBAHA', 'ALJOUF', 'ASIR', 'EASTERN', 'HAIL', 'JAZAN', 'MADINA',
       'MAKKAH', 'NAJRAN', 'NORTH BORDER', 'QASSIM', 'RIYADH', 'TABOUK',
       nan, 'WESTERN', 'SOUTHERN', 'CENTRAL', 'NORTHERN'], dtype=object)

In [98]:
# Remove duplicated reigons 
atms.reg.replace(to_replace =['NORTHERN','CENTRAL','MAKKAH','MADINA','ALBAHA','JAZAN','NAJRAN'], 
             value = ['NORTH BORDER','RIYADH','WESTERN','WESTERN','SOUTHERN','SOUTHERN','SOUTHERN'], 
             regex = False,inplace=True) 

In [99]:
# Store all cities along with their reigons in a dictionary
grouping_by = atms[atms.reg.notnull()].groupby(['reg','city_english'])
city_reg={}

for (i,j) in enumerate(grouping_by):
    city_reg.update({j[0][1]: j[0][0]})

In [100]:
# Filling NaN reigons by the city from the city_reg dictionary
length = (atms.shape[0])
for i in range(length):
    if (isinstance(atms['reg'].iloc[i], str)):
        pass
    else:
        city= atms['city_english'].iloc[i]
        region= city_reg.get(city)
        atms['reg'].iloc[i]= region

- Cleaning Coordinates columns:
   - a.  Convert coordinates to decimal format
   - b.  Remove duplicated records again, based on typical coordinates

In [101]:
def check_coord(coordinate):
    """
    Return correct coordinates
    """
    if isinstance(coordinate, float):
        if coordinate<90 and coordinate>-90:
            return coordinate
        else:
            return None
    else: 
        return None

In [102]:
# Replace improper coordinates format  
atms[['lon','lat']] = atms[['lat','lon']].applymap(lambda coordinate: check_coord(coordinate))

In [103]:
# Remove duplicated records again, based on typical coordinates
coordinates=atms[['lon','lat']].dropna(subset=['lon','lat']).values


End of data cleaning...

## 4- Data Visualization

- Menu class to eplorate and visualiz cleaned data:
    - Show the number of ATMs in a specified city
    - The city that has the maximum number of ATMs
    - Bar plot for top cities with largest number of ATMs
    - Mark ATMs on Google Maps
    - Heat map to show the distribution of ATMs in Saudi Arabia
    - Scatter plot to show the distribution of ATMs in Saudi Arabia
    - Pie chart for ATMs per reigon

In [104]:
class Menu:
    """ Offer data visualization by functions
    """
    
    locations = [tuple(x) for x in coordinates]
    
    def city_atms(self,city):
        """ Return number of ATMs in a specific city
        """
        print(atms.city_english.value_counts()[city])
        
    def max_no_of_atms(self):
        """ Return the city with maximum number of ATMs
        """
        return atms.city_english.value_counts().head(1)
    
    def vis_top_atms(self,no=7):
        """ Plot a bar chart for top cities with largest number of ATMs
        """
        atms.city_english.value_counts().head(no).plot(kind='bar',figsize=(15,8));
        plt.title('Top Cities with Largest Number of ATMs',fontsize=25)
        plt.xticks(rotation=0,fontsize=20)
        plt.yticks(fontsize=20)

        return None
        
    def atms_scatter(self):
        """ Scatter plot to show the distribution of ATMs in Saudi Arabia
        """
        x=atms['lon']
        y=atms['lat']
        x_l=atms[(atms['lon'] < 32) & (atms['lon'] > 10) & (atms['lat'] < 50) & (atms['lat'] > 32)]['lon']
        y_l=atms[(atms['lon'] < 32) & (atms['lon'] > 10) & (atms['lat'] < 50) & (atms['lat'] > 32)]['lat']
        plt.figure(figsize=(8, 6))
        plt.plot(y_l,x_l,'o');
        plt.title('ATMs Distribution in Saudi Arabia',fontsize=20)
        return None
    
    def reg_pie(self):
        """ Plot pie chart for ATMs per reigon
        """
        plt.title('ATMs per Reigon',fontsize=20)
        atms.reg.value_counts().plot.pie(figsize=(10,10),textprops={'fontsize': 14});
    
    def atm_marker(self):
        """ Mark ATMs on Google Maps
        """
        fig = gmaps.figure()
        markers = gmaps.marker_layer(Menu.locations)
        fig.add_layer(markers)
        return fig
    
    def heat_map(self):
        """ Show heat map for the distribution of ATMs in Saudi Arabia
        """
        fig = gmaps.figure()
        fig.add_layer(gmaps.heatmap_layer(Menu.locations))
        return fig
    
    

- Represent the menu to the user

In [106]:
menu=Menu()
def print_menu(): 
    print("\n\nMenu:",
          "\n1- Number of atms in a city",
          "\n2- The city with Max. number of ATMs",
          "\n3- Bar plot for top cities with largest number of ATMs",
          "\n4- Scatter plot for ATMs' distribution in Saudi Arabia",
          "\n5- Pie chart shows ATMs per reigon",
          "\nEnter Q to quit")
print_menu()

option=input('Enter a number from the above menu: ')
if option=='1':
    city_u=input('Enter a city name: ')
    menu.city_atms(city_u)


elif option=='2':
    print('\nmaximum number of atms is in:', menu.max_no_of_atms())

elif option=='3':
    menu.vis_top_atms()

elif option=='4':
    menu.atms_scatter()
    
elif option=='5':
    menu.reg_pie()
        
elif option=='Q':
    pass
else:
    print('Invalid option')



Menu: 
1- Number of atms in a city 
2- The city with Max. number of ATMs 
3- Bar plot for top cities with largest number of ATMs 
4- Scatter plot for ATMs' distribution in Saudi Arabia 
5- Pie chart shows ATMs per reigon 
Enter Q to quit


Enter a number from the above menu:  1
Enter a city name:  NAJRAN


102


In [107]:
menu.heat_map()

Figure(layout=FigureLayout(height='420px'))

In [108]:
menu.atm_marker()

Figure(layout=FigureLayout(height='420px'))