In [1]:
import pandas as pd
import numpy as np
import re
import warnings

from geopy import Nominatim

warnings.filterwarnings("ignore")


## Pubs Data

In [2]:
# Load the two raw pubs CSV exports used for data wrangling: the core pub
# records and the per-pub detailed data. index_col=[0] uses the first CSV
# column as the DataFrame index instead of a regular column.
# NOTE(review): the absolute "/content/..." paths look environment-specific —
# consider a configurable data directory.
pubs_df = pd.read_csv("/content/raw-london-pubs.csv", index_col=[0])
pubs_detailed = pd.read_csv("/content/raw-london-pubs-detailed-data.csv", index_col=[0])

### Cleaning Pubs data

In [3]:
# check raw pub core data
pubs_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name
0,4bc1e42eabf49521c690c193,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","{'main': {'latitude': 51.653075, 'longitude': ...","{'address': '92 Wood St', 'admin_region': 'Eng...",The Black Horse
1,4b995bccf964a5209f7535e3,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","{'main': {'latitude': 51.652979, 'longitude': ...","{'address': '58 High St', 'admin_region': 'Eng...",Ye Olde Mitre Inne
2,4bb4da3fa7059521b8cc1bce,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","{'main': {'latitude': 51.652533, 'longitude': ...","{'address': 'Barnet Rd', 'admin_region': 'Engl...",The Arkley
3,4dc436e5ae608779d11bd561,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","{'main': {'latitude': 51.650059, 'longitude': ...","{'address': '3 East Barnet Rd', 'admin_region'...",Railway Tavern
4,4d72b1e78e12b1f793863f05,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","{'main': {'latitude': 51.653499, 'longitude': ...","{'address': '84 High St', 'admin_region': 'Eng...",The Kings Head


In [4]:
# check duplicated rows of core pub dataframe
pubs_df.duplicated().value_counts()

False    1159
True      569
dtype: int64

In [5]:
# drop duplicated rows of core pub dataframe
pubs_df = pubs_df.drop_duplicates()
pubs_df.shape

(1159, 5)

In [6]:
# pull selected values out of a column whose cells are stringified dictionaries
def extract_value(df, column, regex):
    """Apply ``regex`` to the string column ``df[column]`` and return the captures.

    The result is exactly what ``Series.str.extract`` produces: one output
    column per capture group in ``regex``, with NaN wherever the pattern
    does not match.
    """
    source_text = df[column]
    return source_text.str.extract(regex)

In [7]:
# Pull the fields of interest out of the stringified dict/list columns.
# The patterns are raw strings so that the regex escapes (\{ \} \[ \]) are not
# read as (invalid) Python string escapes, which newer Python versions warn
# about; the regular expressions themselves are unchanged.
# Note: the geocodes and address patterns only match when 'main' / 'address'
# is the first key of the dictionary text; other rows come back as NaN.
pubs_df["geocodes"] = extract_value(pubs_df, "geocodes", r"{'main':.\{(.*?)\}")
pubs_df["address"] = extract_value(pubs_df, "location", r"{'address':.'(.*?)',")
pubs_df["locality"] = extract_value(pubs_df, "location", r"'locality':.'(.*?)',")
pubs_df["neighborhood"] = extract_value(pubs_df, "location", r"'neighborhood':.\['(.*)'\]")
pubs_df["postcode"] = extract_value(pubs_df, "location", r"'postcode':.'(.*?)'")
pubs_df["category"] = extract_value(pubs_df, "categories", r"'name':.'(.*?)'")

In [8]:
pubs_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name,address,locality,neighborhood,postcode,category
0,4bc1e42eabf49521c690c193,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.653075, 'longitude': -0.206657","{'address': '92 Wood St', 'admin_region': 'Eng...",The Black Horse,92 Wood St,London,Barnet,EN5 4BW,Pub
1,4b995bccf964a5209f7535e3,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.652979, 'longitude': -0.199367","{'address': '58 High St', 'admin_region': 'Eng...",Ye Olde Mitre Inne,58 High St,Hertfordshire,Barnet,EN5 5SJ,Pub
2,4bb4da3fa7059521b8cc1bce,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.652533, 'longitude': -0.219573","{'address': 'Barnet Rd', 'admin_region': 'Engl...",The Arkley,Barnet Rd,Barnet,Barnet,EN5 3EP,Pub
3,4dc436e5ae608779d11bd561,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.650059, 'longitude': -0.174692","{'address': '3 East Barnet Rd', 'admin_region'...",Railway Tavern,3 East Barnet Rd,Barnet,Barnet,EN4 8RR,Pub
4,4d72b1e78e12b1f793863f05,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.653499, 'longitude': -0.200979","{'address': '84 High St', 'admin_region': 'Eng...",The Kings Head,84 High St,Barnet,Barnet,EN5 5SN,Pub


In [9]:
# check for null values in geocodes column
pubs_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1159 entries, 0 to 1724
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   fsq_id        1159 non-null   object
 1   categories    1159 non-null   object
 2   geocodes      1142 non-null   object
 3   location      1159 non-null   object
 4   name          1159 non-null   object
 5   address       1115 non-null   object
 6   locality      1134 non-null   object
 7   neighborhood  989 non-null    object
 8   postcode      1154 non-null   object
 9   category      1159 non-null   object
dtypes: object(10)
memory usage: 99.6+ KB


In [10]:
# Rows whose geocodes could not be extracted. An explicit copy is taken
# because this subset is filled in later and must be an independent frame.
empty_geocodes = pubs_df[pubs_df["geocodes"].isnull()].copy()
# Last expression of the cell, so the number of rows to geocode is displayed.
len(empty_geocodes)

In [11]:
# get coordinate to fill the null values in the geocodes column
geolocator = Nominatim(user_agent="women_safety")

In [12]:
def get_geocodes():
    """Fill the ``geocodes`` column of the module-level ``empty_geocodes`` frame.

    Every row is looked up by street address first and by postcode as a
    fallback. The results are collected and assigned to the column in one
    step: the rows yielded by ``iterrows`` may be copies, so writing into
    them is not guaranteed to change ``empty_geocodes``.
    """
    found = [get_geocodes_by_street(row) for _, row in empty_geocodes.iterrows()]
    empty_geocodes["geocodes"] = found


def _lookup_geocodes(query):
    """Return ``"'latitude': ..., 'longitude': ..."`` for ``query``, or None.

    None is returned when the geocoder finds nothing or the request fails.
    The lookup stays best-effort, but only ``Exception`` is caught so that
    KeyboardInterrupt / SystemExit still propagate.
    """
    try:
        location = geolocator.geocode(query)
    except Exception:
        return None
    if location is None:
        return None
    return f"'latitude': {location.latitude}, 'longitude': {location.longitude}"


# try to find coordinates from address
def get_geocodes_by_street(row):
    """Geocode ``row`` by street address, falling back to its postcode.

    Returns the geocodes string, or NaN when neither lookup succeeds. The
    value is also stored in ``row["geocodes"]``, as before.
    """
    address = row["address"]
    geocodes = None
    # A missing address would otherwise be sent to the geocoder as the
    # literal text "nan, London"; skip straight to the postcode instead.
    if pd.notna(address):
        geocodes = _lookup_geocodes(f"{address}, London")
    if geocodes is None:
        return get_geocodes_by_postcode(row)
    row["geocodes"] = geocodes
    return geocodes


# try to find coordinates from postcode
def get_geocodes_by_postcode(row):
    """Geocode ``row`` by postcode.

    Returns the geocodes string, or NaN (after printing a message with the
    row's ``fsq_id``) when no coordinates are found. The value is also
    stored in ``row["geocodes"]``, as before.
    """
    fsq_id = row["fsq_id"]
    postcode = row["postcode"]
    geocodes = _lookup_geocodes(postcode) if pd.notna(postcode) else None
    if geocodes is None:
        row["geocodes"] = np.nan
        print(f"It was not possible to find coordinates for {fsq_id}")
        return np.nan
    row["geocodes"] = geocodes
    return geocodes

In [13]:
# get missing geocodes using geopy library
get_geocodes()

In [14]:
# check for missing data
print(empty_geocodes["geocodes"].isnull().sum())

0


In [15]:
# assign the geocodes found to the main dataframe
# .loc writes into pubs_df itself; the chained form df[col][mask] = ... assigns
# to an intermediate object and is not guaranteed to update pubs_df.
# The right-hand Series is aligned to the selected rows by index label.
missing_geocodes = pubs_df["geocodes"].isnull()
pubs_df.loc[missing_geocodes, "geocodes"] = empty_geocodes["geocodes"]
print(pubs_df["geocodes"].isnull().sum())

0


In [16]:
# find empty strings in the postcode column and replace them with numpy nan
# (.loc instead of chained df[col][mask] = ..., which may not update pubs_df)
pubs_df.loc[pubs_df["postcode"] == "", "postcode"] = np.nan

In [17]:
# divide the geocode column into two new columns: latitude and longitude
def get_lat_long(df):
    lat_list = []
    long_list = []
    geocodes_split = df["geocodes"].str.split(",")
    for lat, longi in geocodes_split:
        lat = re.search("latitude':.(.*)", lat).group(1)
        lat_list.append(lat)
        longi = re.search("longitude':.(.*)", longi).group(1)
        long_list.append(longi)
    df["latitude"] = lat_list
    df["longitude"] = long_list

In [18]:
get_lat_long(pubs_df)

In [19]:
pubs_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name,address,locality,neighborhood,postcode,category,latitude,longitude
0,4bc1e42eabf49521c690c193,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.653075, 'longitude': -0.206657","{'address': '92 Wood St', 'admin_region': 'Eng...",The Black Horse,92 Wood St,London,Barnet,EN5 4BW,Pub,51.653075,-0.206657
1,4b995bccf964a5209f7535e3,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.652979, 'longitude': -0.199367","{'address': '58 High St', 'admin_region': 'Eng...",Ye Olde Mitre Inne,58 High St,Hertfordshire,Barnet,EN5 5SJ,Pub,51.652979,-0.199367
2,4bb4da3fa7059521b8cc1bce,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.652533, 'longitude': -0.219573","{'address': 'Barnet Rd', 'admin_region': 'Engl...",The Arkley,Barnet Rd,Barnet,Barnet,EN5 3EP,Pub,51.652533,-0.219573
3,4dc436e5ae608779d11bd561,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.650059, 'longitude': -0.174692","{'address': '3 East Barnet Rd', 'admin_region'...",Railway Tavern,3 East Barnet Rd,Barnet,Barnet,EN4 8RR,Pub,51.650059,-0.174692
4,4d72b1e78e12b1f793863f05,"[{'id': 13018, 'name': 'Pub', 'icon': {'prefix...","'latitude': 51.653499, 'longitude': -0.200979","{'address': '84 High St', 'admin_region': 'Eng...",The Kings Head,84 High St,Barnet,Barnet,EN5 5SN,Pub,51.653499,-0.200979


In [20]:
# drop the raw nested columns that will not be used in this project
pubs_df = pubs_df.drop(
    columns=["categories", "geocodes", "location"],
)

In [21]:
pubs_df.head()

Unnamed: 0,fsq_id,name,address,locality,neighborhood,postcode,category,latitude,longitude
0,4bc1e42eabf49521c690c193,The Black Horse,92 Wood St,London,Barnet,EN5 4BW,Pub,51.653075,-0.206657
1,4b995bccf964a5209f7535e3,Ye Olde Mitre Inne,58 High St,Hertfordshire,Barnet,EN5 5SJ,Pub,51.652979,-0.199367
2,4bb4da3fa7059521b8cc1bce,The Arkley,Barnet Rd,Barnet,Barnet,EN5 3EP,Pub,51.652533,-0.219573
3,4dc436e5ae608779d11bd561,Railway Tavern,3 East Barnet Rd,Barnet,Barnet,EN4 8RR,Pub,51.650059,-0.174692
4,4d72b1e78e12b1f793863f05,The Kings Head,84 High St,Barnet,Barnet,EN5 5SN,Pub,51.653499,-0.200979


### Cleaning detailed data and merging all pubs data into one dataframe

In [22]:
# check pubs detailed data dataframe
pubs_detailed.head()

Unnamed: 0,fsq_id,popularity,price,rating
0,4bc1e42eabf49521c690c193,0.939551,1.0,8.1
1,4b995bccf964a5209f7535e3,0.982768,1.0,7.6
2,4bb4da3fa7059521b8cc1bce,0.987375,1.0,7.7
3,4dc436e5ae608779d11bd561,0.950895,1.0,7.4
4,4d72b1e78e12b1f793863f05,0.377777,1.0,6.6


In [23]:
# check for duplicated data
pubs_detailed.duplicated().value_counts()

False    1159
True      569
dtype: int64

In [24]:
# drop the duplicated rows
pubs_detailed = pubs_detailed.drop_duplicates()

In [25]:
# check that the two dataframes have the same number of rows
print(pubs_df.shape)
print(pubs_detailed.shape)

(1159, 9)
(1159, 4)


In [26]:
# merge the pubs updated dataframe with the pubs detailed data dataframe
pubs_data = pubs_df.merge(pubs_detailed, how="left", on="fsq_id")

In [27]:
pubs_data.head()

Unnamed: 0,fsq_id,name,address,locality,neighborhood,postcode,category,latitude,longitude,popularity,price,rating
0,4bc1e42eabf49521c690c193,The Black Horse,92 Wood St,London,Barnet,EN5 4BW,Pub,51.653075,-0.206657,0.939551,1.0,8.1
1,4b995bccf964a5209f7535e3,Ye Olde Mitre Inne,58 High St,Hertfordshire,Barnet,EN5 5SJ,Pub,51.652979,-0.199367,0.982768,1.0,7.6
2,4bb4da3fa7059521b8cc1bce,The Arkley,Barnet Rd,Barnet,Barnet,EN5 3EP,Pub,51.652533,-0.219573,0.987375,1.0,7.7
3,4dc436e5ae608779d11bd561,Railway Tavern,3 East Barnet Rd,Barnet,Barnet,EN4 8RR,Pub,51.650059,-0.174692,0.950895,1.0,7.4
4,4d72b1e78e12b1f793863f05,The Kings Head,84 High St,Barnet,Barnet,EN5 5SN,Pub,51.653499,-0.200979,0.377777,1.0,6.6


In [28]:
pubs_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1159 entries, 0 to 1158
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   fsq_id        1159 non-null   object 
 1   name          1159 non-null   object 
 2   address       1115 non-null   object 
 3   locality      1134 non-null   object 
 4   neighborhood  989 non-null    object 
 5   postcode      1114 non-null   object 
 6   category      1159 non-null   object 
 7   latitude      1159 non-null   object 
 8   longitude     1159 non-null   object 
 9   popularity    1159 non-null   float64
 10  price         1143 non-null   float64
 11  rating        865 non-null    float64
dtypes: float64(3), object(9)
memory usage: 117.7+ KB


### Resolving missing values

In [29]:
pubs_data.isnull().sum()

fsq_id            0
name              0
address          44
locality         25
neighborhood    170
postcode         45
category          0
latitude          0
longitude         0
popularity        0
price            16
rating          294
dtype: int64

In [30]:
# drop rows with null values in rating
# (reassign instead of inplace=True so the cell reads as an explicit step)
pubs_data = pubs_data.dropna(subset=["rating"])

In [31]:
pubs_data.shape

(865, 12)

In [32]:
#assign mode to missing values in price
pubs_data["price"].value_counts()

1.0    698
2.0    141
3.0     13
Name: price, dtype: int64

In [33]:
# Fill missing prices with the most frequent price level (the mode).
# The result is assigned back to the column: calling fillna(inplace=True) on
# pubs_data["price"] acts on a selected Series and may leave pubs_data unchanged.
price_mode = pubs_data["price"].mode().iloc[0]
pubs_data["price"] = pubs_data["price"].fillna(price_mode)

In [34]:
pubs_data[pubs_data["postcode"].isnull()]

Unnamed: 0,fsq_id,name,address,locality,neighborhood,postcode,category,latitude,longitude,popularity,price,rating
5,4c0bab78ffb8c9b6e9886d61,The Monken Holt,,,Monken Hadley,,Pub,51.658291,-0.201368,0.246146,1.0,6.8
593,4f2188efe4b0cebcfb6406aa,"Ortaköy, Shisha Lounge Bar",83 Mayes Rd,London,,,Pub,51.594699,-0.112428,0.926055,1.0,6.7
674,4c653ed03f69c9b6fea28ad9,The General Elliott,,,,,Pub,51.542912,-0.488453,0.913205,1.0,7.6
808,4c75413c1b30a09338c4ee09,The Limping Fox,Rushett Rd,Long Ditton,,,Pub,51.386031,-0.323829,0.990826,1.0,7.5
818,562d09d4498eb208288dda65,The Railway Pub,3 Victoria Rd,London,,,Pub,51.424276,-0.332793,0.916892,1.0,6.9


In [35]:
# The Monken Holt: fill the missing postcode, matching on the venue id rather
# than row label 5, which depends on the current row order (and a missing
# label would silently append a new row).
pubs_data.loc[pubs_data["fsq_id"] == "4c0bab78ffb8c9b6e9886d61", "postcode"] = "EN5 5SU"

In [36]:
# Ortaköy, Shisha Lounge Bar: fill the missing postcode, matching on the venue
# id rather than row label 593, which depends on the current row order.
pubs_data.loc[pubs_data["fsq_id"] == "4f2188efe4b0cebcfb6406aa", "postcode"] = "N22 6UP"

In [37]:
# The General Elliott: fill the missing postcode, matching on the venue id
# rather than row label 674, which depends on the current row order.
pubs_data.loc[pubs_data["fsq_id"] == "4c653ed03f69c9b6fea28ad9", "postcode"] = "UB8 2UR"

In [38]:
# The Limping Fox: fill the missing postcode, matching on the venue id rather
# than row label 808, which depends on the current row order.
pubs_data.loc[pubs_data["fsq_id"] == "4c75413c1b30a09338c4ee09", "postcode"] = "KT7 0UX"

In [39]:
# The Railway Pub: fill the missing postcode, matching on the venue id rather
# than row label 818, which depends on the current row order.
pubs_data.loc[pubs_data["fsq_id"] == "562d09d4498eb208288dda65", "postcode"] = "TW11 0BB"

In [40]:
# drop columns that will not be used anymore
# (reassign instead of inplace=True so the cell reads as an explicit step)
pubs_data = pubs_data.drop(
    columns=["address", "locality", "neighborhood", "category", "fsq_id"]
)

In [41]:
pubs_data.head(10)

Unnamed: 0,name,postcode,latitude,longitude,popularity,price,rating
0,The Black Horse,EN5 4BW,51.653075,-0.206657,0.939551,1.0,8.1
1,Ye Olde Mitre Inne,EN5 5SJ,51.652979,-0.199367,0.982768,1.0,7.6
2,The Arkley,EN5 3EP,51.652533,-0.219573,0.987375,1.0,7.7
3,Railway Tavern,EN4 8RR,51.650059,-0.174692,0.950895,1.0,7.4
4,The Kings Head,EN5 5SN,51.653499,-0.200979,0.377777,1.0,6.6
5,The Monken Holt,EN5 5SU,51.658291,-0.201368,0.246146,1.0,6.8
6,The Green Dragon,EN5 4RE,51.674789,-0.216312,0.972166,1.0,7.2
7,The Red Lion,EN5 5UW,51.652453,-0.199044,0.967387,1.0,6.1
8,The Orange Tree,N20 8NX,51.629934,-0.197419,0.951498,1.0,6.5
12,Queens Arms,EN5 1AB,51.64669,-0.188042,0.888001,1.0,5.6


In [42]:
pubs_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 865 entries, 0 to 1157
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        865 non-null    object 
 1   postcode    865 non-null    object 
 2   latitude    865 non-null    object 
 3   longitude   865 non-null    object 
 4   popularity  865 non-null    float64
 5   price       865 non-null    float64
 6   rating      865 non-null    float64
dtypes: float64(3), object(4)
memory usage: 86.4+ KB


In [43]:
#saving cleaned data for the next stage
pubs_data.to_csv("data-wrangling-pubs-data.csv")

## Police data

In [44]:
police_df = pd.read_csv("/content/raw-london-police-station-data.csv", index_col=[0])

In [45]:
police_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name
0,4f75c889e4b0b24b0c1c4c98,"[{'id': 12072, 'name': 'Police Station', 'icon...","{'main': {'latitude': 51.652283, 'longitude': ...","{'address': '26-32 High St', 'admin_region': '...",Barnet Police Station
1,886e605708d446fd6af7be02,"[{'id': 12072, 'name': 'Police Station', 'icon...","{'main': {'latitude': 51.654345, 'longitude': ...","{'admin_region': 'England', 'country': 'GB', '...",Barnet Police Station
2,7687cbfe5d9743d0bcfd91b6,"[{'id': 12072, 'name': 'Police Station', 'icon...","{'main': {'latitude': 51.655584, 'longitude': ...","{'address': '111 High St', 'admin_region': 'En...",Metropolitan Police
3,dd8ab317b29242cec99126f6,"[{'id': 12072, 'name': 'Police Station', 'icon...","{'main': {'latitude': 51.64317, 'longitude': -...","{'address': 'Snt Base, 13 Cat Hill', 'admin_re...",Cat Hill
4,fa389098b2a249fb9381e3f3,"[{'id': 12072, 'name': 'Police Station', 'icon...","{'main': {'latitude': 51.643139, 'longitude': ...","{'address': '13 Cat Hill', 'admin_region': 'En...",Metropolitan Police Service


In [46]:
# check for duplicated rows
police_df.duplicated().value_counts()

False    246
True     224
dtype: int64

In [47]:
# drop duplicated rows
police_df = police_df.drop_duplicates()

In [48]:
# Extract selected data from the columns whose values are stringified dicts.
# The patterns are raw strings so that the regex escapes (\{ \} \[ \]) are not
# read as (invalid) Python string escapes, which newer Python versions warn
# about; the regular expressions themselves are unchanged.
# Note: the geocodes and address patterns only match when 'main' / 'address'
# is the first key of the dictionary text; other rows come back as NaN.
police_df["geocodes"] = extract_value(police_df, "geocodes", r"{'main':.\{(.*?)\}")
police_df["address"] = extract_value(police_df, "location", r"{'address':.'(.*?)',")
police_df["locality"] = extract_value(police_df, "location", r"'locality':.'(.*?)',")
police_df["neighborhood"] = extract_value(police_df, "location", r"'neighborhood':.\['(.*)'\]")
police_df["postcode"] = extract_value(police_df, "location", r"'postcode':.'(.*?)'")
police_df["category"] = extract_value(police_df, "categories", r"'name':.'(.*?)'")

In [49]:
police_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name,address,locality,neighborhood,postcode,category
0,4f75c889e4b0b24b0c1c4c98,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.652283, 'longitude': -0.198046","{'address': '26-32 High St', 'admin_region': '...",Barnet Police Station,26-32 High St,Barnet,Barnet,EN5 5RU,Police Station
1,886e605708d446fd6af7be02,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.654345, 'longitude': -0.201383","{'admin_region': 'England', 'country': 'GB', '...",Barnet Police Station,,London,,,Police Station
2,7687cbfe5d9743d0bcfd91b6,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.655584, 'longitude': -0.202649","{'address': '111 High St', 'admin_region': 'En...",Metropolitan Police,111 High St,Barnet,,EN5 5XY,Police Station
3,dd8ab317b29242cec99126f6,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.64317, 'longitude': -0.162268","{'address': 'Snt Base, 13 Cat Hill', 'admin_re...",Cat Hill,"Snt Base, 13 Cat Hill",Barnet,,EN4 8HG,Police Station
4,fa389098b2a249fb9381e3f3,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.643139, 'longitude': -0.161936","{'address': '13 Cat Hill', 'admin_region': 'En...",Metropolitan Police Service,13 Cat Hill,Barnet,,EN4 8HG,Police Station


In [50]:
# check for missing data
police_df[police_df["geocodes"].isnull()]

Unnamed: 0,fsq_id,categories,geocodes,location,name,address,locality,neighborhood,postcode,category
50,52e92a52498e8f6d4ed62bfc,"[{'id': 12072, 'name': 'Police Station', 'icon...",,"{'address': '109 Lambeth Rd', 'admin_region': ...",Metropolitan Police Central Communications Com...,109 Lambeth Rd,London,,SE1 7JP,Police Station


In [51]:
# assign the missing geocodes data for the one station without coordinates
location = geolocator.geocode("109 Lambeth Rd, London")
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on location.latitude
    raise ValueError("No geocoding result for '109 Lambeth Rd, London'")
# match on the venue id rather than row label 50, which depends on row order
police_df.loc[
    police_df["fsq_id"] == "52e92a52498e8f6d4ed62bfc", "geocodes"
] = f"'latitude': {location.latitude}, 'longitude': {location.longitude}"

In [52]:
police_df.loc[50, "geocodes"]

"'latitude': 51.49420595, 'longitude': -0.11762509407853665"

In [53]:
##divide the geocode column into two new columns: latitude and longitude
get_lat_long(police_df)

In [54]:
police_df.head()

Unnamed: 0,fsq_id,categories,geocodes,location,name,address,locality,neighborhood,postcode,category,latitude,longitude
0,4f75c889e4b0b24b0c1c4c98,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.652283, 'longitude': -0.198046","{'address': '26-32 High St', 'admin_region': '...",Barnet Police Station,26-32 High St,Barnet,Barnet,EN5 5RU,Police Station,51.652283,-0.198046
1,886e605708d446fd6af7be02,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.654345, 'longitude': -0.201383","{'admin_region': 'England', 'country': 'GB', '...",Barnet Police Station,,London,,,Police Station,51.654345,-0.201383
2,7687cbfe5d9743d0bcfd91b6,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.655584, 'longitude': -0.202649","{'address': '111 High St', 'admin_region': 'En...",Metropolitan Police,111 High St,Barnet,,EN5 5XY,Police Station,51.655584,-0.202649
3,dd8ab317b29242cec99126f6,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.64317, 'longitude': -0.162268","{'address': 'Snt Base, 13 Cat Hill', 'admin_re...",Cat Hill,"Snt Base, 13 Cat Hill",Barnet,,EN4 8HG,Police Station,51.64317,-0.162268
4,fa389098b2a249fb9381e3f3,"[{'id': 12072, 'name': 'Police Station', 'icon...","'latitude': 51.643139, 'longitude': -0.161936","{'address': '13 Cat Hill', 'admin_region': 'En...",Metropolitan Police Service,13 Cat Hill,Barnet,,EN4 8HG,Police Station,51.643139,-0.161936


In [55]:
# drop the raw nested columns that will not be used in this project
police_df = police_df.drop(
    columns=["categories", "geocodes", "location"],
)

In [56]:
police_df.head()

Unnamed: 0,fsq_id,name,address,locality,neighborhood,postcode,category,latitude,longitude
0,4f75c889e4b0b24b0c1c4c98,Barnet Police Station,26-32 High St,Barnet,Barnet,EN5 5RU,Police Station,51.652283,-0.198046
1,886e605708d446fd6af7be02,Barnet Police Station,,London,,,Police Station,51.654345,-0.201383
2,7687cbfe5d9743d0bcfd91b6,Metropolitan Police,111 High St,Barnet,,EN5 5XY,Police Station,51.655584,-0.202649
3,dd8ab317b29242cec99126f6,Cat Hill,"Snt Base, 13 Cat Hill",Barnet,,EN4 8HG,Police Station,51.64317,-0.162268
4,fa389098b2a249fb9381e3f3,Metropolitan Police Service,13 Cat Hill,Barnet,,EN4 8HG,Police Station,51.643139,-0.161936


In [57]:
# save dataframe into csv
police_df.to_csv("data-wrangling-police_data.csv")