In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import geopandas as gpd

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
file_path = 'tourism.geojson'
gdf = gpd.read_file(file_path)
gdf.head(5)

Unnamed: 0,id,@id,access,accessibility,addr:city,addr:country,addr:district,addr:floor,addr:full,addr:housename,...,wheelchair:description,wheelchair:rooms,wifi,wikidata,wikimedia_commons,wikipedia,year_of_construction,zoo,@geometry,geometry
0,relation/1341944,relation/1341944,,,New York,,,,,,...,,,,Q829806,,en:The Dakota,,,center,POINT (-73.97615 40.77678)
1,relation/2100751,relation/2100751,,,,,,,,,...,,,,,,,,,center,POINT (-73.97755 40.76161)
2,relation/2154552,relation/2154552,,,,,,,,,...,,,,,,,,,center,POINT (-73.97618 40.75854)
3,relation/2399300,relation/2399300,,,New York,US,,,,,...,,,,,,,,,center,POINT (-73.98047 40.76429)
4,relation/3343754,relation/3343754,,,New York,,,,,,...,,,,,,,,,center,POINT (-73.99866 40.73250)


In [4]:
gdf.describe()

Unnamed: 0,id,@id,access,accessibility,addr:city,addr:country,addr:district,addr:floor,addr:full,addr:housename,...,wheelchair:description,wheelchair:rooms,wifi,wikidata,wikimedia_commons,wikipedia,year_of_construction,zoo,@geometry,geometry
count,1590,1590,12,1,540,42,1,12,3,3,...,2,1,1,339,6,264,5,1,455,1590
unique,1590,1590,4,1,2,1,1,9,3,3,...,2,1,1,326,6,253,4,1,1,1588
top,relation/1341944,relation/1341944,yes,yes,New York,US,Soho,2,Between 40th and 42nd Streets at 6th Avenue,Flatiron,...,Very wide sidewalks and plenty of areas to vie...,10,LinkNYC Free Public Wi-Fi,Q4018413,File:The London NYC 8Av 54St sun jeh.jpg,en:Gagosian Gallery,1972..1974,enclosure,center,POINT (-73.9775451 40.761612)
freq,1,1,6,1,527,42,1,3,1,1,...,1,1,1,3,1,3,2,1,455,2


## Check Data Types
Data types will need to be changed into their appropriate types as they are all undefined objects at the moment

In [5]:
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns
print(gdf.dtypes)

id                               object
@id                              object
access                           object
accessibility                    object
addr:city                        object
addr:country                     object
addr:district                    object
addr:floor                       object
addr:full                        object
addr:housename                   object
addr:housenumber                 object
addr:place                       object
addr:postcode                    object
addr:state                       object
addr:street                      object
addr:unit                        object
aerialway                        object
air_conditioning                 object
alt_name                         object
alt_name:cs                      object
alt_name:en                      object
alt_name:es                      object
alt_name:fr                      object
amenity                          object
architect                        object


## Checking for feature names
Total of 353 features. Most are irrelevant but need to be filtered down more so that data types can be assigned to each one

In [6]:
feauture_names = gdf.columns.tolist()

In [7]:
print(feauture_names)

['id', '@id', 'access', 'accessibility', 'addr:city', 'addr:country', 'addr:district', 'addr:floor', 'addr:full', 'addr:housename', 'addr:housenumber', 'addr:place', 'addr:postcode', 'addr:state', 'addr:street', 'addr:unit', 'aerialway', 'air_conditioning', 'alt_name', 'alt_name:cs', 'alt_name:en', 'alt_name:es', 'alt_name:fr', 'amenity', 'architect', 'architect:wikidata', 'architect:wikipedia', 'area', 'artist', 'artist:wikidata', 'artist:wikipedia', 'artist_name', 'artwork_subject', 'artwork_type', 'attraction', 'backrest', 'bar', 'barrier', 'bench', 'bicycle', 'bicycle_parking', 'bin', 'board:title', 'board_type', 'branch', 'brand', 'brand:wikidata', 'brand:wikipedia', 'breakfast', 'bridge', 'bridge:structure', 'building', 'building:architecture', 'building:colour', 'building:levels', 'building:material', 'building:name', 'building:part', 'building:start_date', 'building:use', 'building:wikidata', 'building:wikipedia', 'business_centre', 'capacity', 'cat', 'check_date', 'check_date:

## Filtering features to make dataset smaller
Once all features are decided on, datatypes will be set 

- 'name', 
- 'amenity', 
- building', 
- 'capacity',
- 'description'
- 'operator', 
- 'opening_hours'
- 'source'
- 'tourism'
- 'website'
- 'building:name'

In [8]:
relevant_features = ['name', 'tourism','description','opening_hours','website','geometry']

In [9]:
new_gdf = gdf[relevant_features]

In [10]:
print(new_gdf.columns)

Index(['name', 'tourism', 'description', 'opening_hours', 'website',
       'geometry'],
      dtype='object')


In [11]:
total_rows = gdf.shape[0]
print("Total rows:", total_rows)
gdf.shape


Total rows: 1590


(1590, 353)

In [12]:
new_gdf.head(5)

Unnamed: 0,name,tourism,description,opening_hours,website,geometry
0,The Dakota,attraction,,,,POINT (-73.97615 40.77678)
1,MoMA,attraction,,,,POINT (-73.97755 40.76161)
2,Saint Patrick’s Cathedral,attraction,,06:30-20:45,,POINT (-73.97618 40.75854)
3,Wellington Hotel,hotel,,,https://www.wellingtonhotel.com,POINT (-73.98047 40.76429)
4,Washington Square Hotel,hotel,,,https://washingtonsquarehotel.com,POINT (-73.99866 40.73250)


In [13]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name              372
tourism             0
description      1549
opening_hours    1367
website           903
geometry            0
dtype: int64


## Converting features into appropriate data types

In [14]:
new_gdf.dtypes

name               object
tourism            object
description        object
opening_hours      object
website            object
geometry         geometry
dtype: object

In [15]:
new_gdf.select_dtypes(["object"]).describe().T

Unnamed: 0,count,unique,top,freq
name,1218,1091,Link NYC,35
tourism,1590,17,hotel,426
description,41,36,Has USB charging feature as well as arrival ti...,6
opening_hours,223,113,Tu-Sa 10:00-18:00,66
website,687,671,https://www.milesmcenery.com,4


## Dealing with duplicated values

In [16]:
duplicated_names = new_gdf['name'].duplicated()

# Filter the GeoDataFrame to show only the duplicated rows
duplicated_rows = new_gdf[duplicated_names]

# Print the duplicated rows
print(duplicated_rows['name'])

70                                Holiday Inn
88                              Fairfield Inn
97                                        NaN
98                    Four Points by Sheraton
125                       Holiday Inn Express
126                                   Hampton
127                         Hilton Garden Inn
172                                 Courtyard
180                               Hyatt Place
181                                   Hampton
182                       Holiday Inn Express
184                               Holiday Inn
185                             Fairfield Inn
193                         Hilton Garden Inn
214                         Hilton Garden Inn
241                 Saint Patrick’s Cathedral
244                    Fairfield Inn & Suites
245                   Four Points by Sheraton
248                         Hilton Garden Inn
250                                   Hampton
256                         Hilton Garden Inn
316                         Hilton

# Dropping 

In [17]:
unique_values = gdf['tourism'].unique()
print(unique_values)

['attraction' 'hotel' 'museum' 'yes' 'artwork' 'motel' 'zoo' 'picnic_site'
 'gallery' 'hostel' 'information' 'viewpoint' 'apartment' 'guest_house'
 'sightseeing' 'aquarium' 'camp_site']


In [18]:
hotels = new_gdf[new_gdf['tourism']== 'hotel']
print(hotels)

                                                   name tourism  \
3                                      Wellington Hotel   hotel   
4                               Washington Square Hotel   hotel   
5                                              The Jane   hotel   
7                                     Astor on the Park   hotel   
8                                       Park View Hotel   hotel   
17                                            The Plaza   hotel   
28                                          Holiday Inn   hotel   
29                              Four Points by Sheraton   hotel   
33                                Holiday Inn Manhattan   hotel   
34                                            Manhattan   hotel   
37                                Waldorf-Astoria Hotel   hotel   
45                                 The Hotel Edison NYC   hotel   
46                    InterContinental New York Barclay   hotel   
47                              Hilton New York Midtown   hote

In [19]:
hotels.count()

name             418
tourism          426
description        5
opening_hours      8
website          296
geometry         426
dtype: int64

In [20]:
print(hotels.isna().sum())

name               8
tourism            0
description      421
opening_hours    418
website          130
geometry           0
dtype: int64


In [21]:
information = new_gdf[new_gdf['tourism']== 'information']
print(information)

                                                   name      tourism  \
207            The Dairy (Visitor Center and Gift Shop)  information   
335                               Dana Discovery Center  information   
338                                         NYC Audubon  information   
367               Roosevelt Island Visitor Center Kiosk  information   
389                Times Square Museum & Visitor Center  information   
408                                      Visitor Center  information   
427                                                 NaN  information   
469                                                 NaN  information   
497            Statue of Liberty & Ellis Island Tickets  information   
517                                                 NaN  information   
535                                                 NaN  information   
539                                                 NaN  information   
542                      United Nations Visitors Centre  informa

In [22]:
information.count()

name             112
tourism          277
description        8
opening_hours      2
website            7
geometry         277
dtype: int64

In [23]:
information.isna().sum()

name             165
tourism            0
description      269
opening_hours    275
website          270
geometry           0
dtype: int64

In [24]:
new_gdf = new_gdf[new_gdf['tourism']!= 'hotel']

In [25]:
new_gdf = new_gdf[new_gdf['tourism']!= 'information']

In [26]:
motels = new_gdf[new_gdf['tourism']== 'motel']
print(motels)
motels.count()

                    name tourism description opening_hours  \
30               Super 8   motel         NaN           NaN   
199          Econo Lodge   motel         NaN           NaN   
328  Liberty View Suites   motel         NaN           NaN   
459       Westwood Motel   motel         NaN           NaN   

                                               website  \
30                                                 NaN   
199  https://www.choicehotels.com/new-york/new-york...   
328                      https://www.liberty-view.com/   
459                                                NaN   

                       geometry  
30   POINT (-96.55582 39.18118)  
199  POINT (-73.98810 40.76047)  
328  POINT (-74.01664 40.70766)  
459  POINT (-96.59129 39.17349)  


name             4
tourism          4
description      0
opening_hours    0
website          2
geometry         4
dtype: int64

In [27]:
motels.isna().sum()

name             0
tourism          0
description      4
opening_hours    4
website          2
geometry         0
dtype: int64

In [28]:
new_gdf = new_gdf[new_gdf['tourism']!= 'motel']

In [29]:
apartment = new_gdf[new_gdf['tourism']== 'apartment']
print(apartment)
apartment.count()

                              name    tourism description opening_hours  \
415                35 Hudson Yards  apartment         NaN           NaN   
477                            NaN  apartment         NaN           NaN   
620                  Stonehenge 57  apartment         NaN           NaN   
845                         WeLive  apartment         NaN           NaN   
875                          Ergie  apartment         NaN           NaN   
898              the Humphrey loft  apartment         NaN           NaN   
925                  Green’s house  apartment         NaN           NaN   
992                           Novo  apartment         NaN           NaN   
1066         78-80 Mulberry Street  apartment         NaN           NaN   
1084  Oakwood Furnished Apartments  apartment         NaN           NaN   

                                                website  \
415                                                 NaN   
477                                                 NaN 

name              9
tourism          10
description       0
opening_hours     0
website           2
geometry         10
dtype: int64

In [30]:
apartment.isna().sum()

name              1
tourism           0
description      10
opening_hours    10
website           8
geometry          0
dtype: int64

In [31]:
new_gdf = new_gdf[new_gdf['tourism']!= 'apartment']

In [32]:
guest_house = new_gdf[new_gdf['tourism']== 'guest_house']
print(guest_house)
guest_house.count()

                                    name      tourism description  \
625                           House ZAZA  guest_house         NaN   
796                   Lotus Accomodation  guest_house         NaN   
816      La Maison d'Art Private Gallery  guest_house         NaN   
892  Bancroft first floor resident rooms  guest_house         NaN   
905                   Saint Nicholas Inn  guest_house         NaN   

    opening_hours website                    geometry  
625           NaN     NaN  POINT (-73.94384 40.78612)  
796           NaN     NaN  POINT (-73.95119 40.82412)  
816           NaN     NaN  POINT (-73.94681 40.81430)  
892           NaN     NaN  POINT (-73.95977 40.81060)  
905           NaN     NaN  POINT (-73.94186 40.83046)  


name             5
tourism          5
description      0
opening_hours    0
website          0
geometry         5
dtype: int64

In [33]:
guest_house.isna().sum()

name             0
tourism          0
description      5
opening_hours    5
website          5
geometry         0
dtype: int64

In [34]:
new_gdf = new_gdf[new_gdf['tourism']!= 'guest_house']

In [35]:
guest_house = new_gdf[new_gdf['tourism']== 'hostel']
print(guest_house)
guest_house.count()

                                        name tourism  \
122                                Leo house  hostel   
249                             Equity Point  hostel   
303   Hostelling International New York City  hostel   
305                         Jazz on the Park  hostel   
396                           West Side YMCA  hostel   
438                                L-Hostels  hostel   
449                                      NaN  hostel   
559                         Central Park Inn  hostel   
664                         The Bowery House  hostel   
797             Chelsea international hostel  hostel   
798           Jazz on Columbus Circle Hostel  hostel   
903           American Dream Bed & Breakfast  hostel   
1022            International Student Center  hostel   
1035                  Manhattan Bowery Lodge  hostel   
1058                                     NaN  hostel   

                                            description opening_hours  \
122                           

name             13
tourism          15
description       1
opening_hours     0
website          11
geometry         15
dtype: int64

In [36]:
new_gdf = new_gdf[new_gdf['tourism']!= 'hostel']

In [37]:
information = new_gdf[new_gdf['tourism']== 'information']
print(information)
information.count()

Empty GeoDataFrame
Columns: [name, tourism, description, opening_hours, website, geometry]
Index: []


name             0
tourism          0
description      0
opening_hours    0
website          0
geometry         0
dtype: int64

In [38]:
picnic_site = new_gdf[new_gdf['tourism']== 'picnic_site']
print(picnic_site)
picnic_site.count()

                                        name      tourism  \
72    Picnic Area on Marcus Garvey Pool Deck  picnic_site   
371                                      NaN  picnic_site   
372                                      NaN  picnic_site   
373                                      NaN  picnic_site   
374                                      NaN  picnic_site   
375                                      NaN  picnic_site   
376                                      NaN  picnic_site   
377                                      NaN  picnic_site   
378                                      NaN  picnic_site   
414                                      NaN  picnic_site   
418                                      NaN  picnic_site   
426                                      NaN  picnic_site   
486                                      NaN  picnic_site   
510                            Sherman Creek  picnic_site   
594                                      NaN  picnic_site   
595                     

name              2
tourism          31
description       1
opening_hours     1
website           0
geometry         31
dtype: int64

In [39]:
picnic_site.isna().sum()

name             29
tourism           0
description      30
opening_hours    30
website          31
geometry          0
dtype: int64

In [40]:
yes = new_gdf[new_gdf['tourism']== 'yes']
print(yes)
yes.count()

                                                   name tourism description  \
11                                      Washington Mews     yes         NaN   
13                                    Saint Lukes Place     yes         NaN   
23                                    Chrysler Building     yes         NaN   
24                                  Daily News Building     yes         NaN   
31                       The Rink at Rockefeller Center     yes         NaN   
49                                       Met Life Tower     yes         NaN   
50                                        Alphabet City     yes         NaN   
52                                           Tudor City     yes         NaN   
97                                                  NaN     yes         NaN   
205                                         Trump Tower     yes         NaN   
217                                       Japan Society     yes         NaN   
219                                     Ford Foundat

name             25
tourism          26
description       0
opening_hours     4
website           8
geometry         26
dtype: int64

In [41]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name             196
tourism            0
description      826
opening_hours    640
website          484
geometry           0
dtype: int64


In [42]:
new_gdf.describe()

Unnamed: 0,name,tourism,description,opening_hours,website,geometry
count,657,853,27,213,369,853
unique,629,11,27,111,354,851
top,Miles McEnery Gallery,artwork,Hint: Saturdays 18:00-20:00 Pay-What-You-Wish ...,Tu-Sa 10:00-18:00,https://www.milesmcenery.com,POINT (-73.9775451 40.761612)
freq,4,304,1,66,4,2


# Null Values

In [43]:
nan_names= new_gdf[new_gdf['name'].isna()]
nan_info = nan_names[['name', 'tourism', 'description']]


In [44]:
pd.set_option('display.max_rows', None)

print(nan_info)

     name      tourism                                        description
97    NaN          yes                                                NaN
337   NaN       museum  Once home to the U.S. First Army, it became th...
339   NaN       museum                                                NaN
371   NaN  picnic_site                                                NaN
372   NaN  picnic_site                                                NaN
373   NaN  picnic_site                                                NaN
374   NaN  picnic_site                                                NaN
375   NaN  picnic_site                                                NaN
376   NaN  picnic_site                                                NaN
377   NaN  picnic_site                                                NaN
378   NaN  picnic_site                                                NaN
411   NaN      artwork                                                NaN
414   NaN  picnic_site                

In [45]:
new_gdf = new_gdf.dropna(subset=['name'])

In [46]:
nan_info.count()

name             0
tourism        196
description      2
dtype: int64

In [47]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description      632
opening_hours    446
website          289
geometry           0
dtype: int64


In [48]:
new_gdf['tourism'].unique()

array(['attraction', 'museum', 'yes', 'artwork', 'zoo', 'picnic_site',
       'gallery', 'viewpoint', 'sightseeing', 'aquarium', 'camp_site'],
      dtype=object)

## Checking for Unique Values

In [49]:
unique_values = gdf['opening_hours'].unique()
print(unique_values)

[nan '06:30-20:45' '10:00-17:00; Fr-Sa 10:00-21:00; We off'
 'Mo-Su 07:00-19:00' 'Mo-Su 07:00-23:00' 'Mo-Su 08:00-02:00'
 'Oct-Apr 08:30-00:00' 'Mo-Su 09:30-17:00'
 'Tu, We, Fr-Su 10:00-18:00; Th 10:00-21:00' '24/7'
 'Mo, We-Fr, Su 11:00-18:00; Th off; Sa 11:00-20:00'
 '"Temporarily closed"'
 'Sep-May Th-Su 13:00-17:00; Jun-Aug We-Su 13:00-17:00; PH off'
 '09:00-20:00' 'Tu-Su 11:00-18:00' '10:00-17:00'
 'Mo,Th 08:00-20:00; Tu,We 08:00-21:00; Fr 08:00-18:00; Sa 10:00-18:00; Su 10:00-17:00'
 'Mo-Su 10:00-17:00' 'Mo-Su 05:30-02:00' '09:30-17:00'
 '"closed for renovation, temporary located 945 Madison Ave"'
 'Mo, Tu, Sa, Su 11:00-17:45; Th 11:00-20:00; Fr 11:00-16:00'
 'Su-Fr 10:00-17:00; Sa 10:00-19:00' 'Mo-Su 10:00-18:00'
 'Th 13:00-16:00; Fr,Sa 11:00-16:00'
 'Mo-Th 09:00-20:00, Fr-Sa 09:00-21:00, Su 09:00-20:00'
 'Mo-Sa,Su 10:30-17:30; Fr 10:30-20:00'
 'Nov-Mar: 10:00-16:00; Apr-Oct: 10:00-17:00' 'We-Su 12:00-17:00'
 'Mo-Su 10:00-17:30' 'Mo-Su 09:00-01:00' 'We-Mo 11:00-18:00, Tu off'
 '

## Replacing Null Values

In [50]:
print(nan_info)

     name      tourism                                        description
97    NaN          yes                                                NaN
337   NaN       museum  Once home to the U.S. First Army, it became th...
339   NaN       museum                                                NaN
371   NaN  picnic_site                                                NaN
372   NaN  picnic_site                                                NaN
373   NaN  picnic_site                                                NaN
374   NaN  picnic_site                                                NaN
375   NaN  picnic_site                                                NaN
376   NaN  picnic_site                                                NaN
377   NaN  picnic_site                                                NaN
378   NaN  picnic_site                                                NaN
411   NaN      artwork                                                NaN
414   NaN  picnic_site                

            Museum

In [51]:
gen_desc =  "Explore the rich history and culture that Manhattan has to offer, showcasing various exhibits and artifacts from different eras."

In [52]:
mask = (new_gdf['tourism'] == 'museum') & (new_gdf['description'].isnull())

In [53]:
new_gdf.loc[mask, 'description'] = gen_desc

In [54]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description      543
opening_hours    446
website          289
geometry           0
dtype: int64


In [55]:
nan_names= new_gdf[new_gdf['description'].isna()]
nan_info = nan_names[['tourism', 'description']]


            Artwork

In [56]:
nan_artwork = new_gdf[new_gdf['tourism'] == 'artwork'].isna().sum()
print(nan_artwork)

name               0
tourism            0
description      193
opening_hours    197
website          124
geometry           0
dtype: int64


In [57]:
gen_artwork = "Explore a diverse collection of artwork showcasing various styles, themes, and mediums. "

In [58]:
mask = (new_gdf['tourism'] == 'artwork') & (new_gdf['description'].isnull())
new_gdf.loc[mask, 'description'] = gen_artwork

In [59]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description      350
opening_hours    446
website          289
geometry           0
dtype: int64


            Zoo

In [60]:
nan_zoo = new_gdf[new_gdf['tourism'] == 'zoo'].isna().sum()
print(nan_zoo)

name             0
tourism          0
description      3
opening_hours    1
website          1
geometry         0
dtype: int64


In [61]:
gen_zoo = "Discover a captivating world filled with fascinating creatures from across the globe right here in Manhattan!"

In [62]:
mask = (new_gdf['tourism'] == 'zoo') & (new_gdf['description'].isnull())
new_gdf.loc[mask, 'description'] = gen_zoo

In [63]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description      347
opening_hours    446
website          289
geometry           0
dtype: int64


                Gallery

In [64]:
print(new_gdf[new_gdf['tourism'] == 'gallery'].isna().sum())


name               0
tourism            0
description      178
opening_hours     46
website           31
geometry           0
dtype: int64


In [65]:
gen_gallery = "Explore the creativity and imagination of local and international artists through captivating exhibitions and thought-provoking displays"

In [66]:
mask = (new_gdf['tourism'] == 'gallery') & (new_gdf['description'].isnull())
new_gdf.loc[mask, 'description'] = gen_gallery

In [67]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description      169
opening_hours    446
website          289
geometry           0
dtype: int64


In [68]:
print(new_gdf)


                                                   name      tourism  \
0                                            The Dakota   attraction   
1                                                  MoMA   attraction   
2                             Saint Patrick’s Cathedral   attraction   
6                           New-York Historical Society       museum   
9                    Cathedral of Saint John the Divine   attraction   
10                       The Metropolitan Museum of Art       museum   
11                                      Washington Mews          yes   
12                                           Gay Street   attraction   
13                                    Saint Lukes Place          yes   
14                                        The Cloisters   attraction   
15                                        The High Line   attraction   
16                            Museum of Jewish Heritage       museum   
18                                Serpentine Structures      art

                Attraction
               

In [69]:
print(new_gdf[new_gdf['tourism'] == 'attraction'].isna().sum())


name               0
tourism            0
description      122
opening_hours    112
website           84
geometry           0
dtype: int64


In [70]:
gen_attraction = "Explore this fascinating attraction and discover its unique offerings. With various exhibits and activities, it promises an enriching experience for visitors of all ages."

In [71]:
mask = (new_gdf['tourism'] == 'attraction') & (new_gdf['description'].isnull())
new_gdf.loc[mask, 'description'] = gen_attraction

In [72]:
nan_count = new_gdf.isnull().sum()
print(nan_count)

name               0
tourism            0
description       47
opening_hours    446
website          289
geometry           0
dtype: int64


## List of all Attractions

In [73]:
name_feature = 'name'

In [74]:
name_feature_list = new_gdf[name_feature].tolist()


In [75]:
for names in name_feature_list:
    print(names)

The Dakota
MoMA
Saint Patrick’s Cathedral
New-York Historical Society
Cathedral of Saint John the Divine
The Metropolitan Museum of Art
Washington Mews
Gay Street
Saint Lukes Place
The Cloisters
The High Line
Museum of Jewish Heritage
Serpentine Structures
Bryant Park
Statue of Liberty
Empire State Building
The Blockhouse
Chrysler Building
Daily News Building
United Nations Headquarters
USS Intrepid
Beach Museum of Art
The Rink at Rockefeller Center
Sunset Zoo
Woolworth Building
Fort Jay
Lincoln Center for the Performing Arts
Ellis Island Immigration Museum
Tisch's Children Zoo
Columbus Circle
Museum of Arts and Design
Madison Square Garden
Union Pacific Train Depot
Met Life Tower
Alphabet City
SoHo
Tudor City
Flint Hills Discovery Center
New York City Police Memorial
Paley Center for Media
McGraw-Hill Waterfall Tunnel
Guggenheim Museum
Wavertree
Ambrose
Helen McAllister
Riley County Museum
Cleopatra's Needle
Picnic Area on Marcus Garvey Pool Deck
The Ukrainian Museum
Merchant's House 

In [76]:
new_gdf.count()

name             657
tourism          657
description      610
opening_hours    211
website          368
geometry         657
dtype: int64

In [77]:
new_gdf.dtypes

name               object
tourism            object
description        object
opening_hours      object
website            object
geometry         geometry
dtype: object

In [78]:
print(new_gdf['opening_hours'])

0                                                     NaN
1                                                     NaN
2                                             06:30-20:45
6                                                     NaN
9                                                     NaN
10                 10:00-17:00; Fr-Sa 10:00-21:00; We off
11                                                    NaN
12                                                    NaN
13                                                    NaN
14                                                    NaN
15                                      Mo-Su 07:00-19:00
16                                                    NaN
18                                                    NaN
19                                      Mo-Su 07:00-23:00
20                                                    NaN
21                                      Mo-Su 08:00-02:00
22                                                    NaN
23            

In [79]:
new_gdf['name'] = new_gdf['name'].astype(str)
new_gdf['tourism'] = new_gdf['tourism'].astype(str)
new_gdf['description'] = new_gdf['description'].astype(str)
new_gdf['opening_hours'] = new_gdf['opening_hours'].astype(str)
new_gdf['website'] = new_gdf['website'].astype(str)

In [80]:
new_gdf.dtypes

name               object
tourism            object
description        object
opening_hours      object
website            object
geometry         geometry
dtype: object

In [81]:
print (type(new_gdf['name'].iat[0]))

<class 'str'>


In [82]:
unique_identifier = range(1, len(new_gdf) + 1 )

In [83]:
new_gdf['unique_ID'] = unique_identifier

In [84]:
new_gdf.head()

Unnamed: 0,name,tourism,description,opening_hours,website,geometry,unique_ID
0,The Dakota,attraction,Explore this fascinating attraction and discov...,,,POINT (-73.97615 40.77678),1
1,MoMA,attraction,Explore this fascinating attraction and discov...,,,POINT (-73.97755 40.76161),2
2,Saint Patrick’s Cathedral,attraction,Explore this fascinating attraction and discov...,06:30-20:45,,POINT (-73.97618 40.75854),3
6,New-York Historical Society,museum,Explore the rich history and culture that Manh...,,https://www.nyhistory.org/,POINT (-73.97427 40.77929),4
9,Cathedral of Saint John the Divine,attraction,Explore this fascinating attraction and discov...,,https://www.stjohndivine.org,POINT (-73.96175 40.80377),5


In [None]:
BestTimeData = 

In [81]:
new_gdf.to_file("./UpdatedTourism_Data.geojson", driver="GeoJSON")
