In [20]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import pandas_geojson as pdg

In [2]:
# Load the buildings table with every column as object dtype (no numeric
# parsing) and without converting empty / NA-like strings to NaN.
buildings_df = pd.read_csv(
    'data/buildings.csv',
    keep_default_na=False,
    dtype=object,
)

In [3]:
# Display the frame loaded from data/buildings.csv.
buildings_df

Unnamed: 0,Name,Category,ImageURL,Address,Latitude,Longitude,Notes,AKA
0,104 West! (Kosher Dining Hall),Co-ops,,,42.44431073,-76.4878787,Dining (www.campuslife.cornell.edu/campuslife/...,
1,112 Edgemoor,Co-ops,http://images.localist.com/images/main/busines...,,42.44409097,-76.48841178,Housing,
2,120 Maple Ave.,Near Campus,,,42.441336,-76.47553,CIT,
3,301 College Ave.,Collegetown,http://images.localist.com/images/main/busines...,,42.44033831,-76.48499927,,
4,309 College Ave.,Collegetown,,,42.44081657,-76.48497643,The Nines,
...,...,...,...,...,...,...,...,...
436,Wilson Synchrotron Lab & Ring,Engineering,http://images.localist.com/photo/000/017/084/h...,"161 Synchrotron Dr, Ithaca, NY 14850",42.445011,-76.472976,Wilson Laboratory,
437,Wing Hall,East Campus,http://images.localist.com/images/main/busines...,"123 Wing Dr, Ithaca, NY 14850",42.446633,-76.471568,,
438,Winter Garden,Plantations,http://images.localist.com/photo/000/016/662/h...,,42.44910556,-76.47141389,Plantations,
439,Zeta Psi,Greek,,"534 Thurston Ave,. Ithaca, NY 14850",42.454121,-76.481827,Greek: IFC,


In [4]:
# Download the Wikipedia page listing Cornell University buildings.
buildings_url = "https://en.wikipedia.org/wiki/List_of_Cornell_University_buildings#cite_note-1"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
buildings_result = requests.get(buildings_url, timeout=30)
# Stop here on an HTTP error instead of saving and parsing an error page.
buildings_result.raise_for_status()
print(buildings_result)

<Response [200]>


In [5]:
# Keep a local copy of the downloaded HTML, then parse that copy.
# The encoding is given explicitly: without it open() uses the platform default,
# which can fail on (or corrupt) the non-ASCII characters in the page text.
with open("buildings.html", "w", encoding="utf-8") as writer:
    writer.write(buildings_result.text)

with open("buildings.html", "r", encoding="utf-8") as reader:
    html_source = reader.read()

page = BeautifulSoup(html_source, "html.parser")

In [6]:
# Collect every <tr> element in the parsed page.
# find_all is the current name for the legacy findChildren alias; same result.
rows = page.find_all('tr')

In [7]:
def _cell_text(cell):
    """Return the cell's single text node without surrounding newlines.

    Returns '' when `cell.string` is None (the cell is empty or has several
    child nodes), matching how the year / location / notes columns were
    filled before.
    """
    text = cell.string
    return text.strip('\n') if text is not None else ''


def _linked_or_plain_text(cell):
    """Return the text of the cell's first link, or the cell's own text.

    Used for the name and architect columns. When the chosen node has no
    single text node (`.string` is None) the text of all its descendants is
    joined instead; previously that case stored None (link) or raised
    AttributeError (plain cell).
    """
    links = cell.find_all('a')
    if links:
        node, text = links[0], links[0].string
    else:
        node, text = cell, cell.string
        if text is not None:
            text = text.strip('\n')
    if text is None:
        text = node.get_text(" ", strip=True)
    return text


def _full_image_url(cell):
    """Derive an image URL from the srcset of the first <img> in the cell.

    Returns '' when the cell has no <img>, or when the <img> has no srcset
    attribute (previously a KeyError).
    """
    found = cell.find_all('img')
    if not found:
        return ''
    srcset = found[0].get('srcset')
    if not srcset:
        return ''
    # Take the URL of the last srcset candidate, drop the surrounding slashes,
    # cut everything from '/300px' onwards and remove the 'thumb/' path segment.
    # NOTE(review): this assumes the last candidate is a 300px thumbnail - confirm.
    candidate = srcset.split(',')[-1].split(' ')[1]
    return candidate.strip('/').split('/300px')[0].replace('thumb/', '')


# One list per output column; the lists stay index-aligned because each
# accepted row appends exactly one value to every list.
names = []
images = []
architects = []
years = []
locations = []
notes = []

# NOTE(review): the slice skips the first row and the last 30 rows -
# presumably a header and trailing non-building rows; confirm against the page.
for row in rows[1:-30]:
    content = row.find_all(['td'])

    # Rows without <td> cells are skipped as before. Rows with fewer than the
    # six cells read below are skipped too: they used to raise IndexError
    # part-way through and leave the lists with different lengths.
    if len(content) < 6:
        continue

    names.append(_linked_or_plain_text(content[0]))
    images.append(_full_image_url(content[1]))
    architects.append(_linked_or_plain_text(content[2]))
    years.append(_cell_text(content[3]))
    locations.append(_cell_text(content[4]))
    notes.append(_cell_text(content[5]))

In [8]:
# Combine the scraped lists into one table, one column per list.
wiki_columns = {
    "name": names,
    "image": images,
    "architect": architects,
    "year": years,
    "location": locations,
    "notes": notes,
}
wiki_buildings_df = pd.DataFrame(wiki_columns)

In [9]:
# Display the table built from the scraped Wikipedia rows.
wiki_buildings_df

Unnamed: 0,name,image,architect,year,location,notes
0,Andrew Dickson White House,upload.wikimedia.org/wikipedia/commons/2/27/Co...,William Henry Miller,1874,Central Campus,Begun by Miller and finished by Babcock. Built...
1,Anabel Taylor Hall,upload.wikimedia.org/wikipedia/commons/3/34/An...,"Starrett, Van Vleck & Marsh",1953,548 College Avenue,Inside is a memorial to alumni killed in World...
2,Atkinson Hall,upload.wikimedia.org/wikipedia/commons/2/24/At...,Lake Flato Architects,Summer 2024 (expected),Tower Road,
3,Bailey Hall,upload.wikimedia.org/wikipedia/commons/0/05/Co...,Green and Wicks,1912,Central Campus,
4,Baker Laboratory,upload.wikimedia.org/wikipedia/commons/e/ef/Co...,Arthur N. Gibb (B.Arch 1890),1921,Central Campus,
...,...,...,...,...,...,...
165,Ganędagǫ: Hall,upload.wikimedia.org/wikipedia/commons/0/09/Ga...,ikon.5 Architects,,121 Triphammer Road,
166,Toni Morrison,upload.wikimedia.org/wikipedia/commons/7/70/To...,ikon.5 Architects,,18 Sisson Place,
167,Barbara McClintock,upload.wikimedia.org/wikipedia/commons/c/cd/Ba...,ikon.5 Architects,2022,224 Cradit Farm Dr,"103,835 sq ft; 311 beds"
168,Hu Shih,upload.wikimedia.org/wikipedia/commons/9/99/Hu...,ikon.5 Architects,2022,141 Program House Dr,"138,773 sq ft; 429 beds"


In [10]:
# Save the scraped table to its own file. The previous target,
# data/buildings.csv, is the file loaded into buildings_df at the top of this
# notebook; writing there replaced that source data with a table that has
# different columns and changed what the first cell loads on the next run.
wiki_buildings_df.to_csv("data/wiki_buildings.csv")

In [26]:
# Read the buildings GeoJSON file, then convert it to a DataFrame.
buildings_geojson = pdg.read_geojson("data/buildings-geojson.json")
df_a = buildings_geojson.to_dataframe()

In [28]:
# Write the GeoJSON-derived frame to a CSV file.
df_a.to_csv(path_or_buf="data/temp.csv")

In [31]:
# Print each building's name and coordinates, ordered by name.
name_col = "properties.Name"
coords_col = "geometry.coordinates"
for _idx, building in df_a.sort_values(name_col).iterrows():
    print(building[name_col], "\t", building[coords_col])

104 West! (Kosher Dining Hall) 	 [-76.4878787, 42.44431073]
112 Edgemoor 	 [-76.48841178, 42.44409097]
120 Maple Ave. 	 [-76.47553, 42.441336]
301 College Ave. 	 [-76.48499927, 42.44033831]
309 College Ave. 	 [-76.48497643, 42.44081657]
312 College Ave. 	 [-76.48533034, 42.44089596]
337 Pine Tree Road 	 [-76.46270764, 42.43792222]
341 Pine Tree Road 	 [-76.46308945, 42.43793791]
353 Pine Tree Road 	 [-76.4621544, 42.43813181]
373 Pine Tree Road 	 [-76.46209222, 42.43765919]
377 Pine Tree Road 	 [-76.4631494, 42.4376323]
391 Pine Tree Road 	 [-76.46531944, 42.43904444]
395 Pine Tree Road 	 [-76.4631946, 42.43896308]
4-H Acres 	 [-76.41905012, 42.47316663]
409 College Ave. 	 [-76.484966, 42.44211389]
726 University Ave. 	 [-76.48878611, 42.44956111]
A. D. White Gardens 	 [-76.48188889, 42.44780278]
A. D. White House 	 [-76.48199256, 42.4482754]
AAP Downtown Facility 	 [-76.51018611, 42.44296111]
AAP New York City Center 	 [-73.99462376, 40.73868611]
AAP in Rome 	 [12.47551389, 41.8951666