In [1]:
import html
import re
import urllib.parse
from zipfile import ZipFile

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [43]:
# Open the feed archive (it stays open because later cells read members from it)
# and collect the names of all members that start with 'permart'.
zip_file = ZipFile('feeds.zip')
xml_files = [member_name for member_name in zip_file.namelist() if member_name.startswith('permart')]

In [67]:
def get_text(obj):
    """Return ``obj.string``, or the placeholder 'NA' when ``obj`` is None."""
    if obj is None:
        return 'NA'
    return obj.string

def get_img(xml_obj, art_num, base_url='http://web.mta.info/mta/aft/images/permart/'):
    """Return the image URLs of one artwork as a single ', '-joined string.

    Parameters
    ----------
    xml_obj : object exposing ``find_all``
        The parsed element to search (a station element in this notebook).
    art_num : int
        Artwork number; selects tags whose name matches ``art<art_num>_image``.
    base_url : str, optional
        Prefix prepended to every image file name.

    Returns
    -------
    str or float
        The joined URLs, or ``np.nan`` when no image tag carries a file name.
    """
    images = xml_obj.find_all(re.compile('art' + str(art_num) + '_image'))
    # A tag's ``.string`` is None when the tag is empty or has several
    # children; skip those instead of failing on ``str + None``.
    file_names = [image.string for image in images if image.string]
    if not file_names:
        return np.nan
    return ', '.join(base_url + file_name for file_name in file_names)

In [68]:
# Flatten every feed into one record per artwork. Within a <station>, artworks
# are numbered art1_*, art2_*, ...; the inner loop stops at the first number
# that has no art<N>_title tag.
df = []  # list of per-artwork dicts; turned into a DataFrame in a later cell
for xml_file in xml_files:
    print('start parsing: ', xml_file)
    # Close the archive member once BeautifulSoup has consumed it, so handles
    # do not pile up across feeds.
    with zip_file.open(xml_file) as xml_handle:
        page = BeautifulSoup(xml_handle, 'lxml')
    for station in page.find_all('station'):
        # Station-level fields are shared by all artworks of the station.
        agency = get_text(station.find('abbr'))
        line = get_text(station.find('line'))
        sname = get_text(station.find('station_name'))
        art_num = 0
        while True:
            art_num += 1
            prefix = 'art' + str(art_num)
            title = station.find(prefix + '_title')
            if title is None:
                break
            df.append({
                'art_title': get_text(title),
                'artist_fname': get_text(station.find(prefix + '_artist-first')),
                'artist_lname': get_text(station.find(prefix + '_artist-last')),
                'date': get_text(station.find(prefix + '_date')),
                'material': get_text(station.find(prefix + '_material')),
                'description': get_text(station.find(prefix + '_description')),
                'agency': agency,
                'line': line,
                'station_name': sname,
                'img': get_img(station, art_num),
            })
    print('complete parsing: ', xml_file)

start parsing:  permart-lirr-pennstation.xml
end parsing:  permart-lirr-pennstation.xml
start parsing:  permart-lirr-portjeffersonbranch.xml
end parsing:  permart-lirr-portjeffersonbranch.xml
start parsing:  permart-lirr-portwashingtonbranch.xml
end parsing:  permart-lirr-portwashingtonbranch.xml
start parsing:  permart-lirr-ronkonkomabranch.xml
end parsing:  permart-lirr-ronkonkomabranch.xml
start parsing:  permart-lirr-sbl.xml
end parsing:  permart-lirr-sbl.xml
start parsing:  permart-mnr-grandcentralterminal.xml
end parsing:  permart-mnr-grandcentralterminal.xml
start parsing:  permart-mnr-harlemline.xml
end parsing:  permart-mnr-harlemline.xml
start parsing:  permart-mnr-hudsonline.xml
end parsing:  permart-mnr-hudsonline.xml
start parsing:  permart-mnr-newhavenline.xml
end parsing:  permart-mnr-newhavenline.xml
start parsing:  permart-mnr-sbl.xml
end parsing:  permart-mnr-sbl.xml
start parsing:  permart-nyct-1.xml
end parsing:  permart-nyct-1.xml
start parsing:  permart-nyct-2.xml

In [69]:
# Build the artwork table from the parsed records, drop exact duplicate rows,
# and add a combined "first last" artist column.
arts = pd.DataFrame(df).drop_duplicates()
arts['artist'] = arts['artist_fname'] + ' ' + arts['artist_lname']

In [75]:
# Summarise the table, then write it to disk.
# NOTE(review): by default only a cell's last expression is echoed, so the bare
# `arts.head()` below displays nothing in this position. This cell's execution
# count (In [75]) is later than the wiki lookup cell (In [74]), which is why the
# recorded output already lists wiki1/wiki2.
arts.info()
arts.head()
arts.to_csv('mta-arts.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 235 entries, 0 to 535
Data columns (total 13 columns):
agency          235 non-null object
art_title       235 non-null object
artist_fname    234 non-null object
artist_lname    235 non-null object
date            235 non-null object
description     235 non-null object
img             235 non-null object
line            235 non-null object
material        235 non-null object
station_name    235 non-null object
artist          234 non-null object
wiki1           235 non-null object
wiki2           235 non-null object
dtypes: object(13)
memory usage: 25.7+ KB


Unnamed: 0,agency,art_title,artist_fname,artist_lname,date,description,img,line,material,station_name,artist,wiki1,wiki2
0,LIRR,Ghosts Series,Andrew,Leicester,1994,Andrew Leicester's {Ghost Series} consists of ...,http://web.mta.info/mta/aft/images/permart/Lei...,Penn Station,Terra cotta wall murals in five locations and ...,Penn Station,Andrew Leicester,https://en.wikipedia.org/wiki/Pennsylvania_Sta...,https://en.wikipedia.org/wiki/Pennsylvania_Sta...
1,LIRR,Eclipsed Time,Maya,Lin,1994,In {Eclipsed Time} Maya Lin tries to get commu...,http://web.mta.info/mta/aft/images/permart/Lin...,Penn Station,"Sand-blasted glass, aluminum, stainless steel,...",Penn Station,Maya Lin,https://en.wikipedia.org/wiki/Pennsylvania_Sta...,https://en.wikipedia.org/wiki/Pennsylvania_Sta...
2,LIRR,Overlook,Allan and Ellen,Wexler,2009,The new MTA Long Island Rail Road (LIRR) Atlan...,http://web.mta.info/mta/aft/images/permart/wex...,Babylon Branch,Granite,Atlantic Terminal,Allan and Ellen Wexler,https://en.wikipedia.org/wiki/Atlantic_Terminal,https://en.wikipedia.org/wiki/Atlantic_Terminal
3,LIRR,"Morning Transit, Hempstead Plain & Evening Tra...",Roy,Nicholson,2002,"In the {Morning Transit, Hempstead Plain & Eve...",http://web.mta.info/mta/aft/images/permart/Nic...,Port Jefferson Branch,Glass mosaics on waiting room walls,Hicksville,Roy Nicholson,https://en.wikipedia.org/wiki/Hicksville_station,https://en.wikipedia.org/wiki/Hicksville_station
4,LIRR,"For My Grandfather Noye Pride, a Locomotive En...",Joe,Zucker,1998,"At Huntington, Joe Zucker's 130-foot fantasy t...",http://web.mta.info/mta/aft/images/permart/Zuc...,Port Jefferson Branch,Faceted glass in windscreen,Huntington,Joe Zucker,https://en.wikipedia.org/wiki/Huntington_Stati...,https://en.wikipedia.org/wiki/Huntington_stati...


In [73]:
def _google_wikipedia_link(query, timeout=30):
    """Return the first Wikipedia URL among Google's results for ``query``.

    Result links are expected to look like ``/url?q=<target>&sa=U&...``. The
    percent-decoded ``<target>`` of the first such link that mentions
    'wikipedia' is returned, or ``np.nan`` when no link qualifies.

    Parameters
    ----------
    query : str
        Search terms; URL-encoded by requests.
    timeout : float, optional
        Seconds to wait for Google before requests raises a timeout error.
    """
    # Passing the query via ``params`` lets requests encode characters such as
    # '&' and '/' that occur in station names.
    page = requests.get("https://www.google.com/search", params={'q': query}, timeout=timeout)
    results = BeautifulSoup(page.content, 'html.parser')
    for anchor in results.find_all('a'):
        href = anchor.get('href')
        # Anchors without an href yield None, which would break the `in` test.
        if not href or 'wikipedia' not in href:
            continue
        match = re.search(r'/url\?q=(.+?)&sa=U&', href)
        if match is not None:
            return urllib.parse.unquote(match.group(1))
    return np.nan


def wiki_link1(search_string):
    """Search Google for ``"<search_string> station"`` and return a Wikipedia URL.

    Returns ``np.nan`` when the results contain no usable Wikipedia link.
    """
    print('Begin looking for: ' + search_string + '...')
    wiki = _google_wikipedia_link(search_string + ' station')
    if isinstance(wiki, str):
        print('Found wiki link for: ' + search_string + '...')
    else:
        print('No wiki link found for: ' + search_string + '...')
    return wiki


def wiki_link2(x):
    """Like ``wiki_link1``, but adds the agency to the search terms.

    ``x`` is a row (or mapping) with 'station_name' and 'agency' entries; the
    query is ``"<station_name> station <agency>"``. Returns ``np.nan`` when the
    results contain no usable Wikipedia link.
    """
    print('Begin looking for: ' + x['station_name'] + '...')
    wiki = _google_wikipedia_link(x['station_name'] + ' station ' + x['agency'])
    if isinstance(wiki, str):
        print('Found wiki link for: ' + x['station_name'] + '...')
    else:
        print('No wiki link found for: ' + x['station_name'] + '...')
    return wiki

In [74]:
# Look up a Wikipedia URL for every artwork row in two ways: by station name
# alone (wiki1) and by station name plus agency (wiki2).
# NOTE(review): each row triggers its own Google request, including rows that
# repeat a station; consider looking up each unique station once.
arts['wiki1'] = arts.station_name.apply(wiki_link1)
arts['wiki2'] = arts[['agency', 'station_name']].apply(wiki_link2, axis=1)

Begin looking for: Penn Station...
Found wiki link for: Penn Station...
Begin looking for: Penn Station...
Found wiki link for: Penn Station...
Begin looking for: Atlantic Terminal...
Found wiki link for: Atlantic Terminal...
Begin looking for: Hicksville...
Found wiki link for: Hicksville...
Begin looking for: Huntington...
Found wiki link for: Huntington...
Begin looking for: Broadway Station...
Found wiki link for: Broadway Station...
Begin looking for: Bayside Station...
Found wiki link for: Bayside Station...
Begin looking for: Great Neck...
Found wiki link for: Great Neck...
Begin looking for: Hicksville...
Found wiki link for: Hicksville...
Begin looking for: Ronkonkoma...
Found wiki link for: Ronkonkoma...
Begin looking for: Greenport...
Found wiki link for: Greenport...
Begin looking for: Grand Central Terminal...
Found wiki link for: Grand Central Terminal...
Begin looking for: Grand Central Terminal...
Found wiki link for: Grand Central Terminal...
Begin looking for: Grand C

Found wiki link for: 170th Street...
Begin looking for: 167th Street...
Found wiki link for: 167th Street...
Begin looking for: 161st Street-Yankee Stadium...
Found wiki link for: 161st Street-Yankee Stadium...
Begin looking for: 161st Street-Yankee Stadium...
Found wiki link for: 161st Street-Yankee Stadium...
Begin looking for: 125th Street...
Found wiki link for: 125th Street...
Begin looking for: 125th Street...
Found wiki link for: 125th Street...
Begin looking for: 86th Street...
Found wiki link for: 86th Street...
Begin looking for: 59th Street/Lexington Avenue-59th Street...
Found wiki link for: 59th Street/Lexington Avenue-59th Street...
Begin looking for: Grand Central-42nd Street...
Found wiki link for: Grand Central-42nd Street...
Begin looking for: Grand Central-42nd Street...
Found wiki link for: Grand Central-42nd Street...
Begin looking for: Grand Central-42nd Street...
Found wiki link for: Grand Central-42nd Street...
Begin looking for: 14th Street-Union Square...
Foun

Found wiki link for: Crescent Street...
Begin looking for: Norwood Avenue...
Found wiki link for: Norwood Avenue...
Begin looking for: Cleveland Street...
Found wiki link for: Cleveland Street...
Begin looking for: Van Siclen Avenue...
Found wiki link for: Van Siclen Avenue...
Begin looking for: Alabama Avenue...
Found wiki link for: Alabama Avenue...
Begin looking for: Chauncey Street...
Found wiki link for: Chauncey Street...
Begin looking for: Halsey Street...
Found wiki link for: Halsey Street...
Begin looking for: Gates Avenue...
Found wiki link for: Gates Avenue...
Begin looking for: Kosciuszko Street...
Found wiki link for: Kosciuszko Street...
Begin looking for: Myrtle Avenue...
Found wiki link for: Myrtle Avenue...
Begin looking for: Flushing Avenue...
Found wiki link for: Flushing Avenue...
Begin looking for: Lorimer Street...
Found wiki link for: Lorimer Street...
Begin looking for: Hewes Street...
Found wiki link for: Hewes Street...
Begin looking for: Marcy Avenue...
Found

Found wiki link for: Chambers Street/Park Place...
Begin looking for: South Ferry...
Found wiki link for: South Ferry...
Begin looking for: Nereid Avenue 238th Street...
Found wiki link for: Nereid Avenue 238th Street...
Begin looking for: 233rd Street...
Found wiki link for: 233rd Street...
Begin looking for: 225th Street...
Found wiki link for: 225th Street...
Begin looking for: 219th Street...
Found wiki link for: 219th Street...
Begin looking for: Gun Hill Road...
Found wiki link for: Gun Hill Road...
Begin looking for: Burke Avenue...
Found wiki link for: Burke Avenue...
Begin looking for: Allerton Avenue...
Found wiki link for: Allerton Avenue...
Begin looking for: Pelham Parkway...
Found wiki link for: Pelham Parkway...
Begin looking for: Bronx Park East...
Found wiki link for: Bronx Park East...
Begin looking for: West Farms Square-East Tremont Avenue...
Found wiki link for: West Farms Square-East Tremont Avenue...
Begin looking for: 174th Street...
Found wiki link for: 174th S

Found wiki link for: Utica Avenue...
Begin looking for: Broadway Junction...
Found wiki link for: Broadway Junction...
Begin looking for: Rockaway Park-Beach 116th Street...
Found wiki link for: Rockaway Park-Beach 116th Street...
Begin looking for: Tremont Avenue...
Found wiki link for: Tremont Avenue...
Begin looking for: Cathedral Parkway (110th Street)...
Found wiki link for: Cathedral Parkway (110th Street)...
Begin looking for: 81st Street-Museum of Natural History...
Found wiki link for: 81st Street-Museum of Natural History...
Begin looking for: 34th Street-Herald Square...
Found wiki link for: 34th Street-Herald Square...
Begin looking for: 34th Street-Herald Square...
Found wiki link for: 34th Street-Herald Square...
Begin looking for: 34th Street-Herald Square...
Found wiki link for: 34th Street-Herald Square...
Begin looking for: 34th Street-Herald Square...
Found wiki link for: 34th Street-Herald Square...
Begin looking for: DeKalb Avenue...
Found wiki link for: DeKalb Ave

Found wiki link for: 116th Street-Columbia University...
Begin looking for: 86th Street...
Found wiki link for: 86th Street...
Begin looking for: 72nd Street...
Found wiki link for: 72nd Street...
Begin looking for: 66th Street-Lincoln Center...
Found wiki link for: 66th Street-Lincoln Center...
Begin looking for: 59th Street-Columbus Circle...
Found wiki link for: 59th Street-Columbus Circle...
Begin looking for: 50th Street...
Found wiki link for: 50th Street...
Begin looking for: Times Square-46th Street...
Found wiki link for: Times Square-46th Street...
Begin looking for: Times Square-42nd Street...
Found wiki link for: Times Square-42nd Street...
Begin looking for: Times Square-42nd Street...
Found wiki link for: Times Square-42nd Street...
Begin looking for: Times Square-42nd Street...
Found wiki link for: Times Square-42nd Street...
Begin looking for: Times Square-42nd Street...
Found wiki link for: Times Square-42nd Street...
Begin looking for: Times Square-42nd Street...
Foun

Found wiki link for: Jackson Heights-Roosevelt Avenue/74th Street - Broadway...
Begin looking for: Woodside-61st Street...
Found wiki link for: Woodside-61st Street...
Begin looking for: Woodside-61st Street...
Found wiki link for: Woodside-61st Street...
Begin looking for: 46 Street-Bliss Street...
Found wiki link for: 46 Street-Bliss Street...
Begin looking for: 40 Street-Lowery Street...
Found wiki link for: 40 Street-Lowery Street...
Begin looking for: 33 Street-Rawson Street...
Found wiki link for: 33 Street-Rawson Street...
Begin looking for: Queensboro Plaza...
Found wiki link for: Queensboro Plaza...
Begin looking for: 42nd Street-Bryant Park/Fifth Avenue...
Found wiki link for: 42nd Street-Bryant Park/Fifth Avenue...
Begin looking for: Inwood-207th Street...
Found wiki link for: Inwood-207th Street...
Begin looking for: 42nd Street-Port Authority Bus Terminal...
Found wiki link for: 42nd Street-Port Authority Bus Terminal...
Begin looking for: 34th Street-Penn Station...
Found

Found wiki link for: Whitehall Street...
Begin looking for: Park Place...
Found wiki link for: Park Place...
Begin looking for: 23rd Street-Ely Avenue/Long Island City - Court Square...
Found wiki link for: 23rd Street-Ely Avenue/Long Island City - Court Square...
Begin looking for: 23rd Street-Ely Avenue/Long Island City - Court Square...
Found wiki link for: 23rd Street-Ely Avenue/Long Island City - Court Square...
Begin looking for: 42nd Street-Byrant Park/Fifth Avenue...
Found wiki link for: 42nd Street-Byrant Park/Fifth Avenue...
Begin looking for: Norwood Avenue...
Found wiki link for: Norwood Avenue...
Begin looking for: Brooklyn Battery Tunnel...
Found wiki link for: Brooklyn Battery Tunnel...
Begin looking for: Valley Stream...
Found wiki link for: Valley Stream...
Begin looking for: Merrick...
Found wiki link for: Merrick...
Begin looking for: Bellmore...
Found wiki link for: Bellmore...
Begin looking for: Seaford...
Found wiki link for: Seaford...
Begin looking for: Hempstea

In [76]:
def _parse_geo_dec(text):
    """Parse a decimal coordinate string such as '40.75°N 73.99°W'.

    Returns ``(lat, lon)`` as floats, with south and west negative, or None
    when ``text`` does not have that shape.
    """
    match = re.search(
        r'(\d+(?:\.\d+)?)\s*\u00b0?\s*([NS])[\s,]+(\d+(?:\.\d+)?)\s*\u00b0?\s*([EW])',
        text,
    )
    if match is None:
        return None
    lat = float(match.group(1))
    lon = float(match.group(3))
    if match.group(2) == 'S':
        lat = -lat
    if match.group(4) == 'W':
        lon = -lon
    return lat, lon


def lat_lon(x, timeout=30):
    """Return ``(lat, lon)`` scraped from a station's Wikipedia page.

    Parameters
    ----------
    x : row or mapping with 'station_name', 'wiki1' and 'wiki2' entries
        'wiki2' is tried first, then 'wiki1'. Entries that are not non-empty
        strings (e.g. NaN or None) are skipped.
    timeout : float, optional
        Seconds to wait for each page before requests raises a timeout error.

    Returns
    -------
    tuple of float
        Coordinates from the first page whose ``<span class="geo-dec">`` can
        be parsed, or ``(np.nan, np.nan)`` when neither page yields any.
    """
    print('Searching lat/lon for: ' + x['station_name'])
    tried = set()
    for url in (x['wiki2'], x['wiki1']):
        # Skip missing URLs and avoid fetching the same page twice.
        if not isinstance(url, str) or not url or url in tried:
            continue
        tried.add(url)
        page = requests.get(url, timeout=timeout)
        page_soup = BeautifulSoup(page.content, 'html.parser')
        geo = page_soup.find('span', class_='geo-dec')
        if geo is None:
            continue
        coords = _parse_geo_dec(geo.text)
        if coords is not None:
            return coords
    return np.nan, np.nan

In [77]:
# lat_lon returns a (lat, lon) tuple per row; expanding each tuple with
# pd.Series gives a two-column frame that is assigned to 'lat' and 'lon'.
latlon = arts[['station_name', 'wiki1', 'wiki2']].apply(lat_lon, axis=1)
arts[['lat', 'lon']] = latlon.apply(pd.Series)

Searching lat/lon for: Penn Station
Searching lat/lon for: Penn Station
Searching lat/lon for: Atlantic Terminal
Searching lat/lon for: Hicksville
Searching lat/lon for: Huntington
Searching lat/lon for: Broadway Station
Searching lat/lon for: Bayside Station
Searching lat/lon for: Great Neck
Searching lat/lon for: Hicksville
Searching lat/lon for: Ronkonkoma
Searching lat/lon for: Greenport
Searching lat/lon for: Grand Central Terminal
Searching lat/lon for: Grand Central Terminal
Searching lat/lon for: Grand Central Terminal
Searching lat/lon for: Harlem - 125 Street
Searching lat/lon for: Harlem - 125 Street
Searching lat/lon for: Mount Vernon West
Searching lat/lon for: Fleetwood
Searching lat/lon for: Tuckahoe
Searching lat/lon for: Crestwood
Searching lat/lon for: Scarsdale
Searching lat/lon for: Hartsdale
Searching lat/lon for: North White Plains
Searching lat/lon for: Pleasantville
Searching lat/lon for: Wassaic
Searching lat/lon for: Yankees-E. 153rd Street
Searching lat/lon f

Searching lat/lon for: Crescent Street
Searching lat/lon for: Norwood Avenue
Searching lat/lon for: Cleveland Street
Searching lat/lon for: Van Siclen Avenue
Searching lat/lon for: Alabama Avenue
Searching lat/lon for: Chauncey Street
Searching lat/lon for: Halsey Street
Searching lat/lon for: Gates Avenue
Searching lat/lon for: Kosciuszko Street
Searching lat/lon for: Myrtle Avenue
Searching lat/lon for: Flushing Avenue
Searching lat/lon for: Lorimer Street
Searching lat/lon for: Hewes Street
Searching lat/lon for: Marcy Avenue
Searching lat/lon for: E. 105th Street
Searching lat/lon for: New Lots Avenue
Searching lat/lon for: Livonia Avenue
Searching lat/lon for: Sutter Avenue
Searching lat/lon for: Broadway Junction
Searching lat/lon for: Myrtle-Wyckoff Avenues
Searching lat/lon for: 14th Street-Union Square
Searching lat/lon for: 14th Street/Eight Avenue
Searching lat/lon for: Union Street
Searching lat/lon for: Queensboro Plaza
Searching lat/lon for: Fifth Avenue/59th Street
Searc

In [78]:
# Manual override: set the coordinates of every 'Franklin Avenue' row by hand.
# NOTE(review): presumably the Wikipedia lookup failed or resolved to the wrong
# page for this station — confirm the source of these values.
arts.loc[arts.station_name=='Franklin Avenue', ['lat', 'lon']] = 40.681126, -73.955712

In [145]:
# Take the first archive member whose name starts with 'artist' (IndexError if
# there is none) and parse it.
# NOTE: this binds the module-level name `soup`, which later cells read and
# also rebind.
artists_xml = [text_file for text_file in zip_file.infolist() if text_file.filename.startswith('artist')][0]
soup = BeautifulSoup(zip_file.open(artists_xml), 'lxml')

In [146]:
# For every <permart> tag in the artists feed, collect the owning artist's
# first/last name and turn the tag's 'a:b:c:d' text into an MTA page URL.
# Tags whose text does not split into exactly four fields get NaN instead.
permart_tags = soup.find_all('permart')
all_artists = [(tag.parent.first.string, tag.parent.last.string) for tag in permart_tags]
all_fnames = [names[0] for names in all_artists]
all_lnames = [names[1] for names in all_artists]

all_permarts = []
for permart_fields in [tag.string.split(':') for tag in permart_tags]:
    if len(permart_fields) == 4:
        permart_url = ('http://web.mta.info/mta/aft/permanentart/permart.html?agency=' + permart_fields[0]
                       + '&line=' + permart_fields[1]
                       + '&artist=' + permart_fields[2]
                       + '&station=' + permart_fields[3])
    else:
        permart_url = np.nan
    all_permarts.append(permart_url)

In [147]:
# Sanity check: number of <permart> entries collected (each a URL or NaN).
len(all_permarts)

202

In [137]:
# Fetch every artwork page and read its <p class="title"> text. `titles` stays
# index-aligned with `all_permarts`: entries without a URL, or pages without a
# title paragraph, contribute NaN.
# NOTE(review): the loop rebinds the module-level `soup` (as the original did);
# find_link2 reads `soup`, and which page it expects is not visible here.
titles = []
for count, permarts in enumerate(all_permarts):
    title = np.nan
    if isinstance(permarts, str):
        print('looking for title for: '+ str(count) + ' ' + permarts)
        # A timeout keeps one stalled response from hanging the whole loop.
        page = requests.get(permarts, timeout=30)
        soup = BeautifulSoup(page.content, 'html.parser')
        title_tag = soup.find('p', {'class':'title'})
        if title_tag is not None:
            title = title_tag.string
    titles.append(title)

looking for title for: 0 http://web.mta.info/mta/aft/permanentart/permart.html?agency=lirr&line=babylonbranch&artist=1&station=2
looking for title for: 1 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=D&artist=1&station=2
looking for title for: 2 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=Q&artist=1&station=16
looking for title for: 3 http://web.mta.info/mta/aft/permanentart/permart.html?agency=lirr&line=ronkonkomabranch&artist=1&station=3
looking for title for: 4 http://web.mta.info/mta/aft/permanentart/permart.html?agency=mnr&line=harlemline&artist=2&station=1
looking for title for: 5 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=5&artist=1&station=9
looking for title for: 6 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=5&artist=1&station=13
looking for title for: 7 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=Q&artist=1&station=13
looking for title for: 8 h

looking for title for: 70 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=6&artist=1&station=13
looking for title for: 71 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=Q&artist=1&station=15
looking for title for: 72 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=7&artist=2&station=3
looking for title for: 73 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=J&artist=1&station=20
looking for title for: 74 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=L&artist=1&station=6
looking for title for: 75 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=J&artist=1&station=1
looking for title for: 76 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=A&artist=1&station=9
looking for title for: 77 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=W&artist=1&station=15
looking for title for: 78 http://web.mta.info/mta/af

looking for title for: 139 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=L&artist=1&station=4
looking for title for: 140 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=Q&artist=1&station=11
looking for title for: 141 http://web.mta.info/mta/aft/permanentart/permart.html?agency=mnr&line=harlemline&artist=1&station=7
looking for title for: 142 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=V&artist=2&station=4
looking for title for: 143 http://web.mta.info/mta/aft/permanentart/permart.html?agency=mnr&line=hudsonline&artist=1&station=2
looking for title for: 144 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=5&artist=1&station=17
looking for title for: 145 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=4&artist=1&station=6
looking for title for: 146 http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=L&artist=1&station=9
looking for title for: 147 htt

In [159]:
# One row per <permart> entry: artist names from the feed, the title scraped
# from the MTA page, and the page URL. The four lists are built in the same
# <permart> order, so they are index-aligned.
arts_links = pd.DataFrame({'artist_fname':all_fnames, 'artist_lname':all_lnames, 'art_title_year':titles, 'link':all_permarts})

In [235]:
# Replace missing values with the string placeholder 'NA'.
# NOTE(review): after this, pd.isnull() no longer detects missing titles or
# links in arts_links — they are the literal string 'NA'.
arts_links = arts_links.fillna('NA')

In [211]:
def find_link(x, links=None):
    """Return the MTA page link of the first scraped title that contains ``x``.

    Parameters
    ----------
    x : str
        Artwork title to look for (plain substring match, case-sensitive).
    links : DataFrame, optional
        Table with 'art_title_year' and 'link' columns; defaults to the
        module-level ``arts_links``.

    Returns
    -------
    str or float
        The matching row's link, or ``np.nan`` when ``x`` is not a non-empty
        string or no title contains it.
    """
    print(x)
    if links is None:
        links = arts_links
    # A missing or empty title cannot identify an artwork ('' is a substring
    # of every title).
    if not isinstance(x, str) or not x:
        return np.nan
    for art_title, link in zip(links.art_title_year.tolist(), links.link.tolist()):
        # Skip missing titles, including the 'NA' placeholder that
        # fillna('NA') writes into the table.
        if not isinstance(art_title, str) or art_title == 'NA':
            continue
        if x in art_title:
            return link
    return np.nan


In [212]:
# Match every artwork title in `arts` against the scraped titles. The result
# keeps the index of `arts`, so it can be assigned back as a column.
# (The original read `temp.art_title`, but no `temp` with an `art_title`
# column is defined anywhere in this notebook.)
a = arts.art_title.apply(find_link)

Ghosts Series
Eclipsed Time
Overlook
Morning Transit, Hempstead Plain & Evening Transit, Hempstead Plain
For My Grandfather Noye Pride, a Locomotive Engineer
Celadon Remnants
Bayside Story
Conductor's Watch and Key Chain
Morning Transit, Hempstead Plain & Evening Transit, Hempstead Plain,
Planting (dedicated to Long Island Tree Farmers)
Illuminated Station
As Above, So Below
A Field of Wild Flowers
Sirshasana
Hear the Lone Whistle Moan
Harlem Encore
Travelin' Time
Time Catcher
The Finder / The Seekers
The Discovery
Travelers
Workers
At the Table
Almost Home
Arrival
The Home of the Stars
Rising and Setting
Muhheakantuck (The River that Flows Two Ways)
Floating Auriculas
North, South and Home
Three Statues (A Short History of the Lower Hudson Valley)
Tranquility
The Four Seasons
Railroads and Rooftops
Magic Realism in Kingsbridge
Flight
Primavera
Fossils
Railrider's Throne
Westside Views
Laced Canopy
Artemis, Acrobats, Divas and Dancers
Whirls and twirls (MTA)
Alice: The Way Out
Times Sq

In [217]:
# Attach the title-matched links; the assignment aligns `a` to `arts` by index.
arts['mta_link'] = a

In [5]:
# Count the artworks that still lack a link and keep those rows for a second
# pass by artist name.
# NOTE(review): as written, the count on the first line is computed but, by
# default, not echoed because it is not the cell's last expression.
arts.mta_link.isna().sum()
missing = arts[arts.mta_link.isna()]

74

In [38]:
def find_link2(x, page=None):
    """Find an artwork's MTA link by looking the artist up on a parsed page.

    The page is searched for a text node equal to the full name, then the
    first name, then the last name. From the first hit, the grandparent
    element's 'Percent for Art' anchor supplies the link.

    NOTE(review): by default this reads the module-level ``soup``; the cell
    that loads the page with the 'Percent for Art' anchors is not visible in
    this notebook — confirm which page ``soup`` must hold.

    Parameters
    ----------
    x : row or object with ``artist_fname`` and ``artist_lname`` attributes
    page : object exposing ``find(text=...)``, optional
        Parsed page to search; defaults to the module-level ``soup``.

    Returns
    -------
    str or float
        The absolute link, or ``np.nan`` when both names are missing, no name
        is found on the page, or the hit has no 'Percent for Art' anchor.
    """
    if page is None:
        page = soup
    name_parts = [part for part in (x.artist_fname, x.artist_lname) if not pd.isnull(part)]
    if not name_parts:
        # Neither name is known, so there is nothing to search for.
        return np.nan
    artist = ' '.join(name_parts)
    print('looking for mta link: ' + artist)

    # Full name first, then each available part; never search with a null name.
    hit = None
    for candidate in [artist] + name_parts:
        hit = page.find(text=candidate)
        if hit is not None:
            break
    if hit is None:
        return np.nan

    anchor = hit.parent.parent.find('a', text='Percent for Art')
    if anchor is None or not anchor.get('href'):
        return np.nan
    # Drop the first two characters of the href before appending it to the
    # site root (presumably a leading '..' — confirm against the page).
    return 'http://web.mta.info/mta/aft' + anchor['href'][2:]

In [42]:
# Second pass: look up the still-unlinked artworks by artist name. The result
# is indexed like `missing`.
temp = missing[['artist_fname', 'artist_lname']].apply(find_link2, axis=1)

looking for mta link: Roy Nicholson
looking for mta link: Roy Nicholson
looking for mta link: Alice Adams
looking for mta link: Anita Thacher
looking for mta link: Martha Jackson-Jarvis
looking for mta link: Arthur Gonzalez
looking for mta link: Arthur Gonzalez
looking for mta link: Jane Greengold and Kane Chanh Do
looking for mta link: Robert Taplin
looking for mta link: Tova Snyder
looking for mta link: Nitza Tufiño
looking for mta link: Nancy Spero
looking for mta link: Liliana Porter
looking for mta link: Jane Dickson
looking for mta link: Deborah Brown
looking for mta link: Doug & Mike Starn
looking for mta link: George Crespo
looking for mta link: José Ortega
looking for mta link: Nancy Holt
looking for mta link: Harry Roseman
looking for mta link: Jane Greengold
looking for mta link: Arts for Transit Collaborative
looking for mta link: Louis Delsarte
looking for mta link: Muriel Castanis
looking for mta link: Hugo Consuegra
looking for mta link: Houston Conwill
looking for mta l

In [43]:
# Fill the still-missing links from the artist-name pass (`temp`), aligned by
# index. The original assigned `a` here, which is NaN on exactly these rows
# and therefore filled nothing.
arts.loc[arts.mta_link.isna(), 'mta_link'] = temp

In [49]:
# Number of artworks that still have no MTA link.
arts.mta_link.isna().sum()

1

In [48]:
# Hand-picked MTA links, applied in order: (column to match, value, link).
manual_links = [
    ('art_title', 'Westside Views',
     'http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=1&artist=1&station=7'),
    ('art_title', 'For Want of a Nail',
     'http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=C&artist=1&station=4'),
    ('art_title', 'New York City Architectural Artifacts from the Collection of the Brooklyn Museum',
     'http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=3&artist=1&station=19'),
    ('artist_lname', 'Atlantic Terra Cotta Company',
     'http://web.mta.info/mta/aft/permanentart/permart.html?agency=nyct&line=Q&artist=2&station=23'),
]
for match_column, match_value, manual_link in manual_links:
    arts.loc[arts[match_column] == match_value, 'mta_link'] = manual_link

In [51]:
# Write the enriched artwork table and the artist/link lookup table to CSV,
# without the index column.
arts.to_csv('mta-arts.csv', index=False)
arts_links.to_csv('arts_links.csv', index=False)