this script will create a pandas dataframe (like a table)

pull data from the detroit mural website, one page at a time(20 murals/page) and add to the dataframe

then export to CSV

In [1]:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
import numpy as np

In [2]:
#the website we are collecting data from 
page_link = "https://detroitmopa.org/artworks/tag/Murals"
#structure of the database
df =pd.DataFrame(columns = ['muralName', 'artistName', 'address', 'year','imgLink'])


In [7]:
def MuralScrape(page_link, df):
    #function that takes a link and dataframe -like an excel spreadsheet/table -
    #and outputs a dataframe
    page_response = requests.get(page_link, timeout=5)
    #beautiful soup is a useful tool for reading and parsing html
    page_content = BeautifulSoup(page_response.content, "html.parser")
    #finds all html blocks with a certain class name
    chain = [x.find_all('img') for x in page_content.find_all(class_="BlogList-item-image")]
    
    #initialize list to store data
    imagelist = []
    muralnamelist= []
    
    for i in range(len(chain)):
        string = chain[i][0]
        #use regex to get only relevant info
        result = re.search('(?:)data-image="([^"]+)', string.prettify())
        if result:
            imagelist.append(result.group(1))
        else:
            imagelist.append("")
        muralname = re.search('(?<=img alt=")(.*)(?=" data-image=")', string.prettify())
        if muralname:
            muralnamelist.append(muralname.group(1))
        else:
            muralnamelist.append("")
    
    #check to make sure data is added to list 
    #print(muralnamelist)
    #print(imagelist)
    excerpt = [x.find_all('p') for x in page_content.find_all(class_ = "BlogList-item-excerpt")]
    
    addresslist = []
    artistnamelist= []
    yearlist=[]
    
    for i in range(len(excerpt)):
        string = excerpt[i][0]
    
        name = re.search('(?<=Artist:)([\s\S]*)(?=Year)', string.text)
        if name:
            artistnamelist.append(name.group(1).replace(u'\xa0', u' ').strip())
        else:
            artistnamelist.append("")
        
        year = re.search('(?<=Year: )(.*)(?=Address)', string.text.replace(u'\xa0', u' '))
        if year:
            yearlist.append(year.group(1))
        else:
            yearlist.append("")
        address = re.search('(?<=Address: )([\S\s]*)', string.text.replace(u'\xa0', u' '))
        if address:
        
            addresslist.append(address.group(1))
        else:
            addresslist.append("")
    
    #adds data from lists to dataframe
    for i in range(len(addresslist)):
        newlist = [muralnamelist[i],artistnamelist[i],addresslist[i], yearlist[i], imagelist[i]]
        df = df.append(pd.Series(newlist,index=['muralName', 'artistName', 'address', 'year','imgLink']), ignore_index=True)
    return df
    

In [8]:
#call function
df = MuralScrape(page_link, df)

In [9]:
#show first 5 entries in dataframe
df.head()

Unnamed: 0,muralName,artistName,address,year,imgLink
0,DETROIT MURAL #0674,Pat Perry,"2605 Newark Street, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
1,DETROIT MURAL #0694,Tony Whlgn,"19031 Grand River Ave, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
2,DETROIT MURAL #0693,Ledania,"2605 Newark Street, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
3,DETROIT MURAL #0690,FFTY,"8801 Oakland Avenue, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
4,DETROIT MURAL #0689,Laura Finlay,"4126 Third Avenue, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...


In [10]:
# scrape next page 
page_link = "https://detroitmopa.org/artworks?offset=1528831925675&tag=Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df.head()

Unnamed: 0,muralName,artistName,address,year,imgLink
0,DETROIT MURAL #0674,Pat Perry,"2605 Newark Street, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
1,DETROIT MURAL #0694,Tony Whlgn,"19031 Grand River Ave, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
2,DETROIT MURAL #0693,Ledania,"2605 Newark Street, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
3,DETROIT MURAL #0690,FFTY,"8801 Oakland Avenue, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...
4,DETROIT MURAL #0689,Laura Finlay,"4126 Third Avenue, Detroit, MI",2018,https://static1.squarespace.com/static/5ab0771...


In [11]:
#drop duplicates and reset index (index may be used as primary key in database)
df = df.drop_duplicates().reset_index(drop=True)

In [12]:
#next page
page_link = "https://detroitmopa.org/artworks?offset=1509132420361&tag=Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df= df.drop_duplicates().reset_index(drop=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
55,DETROIT MURAL #0281,Julian Spradlin,"9131 Oakland Ave, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
56,DETROIT MURAL #0218,Rashaun Rucker,"2200 Hunt St, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
57,DETROIT MURAL #0081,Unknown,"The Dequindre Cut Greenway, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
58,DETROIT MURAL #0075,Michael Olszewski,"4756 Grand River, Detroit",2017,https://static1.squarespace.com/static/5ab0771...
59,DETROIT MURAL #0071,WC Bevan,"6331 Third Street, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...


In [13]:
page_link = "https://detroitmopa.org/artworks/tag/Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
135,DETROIT MURAL #0271,Shaini Kasztelan & Heidi Barlow,"1807 Alfred St, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
136,DETROIT MURAL #0273,HOXXOH & Brian Lacey,"1807 Alfred St, Detroit, MI (alley)",2017,https://static1.squarespace.com/static/5ab0771...
137,DETROIT MURAL #0269,HOXXOH,"1544 Mack Ave, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
138,DETROIT MURAL #0270,Sydney G. James & Askew,"1461 Erskine St, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
139,DETROIT MURAL #0266,Sydney G. James & Askew,"1461 Erskine St, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...


In [14]:
df = df.drop_duplicates()

In [15]:
page_link = "https://detroitmopa.org/artworks?offset=1506454620707&tag=Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
175,DETROIT MURAL #0186,SharkToof,"1500 Division Street, Detroit, MI",2017,https://static1.squarespace.com/static/5ab0771...
176,DETROIT MURAL #0225,Nivek Monet,"2739 Russell St (Rear), Detroit",2017,https://static1.squarespace.com/static/5ab0771...
177,DETROIT MURAL #0035,Canvas Voice,Unknown,2017,https://static1.squarespace.com/static/5ab0771...
178,DETROIT MURAL #0214,Greg Mike,"2899 Orleans Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
179,DETROIT MURAL #0226,Sydney G. James,"Cutters Bar & Grill, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...


In [96]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 5 columns):
muralName     180 non-null object
artistName    180 non-null object
address       180 non-null object
year          180 non-null object
imgLink       180 non-null object
dtypes: object(5)
memory usage: 7.1+ KB


In [16]:
page_link = "https://detroitmopa.org/artworks?offset=1474577460207&tag=Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
375,DETROIT MURAL #0115,Ghost Beard and Patch Whisky,"1489 Winder Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
376,DETROIT MURAL #0114,Felipe Pantone,"2478 Riopelle Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
377,DETROIT MURAL #0109,Hebru Brantley,"2418 Orleans Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
378,DETROIT MURAL #0162,Mr. Jago & Xenx,"1801 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
379,DETROIT MURAL #0227,Slick,"2000 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...


In [17]:
df= df.drop_duplicates()
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
375,DETROIT MURAL #0115,Ghost Beard and Patch Whisky,"1489 Winder Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
376,DETROIT MURAL #0114,Felipe Pantone,"2478 Riopelle Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
377,DETROIT MURAL #0109,Hebru Brantley,"2418 Orleans Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
378,DETROIT MURAL #0162,Mr. Jago & Xenx,"1801 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
379,DETROIT MURAL #0227,Slick,"2000 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...


In [19]:
df.reset_index(drop=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
375,DETROIT MURAL #0115,Ghost Beard and Patch Whisky,"1489 Winder Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
376,DETROIT MURAL #0114,Felipe Pantone,"2478 Riopelle Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
377,DETROIT MURAL #0109,Hebru Brantley,"2418 Orleans Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
378,DETROIT MURAL #0162,Mr. Jago & Xenx,"1801 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
379,DETROIT MURAL #0227,Slick,"2000 Division Street, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...


In [20]:
page_link = "https://detroitmopa.org/artworks?offset=1472763120116&tag=Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)


Unnamed: 0,muralName,artistName,address,year,imgLink
255,DETROIT MURAL #0355,Lauren Harrington,"1799 Antietam Ave, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
256,DETROIT MURAL #0406,WC Bevan,"2469 E Fisher Service Dr, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
257,DETROIT MURAL #0151,Sintex,"2000 Division St, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
258,DETROIT MURAL #0580,Nic Notion,"1352 Gratiot Avenue, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
259,DETROIT MURAL #0229,Hygienic Dress League,"1543 Gratiot Ave, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...


In [21]:
df = df.drop_duplicates().reset_index(drop=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
135,DETROIT MURAL #0355,Lauren Harrington,"1799 Antietam Ave, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
136,DETROIT MURAL #0406,WC Bevan,"2469 E Fisher Service Dr, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
137,DETROIT MURAL #0151,Sintex,"2000 Division St, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
138,DETROIT MURAL #0580,Nic Notion,"1352 Gratiot Avenue, Detroit, MI",2016,https://static1.squarespace.com/static/5ab0771...
139,DETROIT MURAL #0229,Hygienic Dress League,"1543 Gratiot Ave, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...


In [22]:
page_link = "https://detroitmopa.org/artworks?offset=1445547120498&tag=Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df= df.drop_duplicates().reset_index(drop=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
155,DETROIT MURAL #0213,Nosego & WOES,"2336 Russell Street, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
156,DETROIT MURAL #0212,Jonny Alexander,"2611 Russell Street, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
157,DETROIT MURAL #0191,WOES & Luke Chueh,"1421 Gratiot Avenue, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
158,DETROIT MURAL #0188,Miss Van,"1501 Division Street, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
159,DETROIT MURAL #0159,Ouizi,"1816 Division Street, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...


In [23]:
page_link = "https://detroitmopa.org/artworks?offset=1441139160848&tag=Eastern+Market+Murals"
df = df.append(MuralScrape(page_link, df), ignore_index=True)
df= df.drop_duplicates().reset_index(drop=True)
df.tail()

Unnamed: 0,muralName,artistName,address,year,imgLink
170,DETROIT MURAL #0112,Ron Zakrin,"1516 Winder St (In Alley), Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
171,DETROIT MURAL #0113,2501,"1550 Winder Street, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
172,DETROIT MURAL #0264,Beau Stanton,"2126 Pierce St, Detroit, MI",2015,https://static1.squarespace.com/static/5ab0771...
173,DETROIT MURAL #0372,Sintex,"2144 Gratiot Ave., Detroit, MI",2014,https://static1.squarespace.com/static/5ab0771...
174,DETROIT MURAL #0371,Sintex,"2144 Gratiot Ave., Detroit, MI",2014,https://static1.squarespace.com/static/5ab0771...


In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 5 columns):
muralName     175 non-null object
artistName    175 non-null object
address       175 non-null object
year          175 non-null object
imgLink       175 non-null object
dtypes: object(5)
memory usage: 6.9+ KB


In [None]:
#export to csv
df.to_csv("murals.csv")