In [1]:
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd

In [2]:
URL = 'https://ryman.com/events/'

response = requests.get(URL)

In [3]:
type(response)

requests.models.Response

In [4]:
response.status_code

200

In [5]:
response.text



1. Start by using either the inspector or by viewing the page source. Can you identify a tag that might be helpful for finding the names of all performers? For now, just worry about the headliner and don't worry about the opener. (Eg. For Vince Gill, featuring Wendy Moten, we only care about Vince Gill.) Make use of this to create a list containing just the names of each inductee.

In [6]:
ryman_soup = BS(response.text)

In [7]:
#this way of going about it is tricky b/c not all h2 tags have an 'a' tag. you end up having to add the if x.find('a') to the end of the list comp below
#you can use .findAll method b/c it's a beautiful soup object
h2_tags = ryman_soup.findAll('h2')
h2_tags

[<h2 class="tribe-events-visuallyhidden">Events Search and Views Navigation</h2>,
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">
 		Franklin Brentwood Arts Academy 2022 Summer Showcase	</a>
 </h2>,
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/school-of-rock-072022/" rel="bookmark" title="School of Rock">
 		School of Rock	</a>
 </h2>,
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/bgnights-072122-steeldrivers/" rel="bookmark" title="The SteelDrivers">
 		The SteelDrivers	</a>
 </h2>,
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/nhabit-worship-experience/" rel="bookmark" title="The Nhabit Worship Experience">
 		The Nhabit Worship Experience	</a>
 </h2>,
 <h

In [8]:
# h2_tags.findAll('a') NOPE results in error. ResultSet object has no attr 'findAll'

In [9]:
#this only works if you add the if x.find('a') at the end b/c the first element is None and you need to tell it to skip that otherwise it stops at the first element
a_tags = [x.find('a') for x in h2_tags if x.find('a')]
#type(a_tags)
#print(a_tags[0])
a_tags

[<a class="tribe-event-url" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">
 		Franklin Brentwood Arts Academy 2022 Summer Showcase	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/school-of-rock-072022/" rel="bookmark" title="School of Rock">
 		School of Rock	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/bgnights-072122-steeldrivers/" rel="bookmark" title="The SteelDrivers">
 		The SteelDrivers	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/nhabit-worship-experience/" rel="bookmark" title="The Nhabit Worship Experience">
 		The Nhabit Worship Experience	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/wild-hearts/" rel="bookmark" title="Sharon Van Etten, Julien Baker and Angel Olsen">
 		Sharon Van Etten, Julien Baker and Angel Olsen	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/andrew-bird-and-iron-wine/" rel=

In [10]:
#we also need if x.get('title') here because not all a tags have a title (the last 4 don't)
titles = [x.get('title') for x in a_tags if x.get('title')]
titles

['Franklin Brentwood Arts Academy 2022 Summer Showcase',
 'School of Rock',
 'The SteelDrivers',
 'The Nhabit Worship Experience',
 'Sharon Van Etten, Julien Baker and Angel Olsen',
 'Andrew Bird and Iron & Wine',
 'Pat Benatar & Neil Giraldo',
 'Ricky Skaggs & Kentucky Thunder',
 'Turnpike Troubadours',
 'Turnpike Troubadours',
 'Joe Bonamassa',
 'Charli XCX',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Why Don’t We',
 'Men At Work',
 'RuPaul’s Drag Race',
 'A.R. Rahman']

In [11]:
#seeing how to extract title from just one h2_tag
h2_tag_2 = h2_tags[1]
h2_tag_2.find('a')['title']

'Franklin Brentwood Arts Academy 2022 Summer Showcase'

In [12]:
#another way- instead of finding h2 tags, we just look for a tags where class=tribe-event-url, which if you look, all of the a tags with band titles have this class
a_tags_2 = ryman_soup.findAll('a', attrs = {'class':'tribe-event-url'})
a_tags_2

[<a class="tribe-event-url" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">
 		Franklin Brentwood Arts Academy 2022 Summer Showcase	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/school-of-rock-072022/" rel="bookmark" title="School of Rock">
 		School of Rock	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/bgnights-072122-steeldrivers/" rel="bookmark" title="The SteelDrivers">
 		The SteelDrivers	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/nhabit-worship-experience/" rel="bookmark" title="The Nhabit Worship Experience">
 		The Nhabit Worship Experience	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/wild-hearts/" rel="bookmark" title="Sharon Van Etten, Julien Baker and Angel Olsen">
 		Sharon Van Etten, Julien Baker and Angel Olsen	</a>,
 <a class="tribe-event-url" href="https://ryman.com/event/andrew-bird-and-iron-wine/" rel=

In [13]:
#try to extract the title of just one before putting in a loop
a_tag_1 = a_tags_2[0]
#a_tag_1.find('title') DOES NOT WORK
#a_tag_1['title'] this works
a_tag_1.get('title') #works

'Franklin Brentwood Arts Academy 2022 Summer Showcase'

In [14]:
titles_2 = [x.get('title') for x in a_tags_2]
titles_2

['Franklin Brentwood Arts Academy 2022 Summer Showcase',
 'School of Rock',
 'The SteelDrivers',
 'The Nhabit Worship Experience',
 'Sharon Van Etten, Julien Baker and Angel Olsen',
 'Andrew Bird and Iron & Wine',
 'Pat Benatar & Neil Giraldo',
 'Ricky Skaggs & Kentucky Thunder',
 'Turnpike Troubadours',
 'Turnpike Troubadours',
 'Joe Bonamassa',
 'Charli XCX',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Why Don’t We',
 'Men At Work',
 'RuPaul’s Drag Race',
 'A.R. Rahman']

2. Next, try and find a tag that could be used to find the date and time for each show. Extract these into two lists, one containing the date and the other containing the time. (Eg. THURSDAY, AUGUST 4, 2022 AT 8:00 PM CDT should be split into August 4, 2022 and 8:00 PM CDT.) 

In [15]:
time = ryman_soup.findAll('time')
#type(time)
time

[<time datetime="2022-07-19 06:00:00 CDT">Tuesday, July 19, 2022 at 6:00 PM CDT</time>,
 <time datetime="2022-07-20 07:00:00 CDT">Wednesday, July 20, 2022 at 7:00 PM CDT</time>,
 <time datetime="2022-07-21 07:30:00 CDT">Thursday, July 21, 2022 at 7:30 PM CDT</time>,
 <time datetime="2022-07-22 07:00:00 CDT">Friday, July 22, 2022 at 7:00 PM CDT</time>,
 <time datetime="2022-07-23 08:00:00 CDT">Saturday, July 23, 2022 at 8:00 PM CDT</time>,
 <time datetime="2022-07-24 08:00:00 CDT">Sunday, July 24, 2022 at 8:00 PM CDT</time>,
 <time datetime="2022-07-25 07:30:00 CDT">Monday, July 25, 2022 at 7:30 PM CDT</time>,
 <time datetime="2022-07-28 07:30:00 CDT">Thursday, July 28, 2022 at 7:30 PM CDT</time>,
 <time datetime="2022-07-29 08:00:00 CDT">Friday, July 29, 2022 at 8:00 PM CDT</time>,
 <time datetime="2022-07-30 08:00:00 CDT">Saturday, July 30, 2022 at 8:00 PM CDT</time>,
 <time datetime="2022-08-02 08:00:00 CDT">Tuesday, August 2, 2022 at 8:00 PM CDT</time>,
 <time datetime="2022-08-03 0

In [16]:
#extract text only from ResultSet
time_text = [x.text for x in time]
time_text

['Tuesday, July 19, 2022 at 6:00 PM CDT',
 'Wednesday, July 20, 2022 at 7:00 PM CDT',
 'Thursday, July 21, 2022 at 7:30 PM CDT',
 'Friday, July 22, 2022 at 7:00 PM CDT',
 'Saturday, July 23, 2022 at 8:00 PM CDT',
 'Sunday, July 24, 2022 at 8:00 PM CDT',
 'Monday, July 25, 2022 at 7:30 PM CDT',
 'Thursday, July 28, 2022 at 7:30 PM CDT',
 'Friday, July 29, 2022 at 8:00 PM CDT',
 'Saturday, July 30, 2022 at 8:00 PM CDT',
 'Tuesday, August 2, 2022 at 8:00 PM CDT',
 'Wednesday, August 3, 2022 at 8:00 PM CDT',
 'Thursday, August 4, 2022 at 8:00 PM CDT',
 'Friday, August 5, 2022 at 8:00 PM CDT',
 'Saturday, August 6, 2022 at 8:00 PM CDT',
 'Sunday, August 7, 2022 at 8:00 PM CDT',
 'Monday, August 8, 2022 at 7:00 PM CDT',
 'Monday, August 8, 2022 at 7:30 PM CDT',
 'Tuesday, August 9, 2022 at 7:30 PM CDT',
 'Wednesday, August 10, 2022 at 7:30 PM CDT']

extract the SHOWTIME

In [28]:
#try slicing on one before loop, it works
time_1 = time_text[0]
time_1[-11:]

'6:00 PM CDT'

In [18]:
showtime_texts = [x[-11:] for x in time_text]
showtime_texts

['6:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT']

In [19]:
# string = "Username: How are you today?"
#>>> string[:string.index(":")]
#figure out how to slice one date, it works
date_1 = time_text[0]
print(date_1)
date_1[:date_1.index(' at')]

Tuesday, July 19, 2022 at 6:00 PM CDT


'Tuesday, July 19, 2022'

In [20]:
date_texts = [x[:x.index(' at')] for x in time_text]
date_texts

['Tuesday, July 19, 2022',
 'Wednesday, July 20, 2022',
 'Thursday, July 21, 2022',
 'Friday, July 22, 2022',
 'Saturday, July 23, 2022',
 'Sunday, July 24, 2022',
 'Monday, July 25, 2022',
 'Thursday, July 28, 2022',
 'Friday, July 29, 2022',
 'Saturday, July 30, 2022',
 'Tuesday, August 2, 2022',
 'Wednesday, August 3, 2022',
 'Thursday, August 4, 2022',
 'Friday, August 5, 2022',
 'Saturday, August 6, 2022',
 'Sunday, August 7, 2022',
 'Monday, August 8, 2022',
 'Monday, August 8, 2022',
 'Tuesday, August 9, 2022',
 'Wednesday, August 10, 2022']

3. Take the two lists you created on parts 1 and 2 and convert it into a pandas DataFrame.
df = pd.DataFrame({'col':L})
print (df)

In [21]:
ryman_df_pg1 = pd.DataFrame({'band':titles_2, 'showtime':showtime_texts, 'date':date_texts})
ryman_df_pg1

Unnamed: 0,band,showtime,date
0,Franklin Brentwood Arts Academy 2022 Summer Sh...,6:00 PM CDT,"Tuesday, July 19, 2022"
1,School of Rock,7:00 PM CDT,"Wednesday, July 20, 2022"
2,The SteelDrivers,7:30 PM CDT,"Thursday, July 21, 2022"
3,The Nhabit Worship Experience,7:00 PM CDT,"Friday, July 22, 2022"
4,"Sharon Van Etten, Julien Baker and Angel Olsen",8:00 PM CDT,"Saturday, July 23, 2022"
5,Andrew Bird and Iron & Wine,8:00 PM CDT,"Sunday, July 24, 2022"
6,Pat Benatar & Neil Giraldo,7:30 PM CDT,"Monday, July 25, 2022"
7,Ricky Skaggs & Kentucky Thunder,7:30 PM CDT,"Thursday, July 28, 2022"
8,Turnpike Troubadours,8:00 PM CDT,"Friday, July 29, 2022"
9,Turnpike Troubadours,8:00 PM CDT,"Saturday, July 30, 2022"


4. Now, you need to take what you created for the first page and apply it across multiple rest of the pages so that you can scrape all inductees. Notice how the url changes when you click the "More Events" button at the top of the page. Check that the code that you wrote for the first page still works for page 2. Once you have verified that your code will still work, write a for loop that will cycle through the first five pages of events.

In [22]:
URL = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=2'
req = requests.get(URL)
#req.status_code
ryman_soup_2 = BS(req.text)

In [23]:
a_pg2 = ryman_soup_2.findAll('a', attrs = {'class':'tribe-event-url'})
titles_pg2 = [x.get('title') for x in a_pg2]
titles_pg2


['Happy Together',
 'Pod Save America',
 'Lady A',
 'Lady A',
 'Anthrax and Black Label Society',
 'Chris Isaak and Lyle Lovett and his Large Band',
 'The Shins',
 'Local Natives',
 'Mary Chapin Carpenter',
 'Ward Davis',
 'Matthew West & Friends',
 'ACM Honors',
 'Blondie',
 'John Mulaney',
 'John Mulaney',
 'John Mulaney',
 'The Decemberists',
 'John Mulaney',
 'Frankie Valli & the Four Seasons',
 'Lauv']

In [24]:
URL2 = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged='
#so you can see what's happening in the loop
for page in range (1,6):
    print(URL2 + str(page))
    print(page)

https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=1
1
https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=2
2
https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=3
3
https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=4
4
https://ryman.com/events/list/?tribe_event_display=list&tribe_paged=5
5


In [33]:
URL2 = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged='

artist_list = []

for page in range (1,6): 
    req = requests.get(URL2 + str(page))
    soup = BS(req.text)
    a_tags = soup.findAll('a', attrs = {'class':'tribe-event-url'})
    titles = [x.get('title') for x in a_tags]
    artist_list.extend(titles)    

In [34]:
artist_list

['Franklin Brentwood Arts Academy 2022 Summer Showcase',
 'School of Rock',
 'The SteelDrivers',
 'The Nhabit Worship Experience',
 'Sharon Van Etten, Julien Baker and Angel Olsen',
 'Andrew Bird and Iron & Wine',
 'Pat Benatar & Neil Giraldo',
 'Ricky Skaggs & Kentucky Thunder',
 'Turnpike Troubadours',
 'Turnpike Troubadours',
 'Joe Bonamassa',
 'Charli XCX',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Vince Gill',
 'Why Don’t We',
 'Men At Work',
 'RuPaul’s Drag Race',
 'A.R. Rahman',
 'Happy Together',
 'Pod Save America',
 'Lady A',
 'Lady A',
 'Anthrax and Black Label Society',
 'Chris Isaak and Lyle Lovett and his Large Band',
 'The Shins',
 'Local Natives',
 'Mary Chapin Carpenter',
 'Ward Davis',
 'Matthew West & Friends',
 'ACM Honors',
 'Blondie',
 'John Mulaney',
 'John Mulaney',
 'John Mulaney',
 'The Decemberists',
 'John Mulaney',
 'Frankie Valli & the Four Seasons',
 'Lauv',
 'Watchhouse',
 'Watchhouse',
 'Spoon',
 'Jungle',
 'Opry Country Classics at the Ryman',
 'C

In [35]:
URL2 = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged='

showtime_list = []

for page in range (1,6):
    req = requests.get(URL2 + str(page))
    soup = BS(req.text)
    time = soup.findAll('time')
    time_text = [x.text for x in time]
    showtimes = [x[-11:] for x in time_text]
    showtime_list.extend(showtimes)

In [36]:
showtime_list

['6:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '6:30 PM CDT',
 '7:30 PM CDT',
 '7:00 PM CDT',
 '7:00 PM CDT',
 '9:30 PM CDT',
 '8:00 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '6:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '7:30 PM CDT',
 '8:00 PM CDT',
 '8:00 PM CDT',
 '7:00 PM CDT',
 '9:30 PM CDT',
 '7:30 PM CDT',
 '8:00 P

In [37]:
URL2 = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged='

date_list = []

for page in range (1,6):
    req = requests.get(URL2 + str(page))
    soup = BS(req.text)
    time = soup.findAll('time')
    time_text = [x.text for x in time]
    dates = [x[:x.index(' at')] for x in time_text]
    date_list.extend(dates)

In [38]:
date_list

['Tuesday, July 19, 2022',
 'Wednesday, July 20, 2022',
 'Thursday, July 21, 2022',
 'Friday, July 22, 2022',
 'Saturday, July 23, 2022',
 'Sunday, July 24, 2022',
 'Monday, July 25, 2022',
 'Thursday, July 28, 2022',
 'Friday, July 29, 2022',
 'Saturday, July 30, 2022',
 'Tuesday, August 2, 2022',
 'Wednesday, August 3, 2022',
 'Thursday, August 4, 2022',
 'Friday, August 5, 2022',
 'Saturday, August 6, 2022',
 'Sunday, August 7, 2022',
 'Monday, August 8, 2022',
 'Monday, August 8, 2022',
 'Tuesday, August 9, 2022',
 'Wednesday, August 10, 2022',
 'Thursday, August 11, 2022',
 'Friday, August 12, 2022',
 'Saturday, August 13, 2022',
 'Sunday, August 14, 2022',
 'Tuesday, August 16, 2022',
 'Wednesday, August 17, 2022',
 'Wednesday, August 17, 2022',
 'Thursday, August 18, 2022',
 'Friday, August 19, 2022',
 'Saturday, August 20, 2022',
 'Sunday, August 21, 2022',
 'Wednesday, August 24, 2022',
 'Wednesday, August 24, 2022',
 'Thursday, August 25, 2022',
 'Friday, August 26, 2022',
 '

In [92]:
ryman_df = pd.DataFrame({'band':artist_list, 'showtime':showtime_list, 'date':date_list})
ryman_df

Unnamed: 0,band,showtime,date
0,Franklin Brentwood Arts Academy 2022 Summer Sh...,6:00 PM CDT,"Tuesday, July 19, 2022"
1,School of Rock,7:00 PM CDT,"Wednesday, July 20, 2022"
2,The SteelDrivers,7:30 PM CDT,"Thursday, July 21, 2022"
3,The Nhabit Worship Experience,7:00 PM CDT,"Friday, July 22, 2022"
4,"Sharon Van Etten, Julien Baker and Angel Olsen",8:00 PM CDT,"Saturday, July 23, 2022"
...,...,...,...
95,Mike Birbiglia,7:30 PM CST,"Wednesday, November 16, 2022"
96,Trombone Shorty,8:00 PM CST,"Friday, November 18, 2022"
97,Dropkick Murphys,7:30 PM CST,"Saturday, November 19, 2022"
98,W.A.S.P.,7:30 PM CST,"Wednesday, November 23, 2022"


5. **Bonus #1:**: Add to your data frame the opening act for all shows that list an opener.

In [107]:
URL = 'https://ryman.com/events/'
req = requests.get(URL)
soup = BS(req.text)

In [119]:
divs = soup.findAll('div', attrs = {'class':"tribe-beside-image"})
#divs_text = [x.text for x in divs]
divs

[<div class="tribe-beside-image">
 <!-- Event Title -->
 <!-- SUBTITLE -->
 <!-- Tag -->
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">
 		Franklin Brentwood Arts Academy 2022 Summer Showcase	</a>
 </h2>
 <!-- OPENER -->
 <!-- Event Meta -->
 <div class="tribe-events-event-meta">
 <div class="author location">
 <!-- Schedule & Recurrence Details -->
 <div class="tribe-event-schedule-details">
 <p><time datetime="2022-07-19 06:00:00 CDT">Tuesday, July 19, 2022 at 6:00 PM CDT</time></p>
 </div>
 </div>
 </div><!-- .tribe-events-event-meta -->
 <a class="smallblackbutton" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">MORE INFO</a>
 </div>,
 <div class="tribe-beside-image">
 <!-- Event Title -->
 <!-- SUBTITLE -->
 <!-- Tag -->
 <h2 c

In [121]:
list_a = []

for x in divs:
            if x.find('span', attrs = {'class':'opener'}):
                x.find('span', attrs = {'class':'opener'})
            else:
                'no opener'
            list_a.extend(x)         

In [122]:
list_a

['\n',
 ' Event Title ',
 '\n',
 ' SUBTITLE ',
 '\n',
 ' Tag ',
 '\n',
 <h2 class="tribe-events-list-event-title">
 <a class="tribe-event-url" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">
 		Franklin Brentwood Arts Academy 2022 Summer Showcase	</a>
 </h2>,
 '\n',
 ' OPENER ',
 '\n',
 ' Event Meta ',
 '\n',
 <div class="tribe-events-event-meta">
 <div class="author location">
 <!-- Schedule & Recurrence Details -->
 <div class="tribe-event-schedule-details">
 <p><time datetime="2022-07-19 06:00:00 CDT">Tuesday, July 19, 2022 at 6:00 PM CDT</time></p>
 </div>
 </div>
 </div>,
 ' .tribe-events-event-meta ',
 '\n',
 <a class="smallblackbutton" href="https://ryman.com/event/franklin-brentwood-arts-academy/" rel="bookmark" title="Franklin Brentwood Arts Academy 2022 Summer Showcase">MORE INFO</a>,
 '\n',
 '\n',
 ' Event Title ',
 '\n',
 ' SUBTITLE ',
 '\n',
 ' Tag ',
 '\n',
 <h2 class="tribe-event

In [114]:
span_tags = [x.find('span') for x in divs if x.find('span')]
span_tags

[<span class="opener">Benefiting Autism Tennessee</span>,
 <span class="opener">Springer Mountain Farms Bluegrass Nights at the Ryman</span>,
 <span class="opener">CHEN presents</span>,
 <span class="opener">The Wild Hearts Tour</span>,
 <span class="opener">with Meshell Ndegeocello</span>,
 <span class="opener">Springer Mountain Farms Bluegrass Nights at the Ryman</span>,
 <span class="opener">with Vandoliers</span>,
 <span class="opener">with 49 Winchester</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">The Good Times Tour</span>,
 <span class="opener">WERQ THE WORLD 2022 TOUR</span>]

In [None]:
for x in h2s:
    if 'span' in x:
        span
    else:
        ('none')

###
first way

In [105]:
URL = 'https://ryman.com/events/'
req = requests.get(URL)
soup = BS(req.text)
openers = soup.findAll('span', attrs = {'class':'opener'})

In [106]:
openers

[<span class="opener">Benefiting Autism Tennessee</span>,
 <span class="opener">Springer Mountain Farms Bluegrass Nights at the Ryman</span>,
 <span class="opener">with Troubadour Blue</span>,
 <span class="opener">CHEN presents</span>,
 <span class="opener">With James Fortune, Kierra Sheard, MAJOR, Anaysha Figueroa-Cooper, Ron Poindexter, Maurette Brown-Clark, Crystal Aikin, Jabari Johnson, and Nate Bean &amp; 4Given</span>,
 <span class="opener">The Wild Hearts Tour</span>,
 <span class="opener">with Meshell Ndegeocello</span>,
 <span class="opener">Springer Mountain Farms Bluegrass Nights at the Ryman</span>,
 <span class="opener">with Vandoliers</span>,
 <span class="opener">with 49 Winchester</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">featuring Wendy Moten</span>,
 <span class="opener">The Good Times Tour</span>,
 <span class="opener">with 

In [94]:
if soup.findAll('span', attrs = {'class':'opener'})

<!DOCTYPE html>
<!--[if !IE]>
<html class="no-js non-ie" lang="en-US"> <![endif]--><!--[if IE 7 ]>
<html class="no-js ie7" lang="en-US"> <![endif]--><!--[if IE 8 ]>
<html class="no-js ie8" lang="en-US"> <![endif]--><!--[if IE 9 ]>
<html class="no-js ie9" lang="en-US"> <![endif]--><!--[if gt IE 9]><!--><html class="no-js" lang="en-US"> <!--<![endif]-->
<head>
<!-- Google Tag Manager -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-WQ7FQGN');</script>
<!-- End Google Tag Manager -->
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="" name="theme-color"/>
<link href="https://gmpg.org/xfn/11" rel="profile"/>
<link href="https://ryman.com/wp-co

In [93]:
opener_text = [x.text for x in openers]
opener_text

['with special guest Dead Sara',
 '2nd Show Added!',
 'with The Brook & The Bluff',
 'with The Brook & The Bluff',
 'with Night Club',
 'with Shutterdog',
 'with Ashley Cooke and Dylan Marlowe',
 'with Cory Asbury plus special guests',
 'with special guest Ritt Momney',
 'with special guest Ray Fulcher',
 'Dave Landau & Steven Crowder',
 '2nd Show Added',
 'Dave Landau & Steven Crowder',
 ' ',
 'Costume Palooza',
 'with special guests Aaron Cole and Lakewood Music',
 '& Orleans Avenue',
 'This Machine ... Theater Tour',
 'with special guests Jaime Wyatt and Jesse Ahern']

In [79]:
#try if statement with simple string
x='with johnny'
    
if 'with' in x.lower() or 'featuring' in x.lower():
    print(x)
else:
    print('none')

with johnny


a way to check if a string contains any substrings from a list
e.g. to check if 'with' or 'featuring' are in a list of strings like 'featuring Wendy' (yes) or 'the comeback tour' (no)
any([x in YOURSTRING for x in list_of_options])

In [76]:
#if statement applied to list w/ loop
real_openers = []

for x in opener_text:
    if 'with' in x.lower() or 'featuring' in x.lower():
        real_openers.append(x)
    else:
        real_openers.append('none')
        

In [77]:
real_openers

['none',
 'none',
 'with Troubadour Blue',
 'none',
 'With James Fortune, Kierra Sheard, MAJOR, Anaysha Figueroa-Cooper, Ron Poindexter, Maurette Brown-Clark, Crystal Aikin, Jabari Johnson, and Nate Bean & 4Given',
 'none',
 'with Meshell Ndegeocello',
 'none',
 'with Vandoliers',
 'with 49 Winchester',
 'featuring Wendy Moten',
 'featuring Wendy Moten',
 'featuring Wendy Moten',
 'featuring Wendy Moten',
 'none',
 'with special guests The Aces and JVKE',
 'none']

In [81]:
#creating openers list for ryman_df
URL2 = 'https://ryman.com/events/list/?tribe_event_display=list&tribe_paged='

real_openers_list = []

for page in range (1,6):
    req = requests.get(URL2 + str(page))
    soup = BS(req.text)
    openers = soup.findAll('span', attrs = {'class':'opener'})
    opener_text = [x.text for x in openers]
    for x in opener_text:
        if 'with' in x.lower() or 'featuring' in x.lower():
            real_openers_list.append(x)
        else:
            real_openers_list.append('none')

In [89]:
opener_text

['with special guest Dead Sara',
 '2nd Show Added!',
 'with The Brook & The Bluff',
 'with The Brook & The Bluff',
 'with Night Club',
 'with Shutterdog',
 'with Ashley Cooke and Dylan Marlowe',
 'with Cory Asbury plus special guests',
 'with special guest Ritt Momney',
 'with special guest Ray Fulcher',
 'Dave Landau & Steven Crowder',
 '2nd Show Added',
 'Dave Landau & Steven Crowder',
 ' ',
 'Costume Palooza',
 'with special guests Aaron Cole and Lakewood Music',
 '& Orleans Avenue',
 'This Machine ... Theater Tour',
 'with special guests Jaime Wyatt and Jesse Ahern']

In [91]:
len(real_openers_list)

93

In [87]:
ryman_df = pd.DataFrame({'band':artist_list, 'showtime':showtime_list, 'date':date_list})
ryman_df.shape

(100, 3)

the openers list is only 93 instead of 100, this is because not all events have an <opener> tag. you need an if statement when you findAll openers to return a blank placeholder where there is no <opener> tag.