# Getting release URLs from a Box Office Mojo year page

In [1]:
from bs4 import BeautifulSoup
import requests

In [28]:
def get_release_urls_from_year(year_url):
    '''
    Fetch a yearly page on boxofficemojo.com and collect its release links.

    Parameters
    ----------
    year_url : str
        URL for a particular year on boxofficemojo.com,
        i.e. https://www.boxofficemojo.com/year/2020/?ref_=bo_yl_table_1

    Returns
    -------
    list of str
        For every <tr> after the first one inside <div id="table">'s table,
        the href of the first element with class 'a-link-normal', in page
        order. The values are site-relative paths such as
        '/release/rl1182631425/?ref_=bo_yld_table_1'.
        N.B.: prepend 'https://www.boxofficemojo.com' to obtain a full URL.
        Rows that contain no such link are skipped.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not respond within the timeout.
    ValueError
        If the page does not contain the expected <div id="table"> / <table>.
    '''
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(year_url, timeout=30)
    print('requests.get status: ',response.status_code)
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    table_div = soup.find('div', id='table')
    table = table_div.find('table') if table_div is not None else None
    if table is None:
        raise ValueError(
            'No <div id="table"> containing a <table> found at ' + str(year_url)
        )

    release_urls = []
    # [1:] drops the first <tr> (presumably the header row -- confirm if the
    # page layout changes).
    for row in table.find_all('tr')[1:]:
        link = row.find(class_='a-link-normal')
        if link is None:
            continue
        href = link.get('href')
        if href is not None:
            release_urls.append(href)

    return release_urls

In [3]:
# Download the 2020 yearly page once; later cells only need its HTML text.
url = 'https://www.boxofficemojo.com/year/2020/?ref_=bo_yl_table_1'
# A timeout keeps a stalled connection from hanging the kernel.
response = requests.get(url, timeout=30)
print('requests.get status: ',response.status_code)
# Stop here on a 4xx/5xx answer rather than parsing an error page below.
response.raise_for_status()
page = response.text
del response

requests.get status:  200


In [4]:
# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(page, "lxml")
# Show only the beginning of the prettified document: enough to inspect the
# structure without flooding the cell output with the whole page.
print(soup.prettify()[:1500])

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo">
 <head>
  <script>
   var aPageStart = (new Date()).getTime();
  </script>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta charset="utf-8"/>
  <title dir="ltr">
   Domestic Box Office For 2020 - Box Office Mojo
  </title>
  <meta content="Domestic Box Office For 2020" name="title"/>
  <meta content="Box Office Mojo" property="og:site_name"/>
  <meta content="telephone=no" name="format-detection"/>
  <link href="https://m.media-amazon.com/images/G/01/boxofficemojo/v2/favicon._CB448965889_.ico" rel="icon" type="image/x-icon"/>
  <link href="https://images-na.ssl-images-amazon.com/images/I/51tax7M48-L._RC|516fcOUE-HL.css,01evdoiemkL.css,01K+Ps1DeEL.css,31pdJv9iSzL.css,01VszOUTO6L.css,11UGC+GXOPL.css,21LK7jaicML.css,11L58Qpo0GL.css,21kyTi1FabL.css,01ruG+gDPFL.css,01YhS3Cs-hL.css,21GwE3cR-yL.css,019SHZnt8RL.css,01wAWQRgXzL.css,21bWcRJYNIL.css,11WgRxUdJRL.css,01dU8+SPlFL.

In [23]:
# The <tr> elements of the <table> inside <div id="table"> are the table rows.
release_urls = []

# Skip the first <tr> and take the href of the first 'a-link-normal' element
# in each remaining row. The loop variable is named `release_url` so that it
# does not overwrite the page `url` defined in an earlier cell.
for row in soup.find('div', id='table').find('table').find_all('tr')[1:]:
    release_url = row.find(class_='a-link-normal')['href']
    release_urls.append(release_url)

release_urls

['/release/rl1182631425/?ref_=bo_yld_table_1',
 '/release/rl2969994753/?ref_=bo_yld_table_2',
 '/release/rl4244997633/?ref_=bo_yld_table_3',
 '/release/rl755467777/?ref_=bo_yld_table_4',
 '/release/rl3305145857/?ref_=bo_yld_table_5',
 '/release/rl3640886785/?ref_=bo_yld_table_6',
 '/release/rl2164295169/?ref_=bo_yld_table_7',
 '/release/rl218596865/?ref_=bo_yld_table_8',
 '/release/rl50628097/?ref_=bo_yld_table_9',
 '/release/rl2533524993/?ref_=bo_yld_table_10',
 '/release/rl3433267713/?ref_=bo_yld_table_11',
 '/release/rl3204875777/?ref_=bo_yld_table_12',
 '/release/rl2424210945/?ref_=bo_yld_table_13',
 '/release/rl1333691905/?ref_=bo_yld_table_14',
 '/release/rl3473442305/?ref_=bo_yld_table_15',
 '/release/rl419792385/?ref_=bo_yld_table_16',
 '/release/rl1258849793/?ref_=bo_yld_table_17',
 '/release/rl1611040257/?ref_=bo_yld_table_18',
 '/release/rl3825763841/?ref_=bo_yld_table_19',
 '/release/rl4278486529/?ref_=bo_yld_table_20',
 '/release/rl1745126913/?ref_=bo_yld_table_21',
 '/rel

# TEST IT

In [27]:
# Run the function on the 2020 page; the result should match the list built
# step by step in the exploration cell.
url = 'https://www.boxofficemojo.com/year/2020/?ref_=bo_yl_table_1'
get_release_urls_from_year(url)

requests.get status:  200


['/release/rl1182631425/?ref_=bo_yld_table_1',
 '/release/rl2969994753/?ref_=bo_yld_table_2',
 '/release/rl4244997633/?ref_=bo_yld_table_3',
 '/release/rl755467777/?ref_=bo_yld_table_4',
 '/release/rl3305145857/?ref_=bo_yld_table_5',
 '/release/rl3640886785/?ref_=bo_yld_table_6',
 '/release/rl2164295169/?ref_=bo_yld_table_7',
 '/release/rl218596865/?ref_=bo_yld_table_8',
 '/release/rl50628097/?ref_=bo_yld_table_9',
 '/release/rl2533524993/?ref_=bo_yld_table_10',
 '/release/rl3433267713/?ref_=bo_yld_table_11',
 '/release/rl3204875777/?ref_=bo_yld_table_12',
 '/release/rl2424210945/?ref_=bo_yld_table_13',
 '/release/rl1333691905/?ref_=bo_yld_table_14',
 '/release/rl3473442305/?ref_=bo_yld_table_15',
 '/release/rl419792385/?ref_=bo_yld_table_16',
 '/release/rl1258849793/?ref_=bo_yld_table_17',
 '/release/rl1611040257/?ref_=bo_yld_table_18',
 '/release/rl3825763841/?ref_=bo_yld_table_19',
 '/release/rl4278486529/?ref_=bo_yld_table_20',
 '/release/rl1745126913/?ref_=bo_yld_table_21',
 '/rel