In [1]:
import requests
import pandas as pd

from bs4 import BeautifulSoup

In [2]:
# Site root; relative links scraped from the pages are appended to it.
base_url = 'https://www.grammy.com'
# Awards landing page, expressed relative to the site root.
awards_url = f'{base_url}/grammys/awards'

In [3]:
# Fetch the awards landing page. requests applies no timeout by default, so an
# explicit one keeps the notebook from hanging forever on an unresponsive server.
req = requests.get(awards_url, timeout=30)

In [4]:
# Display the HTTP status code of the response as the cell output.
req.status_code

200

In [5]:
# Parse the raw response body with Python's built-in 'html.parser'.
soup = BeautifulSoup(req.content, 'html.parser')

In [6]:
# First element carrying the 'views-infinite-scroll-content-wrapper' class.
# find() yields None when nothing matches, in which case the next cell fails.
awards = soup.find(class_='views-infinite-scroll-content-wrapper')

In [7]:
# Every 'views-row' element nested inside the wrapper element.
rows = awards.find_all(class_='views-row')

In [8]:
# 'src' attribute of the <img> inside the first row's first link.
# NOTE(review): this value is never read before the scraping loop reassigns
# `img` — confirm whether this cell is still needed.
img = rows[0].a.img['src']

In [9]:
# Absolute URL built from the first row's link; printed for a quick visual check.
last_edition_url = "{}{}".format(base_url, rows[0].a['href'])
print(last_edition_url)

https://www.grammy.com/grammys/awards/62nd-annual-grammy-awards-2019


In [10]:
# The edition URL ends in "-<year>", so take the text after the LAST hyphen.
# Indexing a plain split('-') at a fixed position silently returns the wrong
# piece as soon as the slug (or host name) contains a different number of hyphens.
last_year = last_edition_url.rsplit('-', 1)[-1]
print(last_year)

2019


In [11]:
# Fetch the page at last_edition_url and display its HTTP status. The explicit
# timeout prevents an indefinite hang, since requests applies none by default.
req = requests.get(last_edition_url, timeout=30)
req.status_code

200

In [12]:
# Parse the page just fetched and collect the 'views-row' entries of its
# 'view-show-year' block; the cell output is the number of entries found.
soup = BeautifulSoup(req.content, 'html.parser')
year_view = soup.find(class_='view-show-year')
all_editions_url = year_view.find_all(class_='views-row')
len(all_editions_url)

62

In [13]:
# Empty frame defining the schema of the scraped awards table.
df = pd.DataFrame(
    columns=[
        'year',
        'title',
        'published_at',
        'updated_at',
        'category',
        'nominee',
        'artist',
        'workers',
        'img',
        'winner',
    ]
)

In [14]:
def insert_row(df, row):
    """Append ``row`` to ``df`` in place under the next index label.

    Args:
        df: DataFrame to extend; it is modified in place.
        row: Sequence of values, one per column of ``df``.

    Returns:
        None.
    """
    last_label = df.index.max()
    # An empty index has no maximum (NaN), so numbering starts at 0.
    next_label = 0 if pd.isna(last_label) else last_label + 1
    df.loc[next_label] = row

In [15]:
# Scrape each edition page and rebuild `df` with one row per 'view-grouping'
# element (its header plus the first title/description/body found inside it).
# NOTE(review): the first entry of all_editions_url is skipped — presumably the
# newest edition; confirm this is intentional.
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when none is given

# Rows are gathered in a plain list and the frame is built once at the end:
# enlarging a DataFrame one row at a time copies it on every insert, and
# rebuilding from scratch makes this cell safe to re-run without duplicates.
records = []
for edition_url in all_editions_url[1:]:
    url = f"{base_url}{edition_url.a['href']}"

    req = requests.get(url, timeout=REQUEST_TIMEOUT)
    if req.status_code != 200:
        # Report skipped editions instead of dropping them silently.
        print(f"Skipping {url}: HTTP {req.status_code}")
        continue

    soup = BeautifulSoup(req.content, 'html.parser')

    # Page-level fields shared by every row of this edition.
    title = soup.find(class_='sub-title').h2.string
    published_at = soup.find("meta", property="article:published_time")['content']
    updated_at = soup.find("meta", property="article:modified_time")['content']
    year = soup.find(id='page-title').getText().split(" ")[0]

    for article in soup.find_all(class_='view-grouping'):
        category = article.find(class_='view-grouping-header').string

        # Optional fields: a missing element makes find()/tag access yield None,
        # which surfaces as AttributeError or TypeError (KeyError for a missing
        # 'src'). Only those mean "field absent"; anything else, including
        # KeyboardInterrupt, is no longer swallowed by a bare except.
        try:
            img = article.find(class_='circular-image').img['src']
        except (AttributeError, TypeError, KeyError):
            img = ''

        # Every scraped row is flagged as a winner.
        winner = True
        nominee = article.find(class_='views-field-title').string

        try:
            artist = article.find(class_='views-field-field-description').p.getText()
        except AttributeError:
            artist = ''

        try:
            workers = article.find(class_='views-field-body-1').p.getText()
        except AttributeError:
            workers = ''

        records.append([year, title, published_at, updated_at, category,
                        nominee, artist, workers, img, winner])

# Reuse the column layout of the existing (empty) frame.
df = pd.DataFrame(records, columns=df.columns)

In [16]:
# Rows whose artist field is the empty string.
df.loc[df['artist'].eq('')]

Unnamed: 0,year,title,published_at,updated_at,category,nominee,artist,workers,img,winner
2,2018,61st Annual GRAMMY Awards (2018),2018-12-06T23:48:49-08:00,2019-09-10T01:06:11-07:00,Song Of The Year,This Is America,,"Donald Glover, Ludwig Göransson & Jeffery Lama...",https://www.grammy.com/sites/com/files/styles/...,True
3,2018,61st Annual GRAMMY Awards (2018),2018-12-06T23:48:49-08:00,2019-09-10T01:06:11-07:00,Best New Artist,Dua Lipa,,,,True
13,2018,61st Annual GRAMMY Awards (2018),2018-12-06T23:48:49-08:00,2019-09-10T01:06:11-07:00,Best Rock Song,Masseduction,,"Jack Antonoff & Annie Clark, songwriters (St. ...",https://www.grammy.com/sites/com/files/styles/...,True
18,2018,61st Annual GRAMMY Awards (2018),2018-12-06T23:48:49-08:00,2019-09-10T01:06:11-07:00,Best R&B Song,Boo'd Up,,"Larrance Dopson, Joelle James, Ella Mai & Dijo...",https://www.grammy.com/sites/com/files/styles/...,True
23,2018,61st Annual GRAMMY Awards (2018),2018-12-06T23:48:49-08:00,2019-09-10T01:06:11-07:00,Best Rap Song,God's Plan,,"Aubrey Graham, Daveon Jackson, Brock Korsan, R...",https://www.grammy.com/sites/com/files/styles/...,True
...,...,...,...,...,...,...,...,...,...,...
4341,1958,1st Annual GRAMMY Awards (1958),2017-11-28T00:03:45-08:00,2019-09-10T01:11:09-07:00,Best Classical Performance - Instrumentalist (...,Tchaikovsky: Piano Concerto No. 1 In B Flat Mi...,,"Van Cliburn, artist (Symphony Of The Air Orche...",,True
4342,1958,1st Annual GRAMMY Awards (1958),2017-11-28T00:03:45-08:00,2019-09-10T01:11:09-07:00,Best Classical Performance - Instrumentalist (...,Segovia Golden Jubilee,,"Andres Segovia, artist",https://www.grammy.com/sites/com/files/styles/...,True
4343,1958,1st Annual GRAMMY Awards (1958),2017-11-28T00:03:45-08:00,2019-09-10T01:11:09-07:00,Best Classical Performance - Chamber Music (In...,Beethoven: Quartet 130,,"Hollywood String Quartet (Alvin Dinkin, Paul S...",,True
4344,1958,1st Annual GRAMMY Awards (1958),2017-11-28T00:03:45-08:00,2019-09-10T01:11:09-07:00,Best Classical Performance - Vocal Soloist (Wi...,Operatic Recital,,,,True


In [17]:
# New frame without the year, timestamp and winner columns; `df` is left intact.
new_df = df.drop(columns=['year', 'published_at', 'updated_at', 'winner'])

In [18]:
# NOTE(review): not referenced by any other visible cell, and the trailing ''
# looks like an unfinished placeholder — confirm whether this list is still needed.
human_awards = ['Best New Artist', '']

In [19]:
# Inspect the rows of the 'Best New Artist' category.
new_df.loc[new_df['category'].eq('Best New Artist')]

Unnamed: 0,title,category,nominee,artist,workers,img
3,61st Annual GRAMMY Awards (2018),Best New Artist,Dua Lipa,,,
87,60th Annual GRAMMY Awards (2017),Best New Artist,Alessia Cara,,,
171,59th Annual GRAMMY Awards (2016),Best New Artist,Chance The Rapper,,,
255,58th Annual GRAMMY Awards (2015),Best New Artist,Meghan Trainor,,,
338,57th Annual GRAMMY Awards (2014),Best New Artist,Sam Smith,,,https://www.grammy.com/sites/com/files/styles/...
421,56th Annual GRAMMY Awards (2013),Best New Artist,Macklemore & Ryan Lewis,,,https://www.grammy.com/sites/com/files/styles/...
503,55th Annual GRAMMY Awards (2012),Best New Artist,Fun.,,,https://www.grammy.com/sites/com/files/styles/...
584,54th Annual GRAMMY Awards (2011),Best New Artist,Bon Iver,,,https://www.grammy.com/sites/com/files/styles/...
662,53rd Annual GRAMMY Awards (2010),Best New Artist,Esperanza Spalding,,,https://www.grammy.com/sites/com/files/styles/...
770,52nd Annual GRAMMY Awards (2009),Best New Artist,Zac Brown Band,,,https://www.grammy.com/sites/com/files/styles/...


In [32]:
# One-column frame ('name') holding each distinct category value once.
categories_df = pd.DataFrame({'name': new_df['category'].unique()})

In [26]:
# Write the awards table as comma-separated text, omitting the index column.
new_df.to_csv('grammy_awards.csv', sep=',', index=False)

In [33]:
# Write the category list as comma-separated text, omitting the index column.
categories_df.to_csv('categories.csv', sep=',', index=False)

In [0]:
# Flag every row as a winner. Item assignment is required: `winner` was dropped
# when new_df was created, and attribute assignment on a non-existent column
# only sets a Python attribute on the object instead of creating a column.
new_df['winner'] = True

In [0]:
# Show the rows flagged as winners.
# NOTE(review): `winner` is among the columns dropped when new_df was created,
# so this filter only selects rows if a real `winner` column exists on new_df
# at this point — confirm.
new_df.loc[new_df.winner == True]

In [55]:
# Split "<edition title> (<year>)" into separate `title` and `year` columns.
# Matching the pattern (rather than slicing fixed offsets row by row) keeps the
# cell safe to re-run: a title that no longer ends in " (YYYY)" is left as is
# instead of losing another seven characters and yielding a bogus year.
title_parts = new_df['title'].str.extract(r'^(?P<title>.*) \((?P<year>\d{4})\)$')
has_year = title_parts['year'].notna()
# Keep any previously extracted year for rows that do not match this time.
previous_year = new_df['year'] if 'year' in new_df.columns else title_parts['year']
new_df['year'] = title_parts['year'].where(has_year, previous_year)
new_df['title'] = title_parts['title'].where(has_year, new_df['title'])

In [64]:
# Preview the first three rows of new_df.
new_df.head(3)

Unnamed: 0,year,title,category,nominee,artist,workers
0,2018,61st Annual GRAMMY Awards,Record Of The Year,This Is America,Childish Gambino,"Donald Glover & Ludwig Göransson, producers; D..."
1,2018,61st Annual GRAMMY Awards,Album Of The Year,Golden Hour,Kacey Musgraves,"Ian Fitchuk, Kacey Musgraves & Daniel Tashian,..."
2,2018,61st Annual GRAMMY Awards,Song Of The Year,This Is America,,"Donald Glover, Ludwig Göransson & Jeffery Lama..."
