# Use case: iTunes Charts

In [41]:
# 1. import libraries
import sys
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [114]:
# 2. store the address of the page to scrape in a variable
# (the path ends in the chart date as year/month/day, here 2022/02/14)
url = "http://www.itunescharts.net/us/charts/songs/2022/02/14"

#### Using the requests package

In [115]:
# 3. download html with a get request
# The timeout (in seconds) keeps this cell from hanging forever if the server
# never answers, and raise_for_status() turns an HTTP error (4xx/5xx) into an
# exception right here instead of letting an error page reach the parser.
response = requests.get(url, timeout=10)
response.raise_for_status()

In [116]:
# peek at the raw response body (bytes) to confirm that HTML came back
response.content

b'<!DOCTYPE html>\n<!--[if lt IE 8]><html lang="en" class="ie lte9 lte8 lte7"><![endif]-->\n<!--[if IE 8]><html lang="en" class="ie ie8 lte9 lte8"><![endif]-->\n<!--[if IE 9]><html lang="en" class="ie ie9 lte9"><![endif]-->\n<!--[if gt IE 9]><html lang="en"><![endif]-->\n<!--[if !IE]><!--><html lang="en"><!--<![endif]-->\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<title>iTunesCharts.net: US Songs Monday, 14th February 2022</title>\n<meta name="keywords" content="us songs itunes charts united states american current top 100 hundred latest itune charts ituens">\n<meta name="description" content="US Songs Top 100 Chart for Monday, 14th February 2022. Number 1: The Next Episode (feat. Snoop Dogg) - Dr. Dre.">\n<link rel="shortcut icon" href="/assets/itunes_charts/i/favicon.ico" type="image/x-icon">\n<meta name="google-site-verification" content="291UlohElr2aD0C_CnfkP8CiuCvydvWASJOo9AWXPKg" />\n<!--meta name="google-site-verification" content="gSkQgYZt2kn9

In [117]:
# HTTP status of the download: 200 means OK, anything else means the page was not fetched as expected
response.status_code

200

In [118]:
# 4.1. build the 'soup': parse the downloaded bytes with Python's built-in HTML parser
soup = BeautifulSoup(markup=response.content, features="html.parser")
# 4.2. display the parsed document to verify that it is the expected page
soup

<!DOCTYPE html>

<!--[if lt IE 8]><html lang="en" class="ie lte9 lte8 lte7"><![endif]-->
<!--[if IE 8]><html lang="en" class="ie ie8 lte9 lte8"><![endif]-->
<!--[if IE 9]><html lang="en" class="ie ie9 lte9"><![endif]-->
<!--[if gt IE 9]><html lang="en"><![endif]-->
<!--[if !IE]><!--><html lang="en"><!--<![endif]-->
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>iTunesCharts.net: US Songs Monday, 14th February 2022</title>
<meta content="us songs itunes charts united states american current top 100 hundred latest itune charts ituens" name="keywords"/>
<meta content="US Songs Top 100 Chart for Monday, 14th February 2022. Number 1: The Next Episode (feat. Snoop Dogg) - Dr. Dre." name="description"/>
<link href="/assets/itunes_charts/i/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
<meta content="291UlohElr2aD0C_CnfkP8CiuCvydvWASJOo9AWXPKg" name="google-site-verification">
<!--meta name="google-site-verification" content="gSkQgYZt2kn97sYfHDAdeu9v

<b>Tip:</b> you can copy an element's CSS selector with the Chrome inspector: select the element you want to extract, right-click it, and choose "Copy" → "Copy selector".

In [131]:
# try the artist selector on the first chart entry and show its cleaned-up text
first_artist_tag = soup.select("span.artist")[0]
first_artist_tag.get_text().strip()

'Dr. Dre'

In [133]:
# try the title selector on the first chart entry and show its cleaned-up text
first_title_tag = soup.select("span.entry > a")[0]
first_title_tag.get_text().strip()

'The Next Episode (feat. Snoop Dogg)'

#### Building the dataframe

In [140]:
# start with two separate empty lists, one per future dataframe column
artist, title = [], []

In [141]:
# The number of loop iterations is taken from the size of the retrieved
# result set instead of being hard-coded, so it always matches what the
# page actually contains.
artist_result_set = soup.select("span.artist")
num_iter = len(artist_result_set)
print(num_iter)

95


In [142]:
# count the title links; this should equal the number of artist spans
title_result_set = soup.select("span.entry > a")
len(title_result_set)

95

In [145]:
# Retrieve the artist and the title of every chart entry.
# Each selector runs once (not once per loop iteration), and both lists are
# rebuilt from scratch, so re-running this cell cannot append duplicate rows.
artist_tags = soup.select("span.artist")
title_tags = soup.select("span.entry > a")

# entries are paired by position, so both selectors must return the same number of tags
assert len(artist_tags) == len(title_tags), "artist/title counts differ; check the CSS selectors"

artist = [tag.get_text().strip() for tag in artist_tags]  # artist names
title = [tag.get_text().strip() for tag in title_tags]  # song titles

In [146]:
# inspect the scraped artist names
artist

['Dr. Dre',
 'Dr. Dre',
 'Eminem',
 'Mary J. Blige',
 'Jung Kook',
 '2Pac',
 'Dr. Dre',
 'Dr. Dre',
 'Ed Sheeran',
 '50 Cent',
 'Jennifer Lopez',
 'Snoop Dogg',
 'Mary J. Blige',
 'Carolina Gaitán - La Gaita, Mauro Castillo, Adassa, Rhenzy Feliz, Diane Guerrero, Stephanie Bea',
 'Eminem',
 'Becky G. & KAROL G',
 'Electric Light Orchestra',
 'Mary J. Blige',
 'Jessica Darrow',
 'Gayle',
 'Elton John & Dua Lipa',
 'Snoop Dogg',
 'Eminem',
 'Nicki Minaj & Lil Baby',
 'Mary J. Blige',
 'Marvin Gaye & Tammi Terrell',
 'Ed Sheeran',
 'Sia',
 'Walker Hayes',
 'Snoop Dogg',
 'Eminem',
 'Baby Bash',
 'Cody Johnson',
 'Em Beihold',
 'ADELE',
 'Justin Bieber',
 'Kendrick Lamar',
 'Eminem',
 '2Pac featuring Dr. Dre & Roger Troutman',
 'Hi-Rez & Jimmy Levy',
 'Glass Animals',
 'Latto',
 'Katy Nichole',
 'Stephanie Beatriz, Olga Merediz & Encanto - Cast',
 'Dr. Dre',
 'Lil Nas X',
 'Jordan Davis',
 'Muni Long',
 'ADELE',
 'Walker Hayes',
 'Morgan Wallen',
 'Chris Stapleton',
 '2Pac',
 'Bruno Mars, A

In [147]:
# inspect the scraped song titles
title

['The Next Episode (feat. Snoop Dogg)',
 'Still D.R.E. (feat. Snoop Dogg)',
 'Lose Yourself',
 'Family Affair',
 'Stay Alive (Prod. by SUGA of BTS)',
 'California Love (feat. Roger Troutman & Dr. Dre)',
 'Forgot About Dre (feat. Eminem)',
 "Nuthin' but a G thang (feat. Snoop Dogg)",
 'The Joker And The Queen (feat. Taylor Swift)',
 'In Da Club',
 'On My Way (Marry Me)',
 'Gin and Juice (feat. Dat Nigga Daz)',
 'No More Drama',
 "We Don't Talk About Bruno",
 'The Real Slim Shady',
 'MAMIII',
 'Showdown',
 'Good Morning Gorgeous',
 'Surface Pressure',
 'abcdefu',
 'Cold Heart (PNAU Remix)',
 "Drop It Like It's Hot (feat. Pharrell Williams)",
 'Without Me',
 'Bussin',
 'Real Love',
 "Ain't No Mountain High Enough",
 'Shivers',
 'Unstoppable',
 'AA',
 "Who Am I (What's My Name)?",
 'Love the Way You Lie (feat. Rihanna)',
 'Suga Suga',
 "'Til You Can't",
 'Numb Little Bug',
 'Easy On Me',
 'Ghost',
 'Alright',
 'Not Afraid',
 'California Love',
 'God Over Government',
 'Heat Waves',
 'Big E

In [151]:
# assemble the table: every list supplies one column, keyed by its column name
chart_columns = {"artist": artist, "title": title}
itunes_charts = pd.DataFrame(chart_columns)

itunes_charts.head()

Unnamed: 0,artist,title
0,Dr. Dre,The Next Episode (feat. Snoop Dogg)
1,Dr. Dre,Still D.R.E. (feat. Snoop Dogg)
2,Eminem,Lose Yourself
3,Mary J. Blige,Family Affair
4,Jung Kook,Stay Alive (Prod. by SUGA of BTS)


In [152]:
# save the chart to disk; index=False keeps the row numbers out of the file
itunes_charts.to_csv(path_or_buf="itunes_charts.csv", index=False)