In [23]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# URL of page to be scraped
url = 'https://www.landmarkcases.org/'

In [3]:
# Retrieve page with the requests module
response = requests.get(url)

In [4]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Examine the results, then determine element that contains sought info
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <!-- Basic -->
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <title>
   Landmark Supreme Court Cases
  </title>
  <meta content="index, follow" name="robots"/>
  <!-- Favicon -->
  <link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180"/>
  <link href="/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/>
  <link href="/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/>
  <link href="/manifest.json" rel="manifest"/>
  <link color="#c52033" href="/safari-pinned-tab.svg" rel="mask-icon"/>
  <meta content="#ffffff" name="theme-color"/>
  <!-- Mobile Metas -->
  <meta content="width=device-width, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
  <!-- Web Fonts  -->
  <link href="//fonts.googleapis.com/css?family=Open+Sans:300,400,600,700,800" rel="stylesheet" type="text/css"/>
  <script src="//use.fontawesome.com/a6c97bfcf3.js">
  </script>
  <!-- Vendor

In [8]:
# results are returned as an iterable list
results = soup.find_all('div', class_="post-content")
results

[<div class="post-content">
 <a href="/cases/brown-v-board-of-education" id="CaseRepeaterShort1_rptCases_ctl01_lnkReadMore">
 <h5>Brown v. Board of Education</h5>
 <p>School Segregation, Equal Protection</p>
 </a>
 </div>, <div class="post-content">
 <a href="/cases/dred-scott-v-sandford" id="CaseRepeaterShort1_rptCases_ctl02_lnkReadMore">
 <h5>Dred Scott v. Sandford</h5>
 <p>Slavery, Due Process, the Missouri Compromise</p>
 </a>
 </div>, <div class="post-content">
 <a href="/cases/gibbons-v-ogden" id="CaseRepeaterShort1_rptCases_ctl03_lnkReadMore">
 <h5>Gibbons v. Ogden</h5>
 <p>States Rights, Commerce Clause</p>
 </a>
 </div>, <div class="post-content">
 <a href="/cases/gideon-v-wainwright" id="CaseRepeaterShort1_rptCases_ctl04_lnkReadMore">
 <h5>Gideon v. Wainwright</h5>
 <p>Right to Counsel, Due Process</p>
 </a>
 </div>, <div class="post-content">
 <a href="/cases/hazelwood-v-kuhlmeier" id="CaseRepeaterShort1_rptCases_ctl05_lnkReadMore">
 <h5>Hazelwood v. Kuhlmeier</h5>
 <p>Censo

In [27]:
# Loop through returned results
titles = []
details = []
links = []

for result in results:
    # Error handling
    try:
        # Identify and return title of listing
        title = result.find('h5').text
        titles.append(title)
        # Identify and return price of listing
        detail = result.find('p').text
        details.append(detail)
        # Identify and return link to listing
        link = result.a['href']
        links.append(f'https://www.landmarkcases.org/{link}')

        # Print results only if title, price, and link are available
        if (title and detail and link):
            print('-------------')
            print(title)
            print(detail)
            print(f'https://www.landmarkcases.org/{link}')
    except AttributeError as e:
        print(e)

-------------
Brown v. Board of Education
School Segregation, Equal Protection
https://www.landmarkcases.org//cases/brown-v-board-of-education
-------------
Dred Scott v. Sandford
Slavery, Due Process, the Missouri Compromise
https://www.landmarkcases.org//cases/dred-scott-v-sandford
-------------
Gibbons v. Ogden
States Rights, Commerce Clause
https://www.landmarkcases.org//cases/gibbons-v-ogden
-------------
Gideon v. Wainwright
Right to Counsel, Due Process
https://www.landmarkcases.org//cases/gideon-v-wainwright
-------------
Hazelwood v. Kuhlmeier
Censorship, Student Press Rights
https://www.landmarkcases.org//cases/hazelwood-v-kuhlmeier
-------------
Korematsu v. United States
Japanese Internment, Equal Protection
https://www.landmarkcases.org//cases/korematsu-v-united-states
-------------
Mapp v. Ohio
Exclusionary Rule, Due Process
https://www.landmarkcases.org//cases/mapp-v-ohio
-------------
Marbury v. Madison
Judicial Review, Federalism
https://www.landmarkcases.org//cases/ma

In [19]:
titles

['Brown v. Board of Education',
 'Dred Scott v. Sandford',
 'Gibbons v. Ogden',
 'Gideon v. Wainwright',
 'Hazelwood v. Kuhlmeier',
 'Korematsu v. United States',
 'Mapp v. Ohio',
 'Marbury v. Madison',
 'McCulloch v. Maryland',
 'Miranda v. Arizona',
 'New Jersey v. T.L.O.',
 'Plessy v. Ferguson',
 'Regents of the U. of California v. Bakke',
 'Roe v. Wade',
 'Texas v. Johnson',
 'Tinker v. Des Moines',
 'United States v. Nixon']

In [20]:
cases = ["1953-069","1962-058","1987-019","1960-133","1965-122","1984-022","1977-147","1972-048","1988-124","1968-043","1973-172"]
cases

['1953-069',
 '1962-058',
 '1987-019',
 '1960-133',
 '1965-122',
 '1984-022',
 '1977-147',
 '1972-048',
 '1988-124',
 '1968-043',
 '1973-172']

In [39]:
df = pd.DataFrame(list(zip(titles, details, links, cases)), columns = ["Case", "Details", "Link", "CaseId"])
df.set_index("CaseId", inplace=True)
df.head()

Unnamed: 0_level_0,Case,Details,Link
CaseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1953-069,Brown v. Board of Education,"School Segregation, Equal Protection",https://www.landmarkcases.org//cases/brown-v-b...
1962-058,Dred Scott v. Sandford,"Slavery, Due Process, the Missouri Compromise",https://www.landmarkcases.org//cases/dred-scot...
1987-019,Gibbons v. Ogden,"States Rights, Commerce Clause",https://www.landmarkcases.org//cases/gibbons-v...
1960-133,Gideon v. Wainwright,"Right to Counsel, Due Process",https://www.landmarkcases.org//cases/gideon-v-...
1965-122,Hazelwood v. Kuhlmeier,"Censorship, Student Press Rights",https://www.landmarkcases.org//cases/hazelwood...


In [40]:
df.to_csv("landmark.csv", index=True)