In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# Relative path of the CSV file holding the per-case predictions
predictions = "case_predictions.csv"

In [3]:
# Load the predictions table, decoding the file as Latin-1, and preview the first rows
case_predictions = pd.read_csv(filepath_or_buffer=predictions, encoding="latin1")
case_predictions.head()

Unnamed: 0,caseName,caseId,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
0,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1953-069,1,1,2,2,1,1,2,2,1,1,2.0
1,MAPP v. OHIO,1960-133,1,2,2,1,1,1,1,2,1,1,2.0
2,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1962-058,1,1,2,1,1,1,2,2,1,1,2.0
3,MIRANDA v. ARIZONA,1965-122,1,1,2,2,1,1,2,2,1,1,2.0
4,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1968-043,1,1,2,2,1,1,2,2,1,1,2.0


In [4]:
# Index the predictions by case identifier so rows can be aligned on it.
# The guard keeps this cell re-runnable: an unconditional in-place set_index
# raises KeyError on a second run, because "caseId" has already been moved
# out of the columns and into the index.
if case_predictions.index.name != "caseId":
    case_predictions = case_predictions.set_index("caseId")
case_predictions

Unnamed: 0_level_0,caseName,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
caseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1953-069,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1,1,2,2,1,1,2,2,1,1,2.0
1960-133,MAPP v. OHIO,1,2,2,1,1,1,1,2,1,1,2.0
1962-058,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1,1,2,1,1,1,2,2,1,1,2.0
1965-122,MIRANDA v. ARIZONA,1,1,2,2,1,1,2,2,1,1,2.0
1968-043,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1,1,2,2,1,1,2,2,1,1,2.0
1972-048,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1,1,2,1,1,1,1,2,1,1,2.0
1973-172,"UNITED STATES v. NIXON, PRESIDENT OF THE UNITE...",1,1,2,1,1,1,2,1,1,1,1.0
1977-147,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1,1,2,2,1,1,2,1,1,1,1.0
1984-022,NEW JERSEY v. T. L. O.,1,2,2,2,2,2,2,1,1,2,1.0
1987-019,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1,1,2,2,1,1,2,2,1,1,1.0


In [5]:
# Address of the web page that the scrape below downloads and parses
url = "https://www.landmarkcases.org/"

In [6]:
# Retrieve page with the requests module.
# The timeout stops the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() fails loudly on an HTTP error status instead
# of letting an error page be parsed as if it were the case listing.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [7]:
# Build the BeautifulSoup parse tree from the response body using the 'html.parser' backend
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [8]:
# Print the indented markup so the element holding the sought info can be located by eye
pretty_markup = soup.prettify()
print(pretty_markup)

<!DOCTYPE html>
<html>
 <head>
  <!-- Basic -->
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <title>
   Landmark Supreme Court Cases
  </title>
  <meta content="index, follow" name="robots"/>
  <!-- Favicon -->
  <link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180"/>
  <link href="/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/>
  <link href="/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/>
  <link href="/manifest.json" rel="manifest"/>
  <link color="#c52033" href="/safari-pinned-tab.svg" rel="mask-icon"/>
  <meta content="#ffffff" name="theme-color"/>
  <!-- Mobile Metas -->
  <meta content="width=device-width, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
  <!-- Web Fonts  -->
  <link href="//fonts.googleapis.com/css?family=Open+Sans:300,400,600,700,800" rel="stylesheet" type="text/css"/>
  <script src="//use.fontawesome.com/a6c97bfcf3.js">
  </script>
  <!-- Vendor

In [9]:
# Gather every <div> whose class is "post-content"; the matches come back as an iterable, list-like collection
results = soup.find_all(name='div', attrs={'class': 'post-content'})

In [10]:
# Create empty lists to push values to (re-created on every run of this cell)
titles = []
details = []
links = []

# Case names, spelled as on the site, to keep from the scrape
titles_use = [
    "Brown v. Board of Education",
    "Gideon v. Wainwright",
    "Hazelwood v. Kuhlmeier",
    "Mapp v. Ohio",
    "Miranda v. Arizona",
    "New Jersey v. T.L.O.",
    "Regents of the U. of California v. Bakke",
    "Roe v. Wade",
    "Texas v. Johnson",
    "Tinker v. Des Moines",
    "United States v. Nixon",
]

# Loop through returned results
for result in results:
    # Look up all three elements BEFORE recording anything.  find() returns
    # None for a missing element; appending the title first and then failing
    # on the detail or link would leave the three lists with different
    # lengths, so that zipping them later pairs values from different cases.
    title_tag = result.find('h5')
    detail_tag = result.find('p')
    link_tag = result.find('a')
    # .get() yields None for an anchor without an href instead of raising
    link = link_tag.get('href') if link_tag is not None else None

    # Skip incomplete entries explicitly rather than relying on an exception
    # (a missing anchor raises TypeError, which `except AttributeError` misses)
    if title_tag is None or detail_tag is None or link is None:
        print('Skipping result with missing title, detail, or link')
        continue

    title = title_tag.text
    detail = detail_tag.text
    full_link = f'https://www.landmarkcases.org{link}'

    # Keep the case only when it is one of the cases of interest; all three
    # values are appended together so the lists always stay aligned
    if title in titles_use:
        titles.append(title)
        details.append(detail)
        links.append(full_link)

    # Print results only if title, detail, and link are available
    if title and detail and link:
        print('-------------')
        print(title)
        print(detail)
        print(full_link)

-------------
Brown v. Board of Education
School Segregation, Equal Protection
https://www.landmarkcases.org/cases/brown-v-board-of-education
-------------
Dred Scott v. Sandford
Slavery, Due Process, the Missouri Compromise
https://www.landmarkcases.org/cases/dred-scott-v-sandford
-------------
Gibbons v. Ogden
States Rights, Commerce Clause
https://www.landmarkcases.org/cases/gibbons-v-ogden
-------------
Gideon v. Wainwright
Right to Counsel, Due Process
https://www.landmarkcases.org/cases/gideon-v-wainwright
-------------
Hazelwood v. Kuhlmeier
Censorship, Student Press Rights
https://www.landmarkcases.org/cases/hazelwood-v-kuhlmeier
-------------
Korematsu v. United States
Japanese Internment, Equal Protection
https://www.landmarkcases.org/cases/korematsu-v-united-states
-------------
Mapp v. Ohio
Exclusionary Rule, Due Process
https://www.landmarkcases.org/cases/mapp-v-ohio
-------------
Marbury v. Madison
Judicial Review, Federalism
https://www.landmarkcases.org/cases/marbury-v-

In [11]:
# Case numbers to attach to the scraped case data.
# Order matters: each entry is paired by position with the corresponding case.
cases = [
    "1953-069",  # Brown v. Board of Education
    "1962-058",  # Gideon v. Wainwright
    "1987-019",  # Hazelwood v. Kuhlmeier
    "1960-133",  # Mapp v. Ohio
    "1965-122",  # Miranda v. Arizona
    "1984-022",  # New Jersey v. T.L.O.
    "1977-147",  # Regents of the U. of California v. Bakke
    "1972-048",  # Roe v. Wade
    "1988-124",  # Texas v. Johnson
    "1968-043",  # Tinker v. Des Moines
    "1973-172",  # United States v. Nixon
]

In [12]:
# Create a dataframe with the scraped information, indexed by case ID.
# Each case ID is looked up by title rather than zipped in by position:
# `titles` follows the order (and availability) of cases on the scraped page,
# so a single missing or reordered case would otherwise shift every later ID
# onto the wrong row without any error.
assert len(titles_use) == len(cases), "titles_use and cases must pair up one-to-one"
case_id_by_title = dict(zip(titles_use, cases))
case_ids = [case_id_by_title[title] for title in titles]

df = pd.DataFrame(
    list(zip(titles, details, links, case_ids)),
    columns=["Case", "Details", "Link", "CaseId"],
).set_index("CaseId")
df

Unnamed: 0_level_0,Case,Details,Link
CaseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1953-069,Brown v. Board of Education,"School Segregation, Equal Protection",https://www.landmarkcases.org/cases/brown-v-bo...
1962-058,Gideon v. Wainwright,"Right to Counsel, Due Process",https://www.landmarkcases.org/cases/gideon-v-w...
1987-019,Hazelwood v. Kuhlmeier,"Censorship, Student Press Rights",https://www.landmarkcases.org/cases/hazelwood-...
1960-133,Mapp v. Ohio,"Exclusionary Rule, Due Process",https://www.landmarkcases.org/cases/mapp-v-ohio
1965-122,Miranda v. Arizona,"Self-Incrimination, Due Process",https://www.landmarkcases.org/cases/miranda-v-...
1984-022,New Jersey v. T.L.O.,Student Search & Seizure,https://www.landmarkcases.org/cases/new-jersey...
1977-147,Regents of the U. of California v. Bakke,"Affirmative Action, Equal Protection",https://www.landmarkcases.org/cases/regents-of...
1972-048,Roe v. Wade,"Abortion, Right to Privacy",https://www.landmarkcases.org/cases/roe-v-wade
1988-124,Texas v. Johnson,"Flag Burning, Freedom of Speech",https://www.landmarkcases.org/cases/texas-v-jo...
1968-043,Tinker v. Des Moines,"Student Speech, Symbolic Speech",https://www.landmarkcases.org/cases/tinker-v-d...


In [14]:
# Attach the prediction columns to the scraped case details, keeping exactly
# the rows (and row order) of df.
# concat's `join_axes` argument was deprecated in pandas 0.25 and removed in
# 1.0, where passing it raises TypeError; reindexing the other frame to df's
# index before concatenating produces the same result.
full = pd.concat([df, case_predictions.reindex(df.index)], axis=1)
full

Unnamed: 0_level_0,Case,Details,Link,caseName,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
CaseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1953-069,Brown v. Board of Education,"School Segregation, Equal Protection",https://www.landmarkcases.org/cases/brown-v-bo...,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1,1,2,2,1,1,2,2,1,1,2.0
1962-058,Gideon v. Wainwright,"Right to Counsel, Due Process",https://www.landmarkcases.org/cases/gideon-v-w...,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1,1,2,1,1,1,2,2,1,1,2.0
1987-019,Hazelwood v. Kuhlmeier,"Censorship, Student Press Rights",https://www.landmarkcases.org/cases/hazelwood-...,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1,1,2,2,1,1,2,2,1,1,1.0
1960-133,Mapp v. Ohio,"Exclusionary Rule, Due Process",https://www.landmarkcases.org/cases/mapp-v-ohio,MAPP v. OHIO,1,2,2,1,1,1,1,2,1,1,2.0
1965-122,Miranda v. Arizona,"Self-Incrimination, Due Process",https://www.landmarkcases.org/cases/miranda-v-...,MIRANDA v. ARIZONA,1,1,2,2,1,1,2,2,1,1,2.0
1984-022,New Jersey v. T.L.O.,Student Search & Seizure,https://www.landmarkcases.org/cases/new-jersey...,NEW JERSEY v. T. L. O.,1,2,2,2,2,2,2,1,1,2,1.0
1977-147,Regents of the U. of California v. Bakke,"Affirmative Action, Equal Protection",https://www.landmarkcases.org/cases/regents-of...,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1,1,2,2,1,1,2,1,1,1,1.0
1972-048,Roe v. Wade,"Abortion, Right to Privacy",https://www.landmarkcases.org/cases/roe-v-wade,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1,1,2,1,1,1,1,2,1,1,2.0
1988-124,Texas v. Johnson,"Flag Burning, Freedom of Speech",https://www.landmarkcases.org/cases/texas-v-jo...,TEXAS v. JOHNSON,1,1,2,2,1,1,2,2,1,1,2.0
1968-043,Tinker v. Des Moines,"Student Speech, Symbolic Speech",https://www.landmarkcases.org/cases/tinker-v-d...,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1,1,2,2,1,1,2,2,1,1,2.0


In [15]:
# Drop the scraped short title; the "caseName" column already names the case.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps the
# cell re-runnable: a second in-place drop raises KeyError because "Case" is
# already gone.
full = full.drop(columns=["Case"], errors="ignore")
full.head()

Unnamed: 0_level_0,Details,Link,caseName,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
CaseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1953-069,"School Segregation, Equal Protection",https://www.landmarkcases.org/cases/brown-v-bo...,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1,1,2,2,1,1,2,2,1,1,2.0
1962-058,"Right to Counsel, Due Process",https://www.landmarkcases.org/cases/gideon-v-w...,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1,1,2,1,1,1,2,2,1,1,2.0
1987-019,"Censorship, Student Press Rights",https://www.landmarkcases.org/cases/hazelwood-...,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1,1,2,2,1,1,2,2,1,1,1.0
1960-133,"Exclusionary Rule, Due Process",https://www.landmarkcases.org/cases/mapp-v-ohio,MAPP v. OHIO,1,2,2,1,1,1,1,2,1,1,2.0
1965-122,"Self-Incrimination, Due Process",https://www.landmarkcases.org/cases/miranda-v-...,MIRANDA v. ARIZONA,1,1,2,2,1,1,2,2,1,1,2.0


In [16]:
# Put the columns in presentation order: case name first, then the scraped
# details and link, then the per-justice columns and the two direction columns
full = full.loc[
    :,
    [
        "caseName",
        "Details",
        "Link",
        "justice_106",
        "justice_108",
        "justice_109",
        "justice_110",
        "justice_111",
        "justice_112",
        "justice_113",
        "justice_114",
        "justice_115",
        "predictedDirection",
        "decisionDirection",
    ],
]
full

Unnamed: 0_level_0,caseName,Details,Link,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
CaseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1953-069,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,"School Segregation, Equal Protection",https://www.landmarkcases.org/cases/brown-v-bo...,1,1,2,2,1,1,2,2,1,1,2.0
1962-058,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR","Right to Counsel, Due Process",https://www.landmarkcases.org/cases/gideon-v-w...,1,1,2,1,1,1,2,2,1,1,2.0
1987-019,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,"Censorship, Student Press Rights",https://www.landmarkcases.org/cases/hazelwood-...,1,1,2,2,1,1,2,2,1,1,1.0
1960-133,MAPP v. OHIO,"Exclusionary Rule, Due Process",https://www.landmarkcases.org/cases/mapp-v-ohio,1,2,2,1,1,1,1,2,1,1,2.0
1965-122,MIRANDA v. ARIZONA,"Self-Incrimination, Due Process",https://www.landmarkcases.org/cases/miranda-v-...,1,1,2,2,1,1,2,2,1,1,2.0
1984-022,NEW JERSEY v. T. L. O.,Student Search & Seizure,https://www.landmarkcases.org/cases/new-jersey...,1,2,2,2,2,2,2,1,1,2,1.0
1977-147,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,"Affirmative Action, Equal Protection",https://www.landmarkcases.org/cases/regents-of...,1,1,2,2,1,1,2,1,1,1,1.0
1972-048,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...","Abortion, Right to Privacy",https://www.landmarkcases.org/cases/roe-v-wade,1,1,2,1,1,1,1,2,1,1,2.0
1988-124,TEXAS v. JOHNSON,"Flag Burning, Freedom of Speech",https://www.landmarkcases.org/cases/texas-v-jo...,1,1,2,2,1,1,2,2,1,1,2.0
1968-043,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,"Student Speech, Symbolic Speech",https://www.landmarkcases.org/cases/tinker-v-d...,1,1,2,2,1,1,2,2,1,1,2.0


In [18]:
# Write the combined table to disk as CSV, including the CaseId index column
full.to_csv(path_or_buf="predict_details.csv", index=True)