# Web Scrapping Challenge
Code by: Henry Greyner

In [26]:
# Import dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [37]:
# Excute the chromedriver and start Chrome
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [28]:
# Specify URL
url = 'http://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [29]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Code to find the Title and Paragraph of first news
news = soup.find('div', class_='list_text')
news_title = news.find('div', class_='content_title').text
news_p = news.find('div', class_='article_teaser_body').text

In [30]:
print(news_title)

Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover


In [31]:
print(news_p)

NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries. 


## JPL Mars Space Images - Featured Image

In [42]:
# Specify URL
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)
# Click on "Full Image"
browser.click_link_by_partial_text('FULL IMAGE')

In [43]:
# Click on "more info"
browser.click_link_by_partial_text('more info')

In [44]:
html = browser.html
soup2 = BeautifulSoup(html, 'html.parser')

# Find url using parsing by beautiful soup
featured_image = soup2.find('figure', class_='lede').a['href']
print(featured_image)

/spaceimages/images/largesize/PIA18848_hires.jpg


In [10]:
# Compose url with initial url and image url
featured_image_url = f'https://www.jpl.nasa.gov/spaceimages{featured_image}'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/spaceimages/images/largesize/PIA19964_hires.jpg


## Mars Weather

In [11]:
# Specify URL
url3 = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url3)

In [16]:
tweet_html_content = requests.get(url3).text
soup = BeautifulSoup(tweet_html_content, "lxml")
tweet_list = soup.find_all('div', class_="js-tweet-text-container")
#empty list to hold tweet we are going to keep, used to strip useless content from string
holds_tweet = []
# Loop that scans every tweet and searches specifically for those that have weather info
for tweets in tweet_list: 
    tweet_body = tweets.find('p').text
    if 'InSight' and 'sol' in tweet_body:
        holds_tweet.append(tweet_body)
        #break statement to only print the first weather tweet found
        break
    else: 
        #if not weather related skip it and try again
        pass
    
#cleaned up tweet removes unncessary link to twitter image included in string, :-26 removes the last 26 characters which is the length of the img url
#after reviewing several links they all appear to work with the value of -26
mars_weather = ([holds_tweet[0]][0][:-26])
tweet_img_link = ([holds_tweet[0]][0][-26:])
print(f"{mars_weather}: {tweet_img_link}")

InSight sol 453 (2020-03-05) low -95.1ºC (-139.1ºF) high -10.8ºC (12.6ºF)
winds from the SSW at 6.0 m/s (13.3 mph) gusting to 21.4 m/s (47.9 mph)
pressure at 6.30 hPa: pic.twitter.com/8nEk9RxzXk


## Mars Facts

In [17]:
# Specify URL
url4 = 'https://space-facts.com/mars/'

In [18]:
# Using Pandas to scrape the table containing facts
mars_table = pd.read_html(url4)[0]
mars_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
# Renaming the table columns
mars_table.columns=["Description", "Value"]
mars_table

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [20]:
# Making the Description the Index
mars_table.set_index('Description', inplace=True)
mars_table

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [21]:
# Exporting dataframe to html table
mars_table.to_html('mars_fact.html')

## Mars Hemispheres

In [22]:
# Specify URL
url5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url5)

In [23]:
html = browser.html
soup5 = BeautifulSoup(html, 'html.parser')

In [24]:
# Find all information about the items on the page
all_items = soup5.find_all('div', class_='item')
# Create empty list for hemisphere urls 
list_of_urls = []
# Assign a variable named main_url 
main_url = 'https://astrogeology.usgs.gov'
# Loop through the items previously stored
for i in all_items: 
    # Store title
    title = i.find('h3').get_text()
    # Assign url to a variable that leads to the page where full images are stored
    source_img_url = i.find('a', class_='itemLink product-item')['href']
    # Visit the page where full images are stored 
    browser.visit(main_url + source_img_url)
    # HTML Object
    source_img_html = browser.html
    # Parse HTML with Beautiful Soup
    soup6 = BeautifulSoup(source_img_html, 'html.parser')
    # Pull full image source url information
    img_url = main_url + soup6.find('img', class_='wide-image')['src']
    # Append the retrieved information into a list of dictionaries 
    list_of_urls.append({"title" : title, "img_url" : img_url})
# Display title and image urls of the items
list_of_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]