In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import pandas as pd
import time

In [2]:
 executable_path = {'executable_path': 'chromedriver.exe'}
 browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# NASA Mars News
news_url = 'https://redplanetscience.com/'
browser.visit(news_url)
html = browser.html
soup = BeautifulSoup(html,'html.parser')

In [4]:
# Retrieve the latest news title
news_title = soup.find_all('div', class_='content_title')[0].text
# Retrieve the latest news paragraph
news_p = soup.find_all('div', class_='article_teaser_body')[0].text

# Print the Latest title and its first paragraph
print(news_title)
print(f"------------------------------------------------")
print(news_p)

NASA's Mars Helicopter Attached to Mars 2020 Rover 
------------------------------------------------
The helicopter will be first aircraft to perform flight tests on another planet.


In [5]:
# JPL Mars Space Images - Featured Image
space_image_url = 'https://spaceimages-mars.com/'
browser.visit(space_image_url)
html = browser.html
soup = BeautifulSoup(html,'html.parser')

In [6]:
feature_img = soup.find(class_= 'headerimage fade-in')
print(feature_img['src'])

image/featured/mars3.jpg


In [7]:
feature_image_url = space_image_url + feature_img['src']
print(feature_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


In [8]:
# Mars Facts
facts_url = 'https://galaxyfacts-mars.com/'
browser.visit(facts_url)
html = browser.html
soup = BeautifulSoup(html,'html.parser')

# Use Pandas to scrape table of facts
tables = pd.read_html(facts_url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [9]:
# Use indexing to slice the table to a dataframe
facts_df = tables[1]
facts_df.columns =['Description', 'Value']
facts_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Convert the dataframe to a HTML table and save to html file
facts_table = facts_df.to_html()
facts_table.replace('\n','')
print(facts_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Description</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 ( Phobos &amp; Deimos )</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
  

In [11]:
hemi_image_url = 'https://marshemispheres.com/'
browser.visit(hemi_image_url)
html = browser.html
soup = BeautifulSoup(html,'html.parser')

In [12]:
# Collect the urls for the hemisphere images
items = soup.find_all("div", class_="item")

main_url = 'https://marshemispheres.com/'
hemisphere_urls = []

for item in items:
    hemisphere_urls.append(f"{main_url}{item.find('a', class_='itemLink')['href']}")

print(*hemisphere_urls, sep = "\n") 

https://marshemispheres.com/cerberus.html
https://marshemispheres.com/schiaparelli.html
https://marshemispheres.com/syrtis.html
https://marshemispheres.com/valles.html


In [13]:
# Create a list to store the data
hemisphere_image_urls = []

# Loop through each url
for url in hemisphere_urls:
    # Navigate to the page
    browser.visit(url)
    
    # Assign the HTML content of the page to a variable
    
    hemisphere_html = browser.html
    
    # Parse HTML with Beautifulsoup
    
    soup = BeautifulSoup(hemisphere_html,'html.parser')
    
    img_url = soup.find('img', class_="wide-image")['src']
    title = soup.find('h2', class_="title").text
    
    hemisphere_image_urls.append({"title":title,"img_url":f"https://marshemispheres.com{img_url}"})

In [14]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.comimages/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.comimages/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.comimages/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.comimages/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [15]:
!pip install selenium



In [16]:
!pip install webdriver-manager

Collecting webdriver-manager
  Using cached webdriver_manager-3.7.0-py2.py3-none-any.whl (25 kB)
Collecting python-dotenv
  Using cached python_dotenv-0.20.0-py3-none-any.whl (17 kB)
Installing collected packages: python-dotenv, webdriver-manager
Successfully installed python-dotenv-0.20.0 webdriver-manager-3.7.0


In [17]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

In [18]:
driver = webdriver.Chrome(ChromeDriverManager().install())




[WDM] - Current google-chrome version is 103.0.5060
INFO:WDM:Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
INFO:WDM:Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - There is no [win32] chromedriver for browser 103.0.5060 in cache
INFO:WDM:There is no [win32] chromedriver for browser 103.0.5060 in cache
[WDM] - About to download new driver from https://chromedriver.storage.googleapis.com/103.0.5060.53/chromedriver_win32.zip
INFO:WDM:About to download new driver from https://chromedriver.storage.googleapis.com/103.0.5060.53/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\User\.wdm\drivers\chromedriver\win32\103.0.5060.53]
INFO:WDM:Driver has been saved in cache [C:\Users\User\.wdm\drivers\chromedriver\win32\103.0.5060.53]
  driver = webdriver.Chrome(ChromeDriverManager().install())
