In [10]:
#Scrape the data in the HTML table. To do so, choose one of two ways. 
#The first, simpler way is to use Pandas's read_html method. 
#The second, slightly more challenging way is to manually scrape the data by using Splinter and Beautiful Soup. We highly encourage you to choose the latter to reinforce your scraping skills.

%pip install splinter[selenium3]
from splinter import Browser
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

Note: you may need to restart the kernel to use updated packages.


In [11]:
# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

#Visit Mars data site:
url='https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html'
browser.visit(url)

html=browser.html
html_soup=soup(html, 'html.parser')

In [12]:
results = []
for table in html_soup.find_all('table', class_='table'):
    extracted = table.find('tbody') 
    if extracted not in results:
        try:
            results.append(extracted.text)
        except AttributeError:
            pass



In [13]:
#Create Dataframe:
#The id heading: The identification number of a single transmission from the Curiosity rover.
#The terrestrial_date heading: The date on Earth.
#The sol heading: The number of elapsed sols (Martian days) since Curiosity landed on Mars.
#The ls heading: The solar longitude.
#The month heading: The Martian month.
#The min_temp heading: The minimum temperature, in Celsius, of a single Martian day (sol).
#The pressure heading: The atmospheric pressure at Curiosity's location.
import pandas as pd
mars_lt = pd.read_html('https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html')
mars_lt


[        id terrestrial_date   sol   ls  month  min_temp  pressure
 0        2       2012-08-16    10  155      6     -75.0     739.0
 1       13       2012-08-17    11  156      6     -76.0     740.0
 2       24       2012-08-18    12  156      6     -76.0     741.0
 3       35       2012-08-19    13  157      6     -74.0     732.0
 4       46       2012-08-20    14  157      6     -74.0     740.0
 ...    ...              ...   ...  ...    ...       ...       ...
 1862  1889       2018-02-23  1973  133      5     -78.0     730.0
 1863  1892       2018-02-24  1974  134      5     -77.0     729.0
 1864  1894       2018-02-25  1975  134      5     -76.0     729.0
 1865  1893       2018-02-26  1976  135      5     -77.0     728.0
 1866  1895       2018-02-27  1977  135      5     -77.0     727.0
 
 [1867 rows x 7 columns]]

In [35]:
import numpy as np
np.array(mars_lt)
mars_np=np.reshape(mars_lt,(1867,7))
mars_df=pd.DataFrame(np.array(mars_np),columns=['The identification number of a single transmission from the Curiosity rover', 
                 'The date on Earth', 
                 'The number of elapsed sols (Martian days) since Curiosity landed on Mars', 
                 'The solar longitude',
                 'The Martian month',
                 'The minimum temperature, in Celsius, of a single Martian day (sol)',
                 'The atmospheric pressure at Curiosity Location'])

In [15]:
mars_df

Unnamed: 0,The identification number of a single transmission from the Curiosity rover,The date on Earth,The number of elapsed sols (Martian days) since Curiosity landed on Mars,The solar longitude,The Martian month,"The minimum temperature, in Celsius, of a single Martian day (sol)",The atmospheric pressure at Curiosity Location
0,2,2012-08-16,10,155,6,-75.0,739.0
1,13,2012-08-17,11,156,6,-76.0,740.0
2,24,2012-08-18,12,156,6,-76.0,741.0
3,35,2012-08-19,13,157,6,-74.0,732.0
4,46,2012-08-20,14,157,6,-74.0,740.0
...,...,...,...,...,...,...,...
1862,1889,2018-02-23,1973,133,5,-78.0,730.0
1863,1892,2018-02-24,1974,134,5,-77.0,729.0
1864,1894,2018-02-25,1975,134,5,-76.0,729.0
1865,1893,2018-02-26,1976,135,5,-77.0,728.0


In [17]:
#Export dataframe to CSV
mars_df.to_csv('Mars_data_fact.csv')

In [49]:
#How many months exist on Mars?

mth_mars=mars_df.groupby(['The Martian month']).size()
display(mth_mars)


The Martian month
1     174
2     178
3     192
4     194
5     149
6     147
7     142
8     141
9     134
10    112
11    138
12    166
dtype: int64

In [30]:
#Which month, on average, has the lowest temperature? The highest?
temp_mars=mars_df.groupby('The Martian month', as_index=False)['The minimum temperature, in Celsius, of a single Martian day (sol)'].mean()
temp_mars

Unnamed: 0,The Martian month,"The minimum temperature, in Celsius, of a single Martian day (sol)"
0,1,-77.16092
1,2,-79.932584
2,3,-83.307292
3,4,-82.747423
4,5,-79.308725
5,6,-75.29932
6,7,-72.28169
7,8,-68.382979
8,9,-69.171642
9,10,-71.982143


In [31]:
#Which month, on average, has the lowest atmospheric pressure? The highest?
prs_mars=mars_df.groupby('The Martian month', as_index=False)['The atmospheric pressure at Curiosity Location'].mean()
prs_mars

Unnamed: 0,The Martian month,The atmospheric pressure at Curiosity Location
0,1,862.488506
1,2,889.455056
2,3,877.322917
3,4,806.329897
4,5,748.557047
5,6,745.054422
6,7,795.105634
7,8,873.829787
8,9,913.30597
9,10,887.3125


In [61]:
#How many terrestrial days exist in a Martian year? A visual estimate within 25% was made
earth_days=mars_df.groupby('The solar longitude', as_index=False)['The date on Earth', 'The solar longitude'].count()
earth_days

  earth_days=mars_df.groupby('The solar longitude', as_index=False)['The date on Earth', 'The solar longitude'].count()


Unnamed: 0,The date on Earth,The solar longitude
0,6,6
1,5,5
2,6,6
3,5,5
4,3,3
...,...,...
355,6,6
356,6,6
357,6,6
358,5,5
