# Deliverable 2

Scrape and Analyze Mars Weather Data

In [60]:
'''Scrape the Mars Temperature website by using Splinter and Beautiful Soup.'''

# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

# Set the executable path 
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Visit the Mars temperature site
url = 'https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html'
browser.visit(url)

html = browser.html
html_soup = soup(html, 'html.parser')

In [61]:
import pandas as pd

# Find the table
table = html_soup.find('table', class_='table')

# Find the headers and add to a list
headers_items = table.find_all('th')
headers_list = [headers.text for headers in headers_items]

#Create an empty dataframe with the appropriate columns
data = pd.DataFrame(columns = headers_list)

# Find all the rows
rows = table.find_all('tr')

# Create for loop to iterate through all the rows in the table 
for values in rows[1:]: # Row starts under the tag 'tr'
    row_data = values.find_all('td') # Find all rows
    row = [value.text for value in row_data] # Add row data to a list
    length = len(data) # Find the number off columns in the data dataframe
    data.loc[length] = row # Add the data along the length of the dataframe

# View dataframe
data.head()

Unnamed: 0,id,terrestrial_date,sol,ls,month,min_temp,pressure
0,2,2012-08-16,10,155,6,-75.0,739.0
1,13,2012-08-17,11,156,6,-76.0,740.0
2,24,2012-08-18,12,156,6,-76.0,741.0
3,35,2012-08-19,13,157,6,-74.0,732.0
4,46,2012-08-20,14,157,6,-74.0,740.0


In [62]:
# Check data types of values in each column
data.dtypes

id                  object
terrestrial_date    object
sol                 object
ls                  object
month               object
min_temp            object
pressure            object
dtype: object

In [63]:
# Convert the data types to their appropriate types:
data_convert = data.astype({'id': int, 'sol': int, 'ls': int, 'min_temp': float, 'pressure': float})
data_convert['terrestrial_date'] = pd.to_datetime(data_convert['terrestrial_date'])

data_convert.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                           int32
month                       object
min_temp                   float64
pressure                   float64
dtype: object

In [64]:
browser.quit()