# Module 12 Challenge
## Deliverable 2: Scrape and Analyze Mars Weather Data

In [9]:
# Import relevant libraries
from splinter import Browser
from bs4 import BeautifulSoup as soup
import matplotlib.pyplot as plt
import pandas as pd

In [10]:
browser = Browser('chrome')

### Step 1: Visit the Website

Use automated browsing to visit the [Mars Temperature Data Site](https://static.bc-edx.com/data/web/mars_facts/temperature.html). Inspect the page to identify which elements to scrape.

   > **Hint** To identify which elements to scrape, you might want to inspect the page by using Chrome DevTools to discover whether the table contains usable classes.


In [11]:
# Visit the website
# https://static.bc-edx.com/data/web/mars_facts/temperature.html
url = "https://static.bc-edx.com/data/web/mars_facts/temperature.html"
browser.visit(url)

### Step 2: Scrape the Table

Create a Beautiful Soup object and use it to scrape the data in the HTML table.

Note that this can also be achieved by using the Pandas `read_html` function. However, use Beautiful Soup here to continue sharpening your web scraping skills.

In [12]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd

# Use ChromeDriverManager to download and install the appropriate ChromeDriver version
driver = webdriver.Chrome(ChromeDriverManager().install())

AttributeError: 'str' object has no attribute 'capabilities'

In [None]:
# Step 1: Use automated browsing to visit the Mars Temperature Data Site
url = 'https://static.bc-edx.com/data/web/mars_facts/temperature.html'
driver.get(url)

# Get the page source after the page has fully loaded
html = driver.page_source

In [None]:
# Step 2: Create a BeautifulSoup object and use it to scrape the data in the HTML table
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table')

data = []
for row in table.find_all('tr'):
    cells = row.find_all(['th', 'td'])
    row_data = [cell.text.strip() for cell in cells]
    data.append(row_data)

In [None]:
# Step 3: Assemble the scraped data into a Pandas DataFrame
columns = ['id', 'terrestrial_date', 'sol', 'ls', 'month', 'min_temp', 'pressure']
df = pd.DataFrame(data[1:], columns=columns)  # Skip the header row

In [None]:
# Step 4: Examine and cast data types
df['terrestrial_date'] = pd.to_datetime(df['terrestrial_date'])
df['sol'] = df['sol'].astype(int)
df['ls'] = df['ls'].astype(float)
df['month'] = df['month'].astype(int)
df['min_temp'] = df['min_temp'].astype(float)
df['pressure'] = df['pressure'].astype(int)

In [None]:
# Step 5: Analyze the dataset
# Question 1: How many months exist on Mars?
num_months = df['month'].nunique()
print(f"Number of months on Mars: {num_months}")

# Question 2: How many Martian (and not Earth) days worth of data exist in the scraped dataset?
num_sol_days = df['sol'].nunique()
print(f"Number of Martian days in the dataset: {num_sol_days}")

# Question 3: Coldest and warmest months on Mars
average_min_temp_by_month = df.groupby('month')['min_temp'].mean()
average_min_temp_by_month.plot(kind='bar', xlabel='Month', ylabel='Average Min Temperature (C)', title='Average Min Temperature by Month')

# Question 4: Months with the lowest and highest atmospheric pressure on Mars
average_pressure_by_month = df.groupby('month')['pressure'].mean()
average_pressure_by_month.plot(kind='bar', xlabel='Month', ylabel='Average Pressure', title='Average Atmospheric Pressure by Month')

# Question 5: About how many terrestrial days exist in a Martian year?
df.plot(x='terrestrial_date', y='min_temp', xlabel='Terrestrial Date', ylabel='Min Temperature (C)', title='Daily Minimum Temperature over Time')

# Step 6: Export the DataFrame to a CSV file
df.to_csv('mars_weather_data.csv', index=False)

In [None]:
# Step 6: Export the DataFrame to a CSV file
df.to_csv('mars_weather_data.csv', index=False)

In [None]:
# Close the WebDriver
driver.quit()
