# MarsToday: evaluating Mars climate through REMS sensor onboard Curiosity Mars rover

## Import box

In [78]:
# General Imports

from Functions import *

In [2]:
#Selenium imports

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
import time

## Functions box

In [4]:
# Function to remove undesired columns

def remove_columns(df, column_name):
    
    """
    This is a function that removes undesired columns. Requires two arguments.
    Arguments: dataframe, column name
    Input: the current dataframe
    Output: the current dataframe without the selected columns
    """
    
    df.drop(columns=f"{column_name}", inplace=True)
    
    return df.sample(2)

In [5]:
# Function to rename columns

def rename_columns(df, old_name, new_name):
    
    """
    This a functions that renames the name of any given columns. Requires three arguments.
    Arguments: dataframe, old name of the column, new name of the column.
    Input: the current column name
    Output: the column renamed
    """
    
    df.rename(columns={f"{old_name}": f"{new_name}"}, inplace=True)
    return df.sample(2)

In [6]:
# Function to clean the atmosphere column

def clean_atmosphere(df,column_name,string,replacement):
    
    """
    This a function that cleans the Atmospheric opacity columns by replacing its elements.
    Requires three arguments.
    Arguments: dataframe, column name, string to replace, new string.
    Input: any string
    Output: a string
    """
    
    df[f"{column_name}"] = df[f"{column_name}"].replace(f"{string}",f"{replacement}")    
    return df.sample(2)

In [7]:
# Function to clean the month column

def clean_month(df,column_name):
    
    """
    This is a function that cleans the Month column. Requires two arguments. Removes unwanted strings and only keeps the value
    Arguments: dataframe, column name
    Input: string + digit
    Output: digit
    """
    
    df[f"{column_name}"] = df[f"{column_name}"].str.extract(r"(\d)")    
    return df.sample(2)

In [8]:
# Function to convert degrees Fahrenheit to degrees Celsius

def FtoC(df, column_name):
    
    """
    This is a function that converts any temperature in degrees Fahrenheit to degrees Celsius.
    Requires two arguments.
    Arguments: dataframe, column name
    Input: an INTEGER in Fahrenheit degrees
    Output: an INTEGER in Celsius degrees
    """
    
    df[f"{column_name}"] = [((i - 32.0) * 5.0/9.0) for i in df[f"{column_name}"]]

    return df.sample(2)

In [9]:
# Function to convert milibar to Pascals

def mbartoPa(df, column_name):
    
    """
    This is a function that converts any temperature in degrees Fahrenheit to degrees Celsius.
    Requires two arguments.
    Arguments: dataframe, column name
    Input: an INTEGER in Fahrenheit degrees
    Output: an INTEGER in Celsius degrees
    """
    
    df[f"{column_name}"] = [i*100 for i in df[f"{column_name}"]]

    return df.sample(2)

In [10]:
# Function to round decimals from floats

def roundval(df, column_name, n):
    """
    This is a function that rounds any float to a given n value. Requires three arguments.
    Arguments: dataframe, name of the column where you want to rewrite the values, value of the round
    Input: a float with multiple decimals
    Output: a float with n decimals
    """
    df[f"{column_name}"] = [round(i,n) for i in df[f"{column_name}"]]
    return df.sample(2)

In [45]:
def floatify(df, column_name):
    """
    This is a function that floats any value. Requires two arguments.
    Arguments: dataframe, name of the column where you want to rewrite the values
    Input: a string or integer
    Output: a float
    """
    df[f"{column_name}"] = df[f"{column_name}"].astype(float)
    return df.sample(2)

In [12]:
# This is a function to call NASA api with a given a specific date and Camera type.

def call_Curiosity (date, camera):
    """
    This is a function that calls NASA API 'Mars Rover Photos' with two arguments. It returns the url from
    a specific camera onboard Curiosity rover.
    date: input the desired date in the format YYYY-MM-DD as a STRING,
    camera: select between FHAZ, RHAZ, MAST, CHEMCAM, MAHLI, MARDI, NAVCAM, PANCAM, MINITES, as STRING
    
    """
        
    try:
        nasa = os.getenv("token")
        url = f"https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?earth_date={date}&camera={camera}&api_key={nasa}"
        request = requests.get(url)
        df = pd.DataFrame(request.json())
        df_clean = pd.DataFrame(df.values[0][0])
        image_url = list(df_clean["img_src"])[0]
        display(Image(image_url, width=300, height=200))
        
        return f"Image available for camera {camera} onboard Curiosity rover"

    except:
            
        return f"No image available on {date} for camera {camera} onboard Curiosity rover, please select another date"    

In [13]:
def get_pictures_Curiosity(date):
    
    """
    This is a function that calls call_NASA function with one argument. It returns the url of all the pictures 
    taken by all the cameras of Curiosity rover from a specific Sol date.
    date: input the desired date in the format YYYY-MM-DD as STRING.
    
    """       
    cameralist = ["FHAZ", "RHAZ", "MAST", "CHEMCAM", "MAHLI", "MARDI", "NAVCAM", "PANCAM", "MINITES"]
    for i in cameralist:
        print(call_Curiosity(date, i))
    pass

# Data from Mars

Data imported from Kaggle: https://www.kaggle.com/code/davidbnn92/weather-data

## Data first visualization

In [14]:
mars = pd.read_csv("../data/mars-weather.csv")
mars

Unnamed: 0,id,terrestrial_date,sol,ls,month,min_temp,max_temp,pressure,wind_speed,atmo_opacity
0,1895,2018-02-27,1977,135,Month 5,-77.0,-10.0,727.0,,Sunny
1,1893,2018-02-26,1976,135,Month 5,-77.0,-10.0,728.0,,Sunny
2,1894,2018-02-25,1975,134,Month 5,-76.0,-16.0,729.0,,Sunny
3,1892,2018-02-24,1974,134,Month 5,-77.0,-13.0,729.0,,Sunny
4,1889,2018-02-23,1973,133,Month 5,-78.0,-18.0,730.0,,Sunny
...,...,...,...,...,...,...,...,...,...,...
1889,24,2012-08-18,12,156,Month 6,-76.0,-18.0,741.0,,Sunny
1890,13,2012-08-17,11,156,Month 6,-76.0,-11.0,740.0,,Sunny
1891,2,2012-08-16,10,155,Month 6,-75.0,-16.0,739.0,,Sunny
1892,232,2012-08-15,9,155,Month 6,,,,,Sunny


## Data cleaning

In [15]:
# Remove undesired columns

columns_to_remove = ["wind_speed", "id", "ls"]

for c in columns_to_remove:
    remove_columns(mars, c)

In [16]:
# Created a new column, average temperature

mars["Mean_temp"] = ((mars["min_temp"] + mars["max_temp"])/2)

In [17]:
# Cleaning the atmopshere column

clean_atmosphere(mars,"atmo_opacity","--","0")

Unnamed: 0,terrestrial_date,sol,month,min_temp,max_temp,pressure,atmo_opacity,Mean_temp
429,2016-12-11,1546,Month 10,-73.0,-7.0,889.0,1,-40.0
876,2015-08-29,1088,Month 2,-77.0,-19.0,885.0,1,-48.0


In [18]:
# Rename columns

oldname = ["terrestrial_date", "sol", "month", "min_temp", "max_temp", "pressure", "atmo_opacity"]
newname = ["Earth Date", "Sol", "Month", "Min_temp", "Max_temp", "Pressure", "Atmo_opacity"]

for o, n in zip(oldname, newname):
    rename_columns(mars, o, n)

In [19]:
# Cleaning the month column

clean_month(mars, "Month")

Unnamed: 0,Earth Date,Sol,Month,Min_temp,Max_temp,Pressure,Atmo_opacity,Mean_temp
534,2016-08-25,1441,8,-74.0,10.0,828.0,1,-32.0
794,2015-11-21,1170,3,-85.0,-31.0,893.0,1,-58.0


In [20]:
#Create the Season column importing Month values

mars["Season"] = mars["Month"]

for i in range(len(mars["Season"])):

    if i in range(1,4):
        
        mars["Season"] = mars["Season"].replace(f"{i}","Winter")
    
    elif i in range(4,7):

        mars["Season"] = mars["Season"].replace(f"{i}","Spring")
    
    elif i in range(7,10):

        mars["Season"] = mars["Season"].replace(f"{i}","Summer")
    
    elif i in range(10,13):

        mars["Season"] = mars["Season"].replace(f"{i}","Autumn")

In [21]:
mars

Unnamed: 0,Earth Date,Sol,Month,Min_temp,Max_temp,Pressure,Atmo_opacity,Mean_temp,Season
0,2018-02-27,1977,5,-77.0,-10.0,727.0,1,-43.5,Spring
1,2018-02-26,1976,5,-77.0,-10.0,728.0,1,-43.5,Spring
2,2018-02-25,1975,5,-76.0,-16.0,729.0,1,-46.0,Spring
3,2018-02-24,1974,5,-77.0,-13.0,729.0,1,-45.0,Spring
4,2018-02-23,1973,5,-78.0,-18.0,730.0,1,-48.0,Spring
...,...,...,...,...,...,...,...,...,...
1889,2012-08-18,12,6,-76.0,-18.0,741.0,1,-47.0,Spring
1890,2012-08-17,11,6,-76.0,-11.0,740.0,1,-43.5,Spring
1891,2012-08-16,10,6,-75.0,-16.0,739.0,1,-45.5,Spring
1892,2012-08-15,9,6,,,,1,,Spring


Data cleaned! :)

In [22]:
mars.to_csv("../data/mars-weather-cleaned.csv", index = False)

# Data from Earth

Extracted from NOAA database: https://www.ncei.noaa.gov/access/search/data-search/global-summary-of-the-day

## Data first visualization

In [23]:
earth = pd.read_csv("../data/papua-weather.csv")

In [24]:
earth

Unnamed: 0,STATION,DATE,MAX,MIN,SLP,TEMP
0,92035099999,2012-08-07,84.2,71.6,1013.0,77.3
1,92035099999,2012-08-08,86.9,73.4,1011.9,78.6
2,92035099999,2012-08-09,84.2,73.4,1012.0,77.8
3,92035099999,2012-08-10,78.8,71.6,1012.9,74.3
4,92035099999,2012-08-11,78.8,71.6,1012.8,74.2
...,...,...,...,...,...,...
1980,92035099999,2018-02-23,91.4,77.0,9999.9,83.8
1981,92035099999,2018-02-24,91.4,77.0,9999.9,85.4
1982,92035099999,2018-02-25,93.2,75.2,1007.1,81.8
1983,92035099999,2018-02-26,91.4,77.0,1006.9,82.8


## Data cleaning

In [25]:
# Deleted Station column

remove_columns(earth, "STATION")

Unnamed: 0,DATE,MAX,MIN,SLP,TEMP
1974,2018-02-17,93.2,73.4,1008.5,82.1
909,2015-02-25,91.4,75.2,1006.6,82.1


In [26]:
# Rename columns

rename_columns(earth, "DATE", "Earth Date")
rename_columns(earth, "MAX", "Max_temp")
rename_columns(earth, "MIN", "Min_temp")
rename_columns(earth, "TEMP", "Mean_temp")
rename_columns(earth, "SLP", "Pressure")

Unnamed: 0,Earth Date,Max_temp,Min_temp,Pressure,Mean_temp
1914,2017-12-19,87.8,78.8,1007.7,84.5
769,2014-10-08,84.4,75.2,1010.3,79.2


In [27]:
# Converting values from Celsius to Fahrenheit

columns = ["Max_temp","Min_temp","Mean_temp"]
for i in columns:
    FtoC(earth, i)

In [28]:
# Converting values from mBar to Pascals

columns = ["Pressure"]
for i in columns:
    mbartoPa(earth,i)

In [29]:
# Sorting the date by descending dates

earth.sort_values(by=["Earth Date"], ascending = False, inplace=True)
earth.reset_index(drop = True)

Unnamed: 0,Earth Date,Max_temp,Min_temp,Pressure,Mean_temp
0,2018-02-27,33.0,24.0,100720.0,27.333333
1,2018-02-26,33.0,25.0,100690.0,28.222222
2,2018-02-25,34.0,24.0,100710.0,27.666667
3,2018-02-24,33.0,25.0,999990.0,29.666667
4,2018-02-23,33.0,25.0,999990.0,28.777778
...,...,...,...,...,...
1980,2012-08-11,26.0,22.0,101280.0,23.444444
1981,2012-08-10,26.0,22.0,101290.0,23.500000
1982,2012-08-09,29.0,23.0,101200.0,25.444444
1983,2012-08-08,30.5,23.0,101190.0,25.888889


In [30]:
# Remove 99999s from Pressure

earth.drop(earth.index[earth["Pressure"] == 999990.0], inplace = True)

In [31]:
# Round values from Mean_temp

roundval(earth, "Mean_temp", 1)
earth.reset_index(drop = True)

Unnamed: 0,Earth Date,Max_temp,Min_temp,Pressure,Mean_temp
0,2018-02-27,33.0,24.0,100720.0,27.3
1,2018-02-26,33.0,25.0,100690.0,28.2
2,2018-02-25,34.0,24.0,100710.0,27.7
3,2018-02-22,34.0,24.0,100690.0,27.5
4,2018-02-21,32.0,23.0,100570.0,27.2
...,...,...,...,...,...
1750,2012-08-11,26.0,22.0,101280.0,23.4
1751,2012-08-10,26.0,22.0,101290.0,23.5
1752,2012-08-09,29.0,23.0,101200.0,25.4
1753,2012-08-08,30.5,23.0,101190.0,25.9


Data cleaned :)!

In [32]:
earth.to_csv("../data/papua-weather-cleaned.csv", index = False)

# Scraping REMS data

# Scraping REMS widget

url2 = "http://cab.inta-csic.es/rems/es/"

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
#driver.get("https://www.google.com")

lista_ = []

with driver:
    driver.get(url2)
    
    for i in range(1574):

        element = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,"mw-previous")))
        #element.click()

        sol = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-sol']"))).text
        earth_day = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-terrestrial_date']"))).text
        month = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-season']"))).text
        maxtemp = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-max_temp']"))).text
        mintemp = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-min_temp']"))).text
        pressure = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-pressure']"))).text
        atmo_opacity = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH,"//span[@id='mw-atmo_opacity']"))).text
        
        new_dict = {
            "Earth Date": earth_day,
            "Sol": sol,
            "Month": month,
            "Min_temp": mintemp,
            "Max_temp": maxtemp,
            "Pressure": pressure,
            "Atmo_opacity": atmo_opacity
        }
        
        lista_.append(new_dict)
        element.click()

widget = pd.DataFrame(lista_)


In [65]:
widget = pd.read_csv("../data/widget.csv")

## Scrapped data visualization

In [66]:
widget = widget.iloc[0:1548]

## Data cleaning

In [67]:
#Cleaning the month

clean_month(widget, "Month")

Unnamed: 0,Earth Date,Sol,Month,Min_temp,Max_temp,Pressure,Atmo_opacity
439,2021-06-21,3155,3,-78,-29,872,Soleado
700,2020-09-18,2886,1,-71,-1,870,Soleado


In [68]:
# Creating the Season column

widget["Season"] = widget["Month"]

for i in range(len(widget["Season"])):

    if i in range(1,4):
        
        widget["Season"] = widget["Season"].replace(f"{i}","Winter")
    
    elif i in range(4,7):

        widget["Season"] = widget["Season"].replace(f"{i}","Spring")
    
    elif i in range(7,10):

        widget["Season"] = widget["Season"].replace(f"{i}","Summer")
    
    elif i in range(10,13):

        widget["Season"] = widget["Season"].replace(f"{i}","Autumn")

In [69]:
# Clean the atmo column
clean_atmosphere(widget,"Atmo_opacity","Soleado","Sunny")

Unnamed: 0,Earth Date,Sol,Month,Min_temp,Max_temp,Pressure,Atmo_opacity,Season
1318,2018-11-22,2238,1,-69,-2,853,Sunny,Winter
76,2022-07-26,3544,1,-69,-4,867,Sunny,Winter


In [70]:
# Remove weird values
widget.drop(widget.index[widget["Min_temp"] == "Valor no disponible"], inplace = True)

In [71]:
# Converting strings to floats
columns = ["Min_temp", "Max_temp", "Pressure"]
for i in columns:
    floatify(widget, i)

In [72]:
# Create the Mean column

widget["Mean_temp"] = ((widget["Min_temp"] + widget["Max_temp"])/2)

In [74]:
widget

Unnamed: 0,Earth Date,Sol,Month,Min_temp,Max_temp,Pressure,Atmo_opacity,Season,Mean_temp
0,2022-10-19,3627,1,-67.0,-9.0,803.0,Sunny,Winter,-38.0
1,2022-10-18,3626,1,-67.0,-12.0,804.0,Sunny,Winter,-39.5
2,2022-10-17,3625,1,-67.0,-11.0,804.0,Sunny,Winter,-39.0
3,2022-10-16,3624,1,-67.0,-12.0,806.0,Sunny,Winter,-39.5
4,2022-10-15,3623,1,-67.0,-12.0,808.0,Sunny,Winter,-39.5
...,...,...,...,...,...,...,...,...,...
1543,2018-03-05,1983,5,-76.0,-8.0,723.0,Sunny,Spring,-42.0
1544,2018-03-04,1982,5,-77.0,-7.0,724.0,Sunny,Spring,-42.0
1545,2018-03-03,1981,5,-75.0,-10.0,725.0,Sunny,Spring,-42.5
1546,2018-03-02,1980,5,-77.0,-11.0,725.0,Sunny,Spring,-44.0


Data cleaned!

In [75]:
widget.to_csv("../data/widget-cleaned.csv", index = False)