# **SpaceX Falcon 9 First Stage Landing Prediction**


## Web Scraping Falcon 9 and Falcon Heavy Launch Records from Wikipedia



In this lab, you will use web scraping to collect historical Falcon 9 launch records from the Wikipedia page titled `List of Falcon 9 and Falcon Heavy launches`:

https://en.wikipedia.org/wiki/List_of_Falcon_9_and_Falcon_Heavy_launches

![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/labs/module_1_L2/images/Falcon9_rocket_family.svg)


In [None]:
import sys

import requests
from bs4 import BeautifulSoup
import re
import unicodedata
import pandas as pd

In [None]:
def date_time(table_cells):
    """Return the first two text fragments of a table cell, whitespace-stripped.

    ``table_cells`` must expose a ``strings`` iterable of text fragments.
    The caller reads element 0 as the date and element 1 as the time. A cell
    with fewer than two fragments yields a correspondingly shorter list.
    """
    fragments = []
    for fragment in table_cells.strings:
        fragments.append(fragment.strip())
    return fragments[:2]

def booster_version(table_cells):
    """Return the booster version text assembled from a table cell.

    ``table_cells`` must expose a ``strings`` iterable of text fragments.
    The fragments at even positions (0, 2, 4, ...) are kept, the last of
    those is dropped, and the remainder is concatenated.

    Returns an empty string when nothing remains, which lets the caller
    detect the miss with a truthiness check and fall back to another source.
    """
    fragments = list(table_cells.strings)
    # Even-indexed fragments only, minus the final one of that selection.
    kept = fragments[::2][:-1]
    # The result was previously computed but never returned, so every call
    # yielded None.
    return "".join(kept)

def landing_status(table_cells):
    """Return the first text fragment of a table cell, exactly as found.

    ``table_cells`` must expose a ``strings`` iterable of text fragments.
    The fragment is not stripped. Raises ``IndexError`` when the cell has
    no text fragments at all.
    """
    fragments = list(table_cells.strings)
    return fragments[0]

def get_mass(table_cells):
    """Return the payload mass text of a table cell, up to and including "kg".

    ``table_cells`` must expose a ``text`` attribute. The text is
    NFKD-normalised (so compatibility characters such as non-breaking spaces
    become their plain equivalents) and stripped before it is searched.

    Returns the leading part of the text ending in "kg", or the integer 0
    when the cell is empty or contains no "kg" at all.
    """
    mass = unicodedata.normalize("NFKD", table_cells.text).strip()
    unit_pos = mass.find("kg")
    # find() returns -1 when "kg" is missing (this also covers the empty
    # string). Slicing with that value would return only the first character
    # of the text, so use the same 0 sentinel as for an empty cell instead.
    if unit_pos == -1:
        return 0
    return mass[:unit_pos + 2]

def extract_column_from_header(row):
    """Return the cleaned column name of a header cell, or None if numeric.

    ``row`` is a header cell exposing ``br``, ``a`` and ``sup`` attributes
    and a ``contents`` list of strings. Whatever each of those three
    attributes yields is detached from the cell (mutating it) before the
    remaining contents are joined with single spaces and stripped.

    Returns None when the resulting name consists only of digits; otherwise
    the stripped name, which may be an empty string.
    """
    # Looked up one at a time and in this order, so each lookup sees the
    # cell as left by the previous removal.
    for tag_name in ("br", "a", "sup"):
        child = getattr(row, tag_name)
        if child:
            child.extract()

    column_name = " ".join(row.contents).strip()
    if column_name.isdigit():
        return None
    return column_name



In [None]:
# Wikipedia launch-list page to scrape; the `oldid` query parameter requests
# one specific revision of the page rather than whatever is current.
static_url = "https://en.wikipedia.org/w/index.php?title=List_of_Falcon_9_and_Falcon_Heavy_launches&oldid=1027686922"

In [None]:

# Download the page. The timeout (in seconds) keeps the call from blocking
# indefinitely if the server never answers; without it requests waits forever.
response = requests.get(static_url, timeout=30)

# Report the HTTP status; later cells parse `response.text` either way.
if response.status_code == 200:
    print(f"Success: {response.status_code}")
else:
    print(f"Failure {response.status_code}")


In [None]:
# Parse the downloaded HTML into a navigable tree using the "html.parser" backend.
soup = BeautifulSoup(response.text, "html.parser")

In [None]:
# Print the page's <title> text as a quick check that the expected page was parsed.
test = soup.title.string
print(test)

In [None]:
# Collect every <table> element in the document and report how many there are.
html_tables = soup.find_all("table")

print(f"Found {len(html_tables)} tables")

In [None]:
# Take the third table on the page (index 2) and dump its HTML for inspection.
# NOTE(review): going by the variable name this is presumably the first
# launch-records table in this page revision — confirm against the printed output.
first_launch_table = html_tables[2]
print(first_launch_table)

In [None]:
# Build the list of usable column names from the table's header cells.
column_names = []

for row in first_launch_table.find_all("th"):
    column_name = extract_column_from_header(row)
    # The helper gives None for purely numeric headers; empty names are
    # skipped as well.
    if not column_name:
        continue
    column_names.append(column_name)

print(column_names)


In [None]:
# Seed the dictionary with the scraped header names (each mapped to None).
launch_dict = dict.fromkeys(column_names)

# Remove an irrelevant column; pop() with a default does not raise if the
# header was not scraped under exactly this name.
launch_dict.pop('Date and time ( )', None)

# Initialise each column filled by the extraction loop with an empty list
launch_dict['Flight No.'] = []
launch_dict['Launch Site'] = []
launch_dict['Payload'] = []
launch_dict['Payload Mass'] = []
launch_dict['Orbit'] = []
launch_dict['Customer'] = []
launch_dict['Launch Outcome'] = []
# Added some new columns
launch_dict['Version Booster'] = []
launch_dict['Booster Landing'] = []
launch_dict['Date'] = []
launch_dict['Time'] = []

# Any scraped header that was not re-initialised above (for example one that
# differs from the keys used here only in capitalisation) still maps to None
# and would become an empty, all-NaN column in the final table. Drop those.
launch_dict = {key: value for key, value in launch_dict.items() if value is not None}

In [None]:
# Number of launch rows stored so far.
extracted_row = 0
# Visit every table carrying the launch-table CSS classes.
for table_number, table in enumerate(soup.find_all('table', "wikitable plainrowheaders collapsible")):
    # Walk the rows of this table.
    for rows in table.find_all("tr"):
        # A launch row starts with a <th> whose plain text is the flight number.
        # The verdict is reset for every row: a <th> without a plain string
        # would otherwise reuse the previous row's flag and flight number
        # (or hit an unbound name on the very first row).
        flag = False
        if rows.th and rows.th.string:
            flight_number = rows.th.string.strip()
            flag = flight_number.isdigit()
        # Data cells of the current row.
        row = rows.find_all('td')
        # Only rows with a numeric flight number are stored in the dictionary.
        if flag:
            extracted_row += 1

            # Flight number (text of the row's header cell)
            launch_dict["Flight No."].append(flight_number)
            #print(flight_number)

            # Cell 0: date and time fragments
            datatimelist = date_time(row[0])

            # Date value, without leading/trailing commas
            date = datatimelist[0].strip(',')
            launch_dict["Date"].append(date)
            #print(date)

            # Time value
            time = datatimelist[1]
            launch_dict["Time"].append(time)
            #print(time)

            # Cell 1: booster version; fall back to the cell's link text when
            # the helper yields nothing
            bv = booster_version(row[1])
            if not bv:
                bv = row[1].a.string
            launch_dict["Version Booster"].append(bv)
            print(bv)

            # Cell 2: launch site (link text)
            launch_site = row[2].a.string
            launch_dict["Launch Site"].append(launch_site)
            #print(launch_site)

            # Cell 3: payload (link text)
            payload = row[3].a.string
            launch_dict["Payload"].append(payload)
            #print(payload)

            # Cell 4: payload mass
            payload_mass = get_mass(row[4])
            launch_dict["Payload Mass"].append(payload_mass)
            #print(payload_mass)

            # Cell 5: orbit (link text)
            orbit = row[5].a.string
            launch_dict["Orbit"].append(orbit)
            #print(orbit)

            # Cell 6: customer (full cell text, stripped)
            customer = row[6].text.strip()
            launch_dict["Customer"].append(customer)
            #print(customer)

            # Cell 7: launch outcome (first text fragment of the cell)
            launch_outcome = list(row[7].strings)[0]
            launch_dict["Launch Outcome"].append(launch_outcome)
            #print(launch_outcome)

            # Cell 8: booster landing status
            booster_landing = landing_status(row[8])
            launch_dict["Booster Landing"].append(booster_landing)
            #print(booster_landing)
            

In [None]:
# Build the table with one column per dictionary key. Each value is wrapped in
# a Series so pandas aligns the columns by index; columns of unequal length
# are then padded with NaN rather than rejected.
df = pd.DataFrame({ key:pd.Series(value) for key, value in launch_dict.items() })

In [None]:
# Save the table as "web_scraping.csv" (relative path), omitting the index column.
df.to_csv("web_scraping.csv", index=False)