In [2]:
import pandas as pd
import numpy as np

# Google Colab users: the CSV has to be uploaded to the session before it can
# be read. Running these two lines in a Colab cell does that:
#   from google.colab import files
#   uploaded = files.upload()
# The uploaded file then sits in the session's current directory.

# Read the launch records; 'spacex_comprehensive_launches.csv' must be present
# in the working directory (e.g. uploaded to the Colab session).
df = pd.read_csv("spacex_comprehensive_launches.csv")

# Preview the first 10 rows under a heading (one print call, newline-separated).
print("First 10 rows of the DataFrame:", df.head(10), sep="\n")

# Percentage of missing values per column.
# isnull().mean() is (number of nulls) / (total number of rows). Dividing by
# df.count() instead would be wrong: count() only counts NON-null entries, so
# the ratio would be nulls/non-nulls, overstating the percentage and producing
# inf for a column that is entirely null.
print("\nPercentage of missing values in each column:")
print(df.isnull().mean() * 100)

# Show each column's dtype so numerical and categorical columns can be told apart.
print("\nData types of each column:")
print(df.dtypes)

# Frequency tables for the launch site and orbit columns: print a heading,
# then the count of rows per distinct value.
for heading, column in (
    ("\nNumber of launches on each site:", 'LaunchSite'),
    ("\nNumber and occurrence of each orbit:", 'Orbit'),
):
    print(heading)
    print(df[column].value_counts())

# Frequency table for the 'Outcome' column, kept in a variable for later use.
# NOTE(review): the heading says "mission outcomes" while the variable is
# named landing_outcomes — confirm which of the two 'Outcome' represents.
print("\nNumber and occurrence of mission outcomes:")
landing_outcomes = df['Outcome'].value_counts()
print(landing_outcomes)

# Binary landing label derived from 'Booster landing':
#   1 -> the value is exactly 'Success'
#   0 -> anything else (e.g. 'No Attempt', 'Failure', or a missing entry)
# A vectorized comparison is used instead of a per-row apply/lambda; the
# explicit int64 cast turns the boolean mask into a plain integer column.
# NOTE(review): the match is exact and case-sensitive — confirm the column has
# no variants (extra whitespace, qualifiers) that should also count as success.
df['Class'] = df['Booster landing'].eq('Success').astype('int64')

# Show the first rows so the new 'Class' column can be inspected.
print("\nFirst 5 rows of the DataFrame with 'Class' column:")
print(df.head(5))

# The mean of a 0/1 column is the fraction of rows labelled 1.
print("\nSuccess rate (mean of 'Class' column):")
print(df['Class'].mean())

# Persist the labelled frame; index=False keeps the row index out of the CSV.
df.to_csv("dataset_part_2.csv", index=False)
print("\nModified DataFrame saved to 'dataset_part_2.csv'")

First 10 rows of the DataFrame:
   Flight No.  FlightNumber                      Date      Time Launchoutcome  \
0           1             1  2006-03-24T22:30:00.000Z  22:30:00         False   
1           2             2  2007-03-21T01:10:00.000Z  01:10:00         False   
2           3             3  2008-08-03T03:34:00.000Z  03:34:00         False   
3           4             4  2008-09-28T23:15:00.000Z  23:15:00          True   
4           5             5  2009-07-13T03:35:00.000Z  03:35:00          True   
5           6             6  2010-06-04T18:45:00.000Z  18:45:00          True   
6           7             7  2010-12-08T15:43:00.000Z  15:43:00          True   
7           8             8  2012-05-22T07:44:00.000Z  07:44:00          True   
8           9             9  2012-10-08T00:35:00.000Z  00:35:00          True   
9          10            10  2013-03-01T19:10:00.000Z  19:10:00          True   

  Outcome BoosterVersion Version Booster  \
0   False       Falcon 1        