In [1]:
# import necessary packages. 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [101]:
# Load the launch records from the CSV; the file's first column is used as the row index.
space_missions_df = pd.read_csv(
    './Space_Corrected.csv',
    index_col=0,
)

In [102]:
# The CSV ships a leftover column labelled `Unnamed: 0`; call it `index` instead.
space_missions_df = space_missions_df.rename(
    {"Unnamed: 0": "index"},
    axis="columns",
)

In [4]:
# Overview of the frame: column names, non-null counts and dtypes.
space_missions_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4324 entries, 0 to 4323
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   index           4324 non-null   int64 
 1   Company Name    4324 non-null   object
 2   Location        4324 non-null   object
 3   Datum           4324 non-null   object
 4   Detail          4324 non-null   object
 5   Status Rocket   4324 non-null   object
 6    Rocket         964 non-null    object
 7   Status Mission  4324 non-null   object
dtypes: int64(1), object(7)
memory usage: 304.0+ KB


In [5]:
# Summary statistics for the frame.
# NOTE(review): the only column summarised in the output is `index`, a row counter, so
# these numbers carry no analytical meaning; `describe(include='object')` would
# summarise the text columns instead.
space_missions_df.describe()

Unnamed: 0,index
count,4324.0
mean,2161.5
std,1248.375611
min,0.0
25%,1080.75
50%,2161.5
75%,3242.25
max,4323.0


In [10]:
# Show every row that contains at least one missing value.
has_missing_value = space_missions_df.isna().any(axis=1)
space_missions_df.loc[has_missing_value]

Unnamed: 0,index,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
7,7,CASC,"LC-101, Wenchang Satellite Launch Center, China","Thu Jul 23, 2020 04:41 UTC",Long March 5 | Tianwen-1,StatusActive,,Success
13,13,IAI,"Pad 1, Palmachim Airbase, Israel","Mon Jul 06, 2020 01:00 UTC",Shavit-2 | Ofek-16,StatusActive,,Success
28,28,VKS RF,"Site 43/4, Plesetsk Cosmodrome, Russia","Fri May 22, 2020 07:31 UTC",Soyuz 2.1b/Fregat-M | Cosmos 2546,StatusActive,,Success
31,31,ExPace,"Site 95, Jiuquan Satellite Launch Center, China","Tue May 12, 2020 01:16 UTC",Kuaizhou 1A | Xingyun-2 01 (Wuhan) & 02,StatusActive,,Success
...,...,...,...,...,...,...,...,...
4319,4319,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Wed Feb 05, 1958 07:33 UTC",Vanguard | Vanguard TV3BU,StatusRetired,,Failure
4320,4320,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA","Sat Feb 01, 1958 03:48 UTC",Juno I | Explorer 1,StatusRetired,,Success
4321,4321,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Fri Dec 06, 1957 16:44 UTC",Vanguard | Vanguard TV3,StatusRetired,,Failure
4322,4322,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan","Sun Nov 03, 1957 02:30 UTC",Sputnik 8K71PS | Sputnik-2,StatusRetired,,Success


In [103]:
# Preview the first five rows.
space_missions_df.head()

Unnamed: 0,index,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [104]:
# Count duplicated rows in the data frame.
# The `index` column is a per-row counter, so it is left out of the comparison;
# with it included no two rows could ever compare equal.
# The count is printed explicitly because only the last expression of a cell is
# displayed automatically.
duplicate_row_count = space_missions_df.drop(columns="index").duplicated().sum()
print(f"Duplicated rows (ignoring the 'index' column): {duplicate_row_count}")

# Report, per column, whether it contains any null / NaN values.
space_missions_df.isna().any()

index             False
Company Name      False
Location          False
Datum             False
Detail            False
Status Rocket     False
 Rocket            True
Status Mission    False
dtype: bool

In [52]:
# List the column labels. Note that the output shows ' Rocket' with a leading space,
# so that column must be referenced with the space included.
space_missions_df.columns

Index(['index', 'Company Name', 'Location', 'Datum', 'Detail', 'Status Rocket',
       ' Rocket', 'Status Mission'],
      dtype='object')

In [98]:
# Split `Datum` into a time-zone label, a parsed timestamp, and separate date / time columns.

# A trailing run of capital letters (e.g. "UTC") is treated as the time-zone label.
# It is matched per row, so the result does not depend on what the first row contains.
tz_suffix_pattern = r'\s+([A-Z]{2,5})$'

# Time-zone label of each row; NaN where `Datum` has no such trailing token.
space_missions_df['time_zone'] = (
    space_missions_df['Datum'].str.extract(tz_suffix_pattern, expand=False)
)

# `Datum` text with the time-zone label removed, ready for parsing.
datum_without_tz = space_missions_df['Datum'].str.replace(tz_suffix_pattern, '', regex=True)

# Parse into proper datetimes. `format='mixed'` infers the format for each value
# individually, which tolerates entries with and without a time component.
space_missions_df['date_um'] = pd.to_datetime(datum_without_tz, format='mixed')

# Calendar date and wall-clock time as separate columns.
# NOTE(review): an entry that had no time in `Datum` will presumably show 00:00:00 here,
# which is indistinguishable from a genuine midnight launch - confirm if that matters.
space_missions_df['date'] = space_missions_df['date_um'].dt.date
space_missions_df['time'] = space_missions_df['date_um'].dt.time



In [None]:
# Parse `Datum` (weekday, month, day, year, HH:MM and time-zone name) in a single step.
# NOTE(review): with an explicit format every value has to match it, so any `Datum`
# lacking the time or zone part would make this raise - confirm before relying on it.
datum_format = '%a %b %d, %Y %H:%M %Z'
dt = pd.to_datetime(space_missions_df['Datum'], format=datum_format)

#### Unique companies and their distribution

How many unique companies are represented in the dataset? What's the distribution of companies?

In [109]:
# List the distinct company names, in order of first appearance.
space_missions_df['Company Name'].unique()

array(['SpaceX', 'CASC', 'Roscosmos', 'ULA', 'JAXA', 'Northrop', 'ExPace',
       'IAI', 'Rocket Lab', 'Virgin Orbit', 'VKS RF', 'MHI', 'IRGC',
       'Arianespace', 'ISA', 'Blue Origin', 'ISRO', 'Exos', 'ILS',
       'i-Space', 'OneSpace', 'Landspace', 'Eurockot', 'Land Launch',
       'CASIC', 'KCST', 'Sandia', 'Kosmotras', 'Khrunichev', 'Sea Launch',
       'KARI', 'ESA', 'NASA', 'Boeing', 'ISAS', 'SRC', 'MITT', 'Lockheed',
       'AEB', 'Starsem', 'RVSN USSR', 'EER', 'General Dynamics',
       'Martin Marietta', 'Yuzhmash', 'Douglas', 'ASI', 'US Air Force',
       'CNES', 'CECLES', 'RAE', 'UT', 'OKB-586', 'AMBA',
       "Arm??e de l'Air", 'US Navy'], dtype=object)