In [1]:
# |exporti

import streamlit as st

from streamlit_jupyter import StreamlitPatcher, tqdm
StreamlitPatcher().jupyter() # register streamlit with jupyter-compatible wrappers

from streamlit_folium import st_folium

In [2]:
# Notebook-only cell (no export directive): keep a handle to a patcher instance
# and register it. The import cell above already calls
# StreamlitPatcher().jupyter(), so this repeats that registration.
sp = StreamlitPatcher()
sp.jupyter()  # register patcher with streamlit

In [3]:
# |exporti

# Render the page title.
st.title("All space missions from 1957")

# All space missions from 1957

In [4]:
# |exporti

# import necessary packages. 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from geopy.geocoders import Nominatim
import geocoder

import io

In [5]:
# |exporti

# Emit the section heading, then read the missions CSV from the working
# directory; the file's first column becomes the row index.
st.markdown(' # Fetch the data from csv file and store it in a variable.')
space_missions_df = pd.read_csv(
    filepath_or_buffer='./Space_Corrected.csv',
    index_col=0,
)

 # Fetch the data from csv file and store it in a variable.

In [6]:
# |exporti

st.markdown(' ## Rename the columns named `Unnamed: 0` to `index` ')

# Map the raw CSV headers onto the names used by the rest of the notebook.
space_missions_df = space_missions_df.rename(
    columns={
        "Unnamed: 0": "index",
        "Company Name": "company_name",
        "Status Rocket": "status_rocket",
        " Rocket": "Rocket",  # the source header carries a leading space
        "Status Mission": "status_mission",
    }
)

 ## Rename the columns named `Unnamed: 0` to `index` 

In [7]:
# |exporti

st.markdown(' ## Explore the structure of the dataset ')

# DataFrame.info() writes its report to a stream, so collect it in an
# in-memory text buffer and hand the resulting string to Streamlit.
buffer = io.StringIO()
space_missions_df.info(buf=buffer)
df_space_missions_info = buffer.getvalue()

st.text(df_space_missions_info)

 ## Explore the structure of the dataset 

```None
<class 'pandas.core.frame.DataFrame'>
Index: 4324 entries, 0 to 4323
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   index           4324 non-null   int64 
 1   company_name    4324 non-null   object
 2   Location        4324 non-null   object
 3   Datum           4324 non-null   object
 4   Detail          4324 non-null   object
 5   status_rocket   4324 non-null   object
 6   Rocket          964 non-null    object
 7   status_mission  4324 non-null   object
dtypes: int64(1), object(7)
memory usage: 304.0+ KB

```

In [8]:
# |exporti

# Show pandas' default summary statistics for the frame.
st.write(space_missions_df.describe())

Unnamed: 0,index
count,4324.0
mean,2161.5
std,1248.375611
min,0.0
25%,1080.75
50%,2161.5
75%,3242.25
max,4323.0


In [9]:
# |exporti

st.markdown(' ### check if there is any null or nan columns.')

# One boolean per column: True when that column holds at least one missing value.
st.write(space_missions_df.isnull().any())

 ### check if there is any null or nan columns.

index             False
company_name      False
Location          False
Datum             False
Detail            False
status_rocket     False
Rocket             True
status_mission    False
dtype: bool

In [10]:
# |exporti

st.markdown(' ## Explore what are the missing values in the `Rocket` column.')

# Keep only the rows in which at least one column is missing.
st.dataframe(space_missions_df.loc[space_missions_df.isna().any(axis=1)])

 ## Explore what are the missing values in the `Rocket` column.

Unnamed: 0,index,company_name,Location,Datum,Detail,status_rocket,Rocket,status_mission
2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
7,7,CASC,"LC-101, Wenchang Satellite Launch Center, China","Thu Jul 23, 2020 04:41 UTC",Long March 5 | Tianwen-1,StatusActive,,Success
13,13,IAI,"Pad 1, Palmachim Airbase, Israel","Mon Jul 06, 2020 01:00 UTC",Shavit-2 | Ofek-16,StatusActive,,Success
28,28,VKS RF,"Site 43/4, Plesetsk Cosmodrome, Russia","Fri May 22, 2020 07:31 UTC",Soyuz 2.1b/Fregat-M | Cosmos 2546,StatusActive,,Success
31,31,ExPace,"Site 95, Jiuquan Satellite Launch Center, China","Tue May 12, 2020 01:16 UTC",Kuaizhou 1A | Xingyun-2 01 (Wuhan) & 02,StatusActive,,Success
...,...,...,...,...,...,...,...,...
4319,4319,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Wed Feb 05, 1958 07:33 UTC",Vanguard | Vanguard TV3BU,StatusRetired,,Failure
4320,4320,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA","Sat Feb 01, 1958 03:48 UTC",Juno I | Explorer 1,StatusRetired,,Success
4321,4321,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Fri Dec 06, 1957 16:44 UTC",Vanguard | Vanguard TV3,StatusRetired,,Failure
4322,4322,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan","Sun Nov 03, 1957 02:30 UTC",Sputnik 8K71PS | Sputnik-2,StatusRetired,,Success


In [11]:
# Notebook-only preview of the first rows (cell has no export directive).
space_missions_df.head()

Unnamed: 0,index,company_name,Location,Datum,Detail,status_rocket,Rocket,status_mission
0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [12]:
# |exporti

st.markdown(' ### Fill nan with default `0.0` in the column `Rocket`')

# Assign the filled column back to the frame rather than calling
# fillna(..., inplace=True) on the `space_missions_df['Rocket']` selection.
# The in-place form is a chained assignment: pandas 2.2 warns about it and,
# with Copy-on-Write enabled, it only modifies a temporary object, leaving the
# NaN values in the frame untouched.
space_missions_df['Rocket'] = space_missions_df['Rocket'].fillna(0.0)

 ### Fill nan with default `0.0` in the column `Rocket`

In [13]:
# |exporti

# Heading text for the duplicate check, followed by the number of rows that
# duplicate an earlier row across all columns.
st.markdown(''' 
### Check if there's any duplicated rows. 

duplicated rows from Data frame. 
''')
st.write(space_missions_df.duplicated().sum())

 
### Check if there's any duplicated rows. 

duplicated rows from Data frame. 


0

In [14]:
# |exporti

# Split `Datum` (e.g. "Fri Aug 07, 2020 05:12 UTC") into a time-zone label,
# a parsed timestamp, and separate date / time columns.

# The time-zone label is the sixth space-separated token. Rows whose `Datum`
# has fewer tokens get NaN here.
space_missions_df['time_zone'] = space_missions_df['Datum'].str.split(' ').str[5]

# Strip each row's own trailing alphabetic zone token before parsing.
# Previously the text to remove was taken from row label 0 only, which raised
# when that label was absent or its zone was NaN, and left any other zone
# label in place. Values that end in the year (no zone) are left unchanged.
space_missions_df['date_um'] = space_missions_df['Datum'].str.replace(
    r'\s+[A-Za-z]{2,5}\s*$', '', regex=True
)

# Convert the cleaned strings to datetime64; 'mixed' infers the format per
# element, so rows with and without a time of day both parse.
space_missions_df['date_um'] = pd.to_datetime(space_missions_df['date_um'], format='mixed')

# Expose the calendar date and the time of day as separate columns.
space_missions_df['date'] = space_missions_df['date_um'].dt.date
space_missions_df['time'] = space_missions_df['date_um'].dt.time


In [15]:
# |exporti

st.markdown(' ### Lets re-explore the dataframe with no null values and columns ')

# Show the first ten rows, now including the derived date/time columns.
st.dataframe(space_missions_df.head(n=10))

 ### Lets re-explore the dataframe with no null values and columns 

Unnamed: 0,index,company_name,Location,Datum,Detail,status_rocket,Rocket,status_mission,time_zone,date_um,date,time
0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,UTC,2020-08-07 05:12:00,2020-08-07,05:12:00
1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,UTC,2020-08-06 04:01:00,2020-08-06,04:01:00
2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,0.0,Success,UTC,2020-08-04 23:57:00,2020-08-04,23:57:00
3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,UTC,2020-07-30 21:25:00,2020-07-30,21:25:00
4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success,UTC,2020-07-30 11:50:00,2020-07-30,11:50:00
5,5,CASC,"LC-9, Taiyuan Satellite Launch Center, China","Sat Jul 25, 2020 03:13 UTC","Long March 4B | Ziyuan-3 03, Apocalypse-10 & N...",StatusActive,64.68,Success,UTC,2020-07-25 03:13:00,2020-07-25,03:13:00
6,6,Roscosmos,"Site 31/6, Baikonur Cosmodrome, Kazakhstan","Thu Jul 23, 2020 14:26 UTC",Soyuz 2.1a | Progress MS-15,StatusActive,48.5,Success,UTC,2020-07-23 14:26:00,2020-07-23,14:26:00
7,7,CASC,"LC-101, Wenchang Satellite Launch Center, China","Thu Jul 23, 2020 04:41 UTC",Long March 5 | Tianwen-1,StatusActive,0.0,Success,UTC,2020-07-23 04:41:00,2020-07-23,04:41:00
8,8,SpaceX,"SLC-40, Cape Canaveral AFS, Florida, USA","Mon Jul 20, 2020 21:30 UTC",Falcon 9 Block 5 | ANASIS-II,StatusActive,50.0,Success,UTC,2020-07-20 21:30:00,2020-07-20,21:30:00
9,9,JAXA,"LA-Y1, Tanegashima Space Center, Japan","Sun Jul 19, 2020 21:58 UTC",H-IIA 202 | Hope Mars Mission,StatusActive,90.0,Success,UTC,2020-07-19 21:58:00,2020-07-19,21:58:00


#### Unique companies and the company distributions. 

How many unique companies are represented in the dataset? What's the distribution of companies?

In [16]:
# |exporti

# Number of missions per company, most frequent first.
# (Heading typo "misisons" corrected; the stale commented-out line that still
# referenced the pre-rename 'Company Name' column was removed.)
st.markdown(' ### All the list of unique company names and the count of space missions')
st.write(space_missions_df['company_name'].value_counts())

 ### All the list of unique company names and the count of space misisons

company_name
RVSN USSR           1777
Arianespace          279
CASC                 251
General Dynamics     251
NASA                 203
VKS RF               201
US Air Force         161
ULA                  140
Boeing               136
Martin Marietta      114
SpaceX               100
MHI                   84
Northrop              83
Lockheed              79
ISRO                  76
Roscosmos             55
ILS                   46
Sea Launch            36
ISAS                  30
Kosmotras             22
US Navy               17
ISA                   13
Rocket Lab            13
Eurockot              13
ESA                   13
Blue Origin           12
IAI                   11
ExPace                10
ASI                    9
CNES                   8
AMBA                   8
MITT                   7
JAXA                   7
Land Launch            7
UT                     5
KCST                   5
CASIC                  5
Exos                   4
CECLES                 4
Arm??e de l'

In [17]:
# |exporti

# Count rows for every (company, location, mission detail) combination and
# show the result as a single-column `count` table.
st.dataframe(
    space_missions_df
    .groupby(['company_name', 'Location', 'Detail'])['company_name']
    .agg(['count'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
company_name,Location,Detail,Unnamed: 3_level_1
AEB,"VLS Pad, Alc?›ntara Launch Center, Maranh?œo, Brazil",VLS-1 | SACI-2,1
AEB,"VLS Pad, Alc?›ntara Launch Center, Maranh?œo, Brazil","VLS-1 | SATEC, UNOSAT",1
AEB,"VLS Pad, Alc?›ntara Launch Center, Maranh?œo, Brazil",VLS-1 | SCD-2A,1
AMBA,"LA-5B, RAAF Woomera Range Complex, Australia",Redstone Sparta | WRESAT,1
AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA",Juno I | Explorer 1,1
...,...,...,...
VKS RF,"Site 90/20, Baikonur Cosmodrome, Kazakhstan",Tsyklon-2 | Cosmos 2421,1
Virgin Orbit,"Cosmic Girl, Mojave Air and Space Port, California, USA",LauncherOne | Demo Flight,1
Yuzhmash,"Site 32/2, Plesetsk Cosmodrome, Russia",Tsyklon-3 | Intercosmos 25 & Magion 3,1
Yuzhmash,"Site 90/20, Baikonur Cosmodrome, Kazakhstan",Tsyklon-2 | IS Mass Model,1


#### Launch Location analysis. 

In [18]:

# Notebook-only text preview of the launch count per location.
print(space_missions_df.loc[:, 'Location'].value_counts())

Location
Site 31/6, Baikonur Cosmodrome, Kazakhstan               235
Site 132/1, Plesetsk Cosmodrome, Russia                  216
Site 43/4, Plesetsk Cosmodrome, Russia                   202
Site 41/1, Plesetsk Cosmodrome, Russia                   198
Site 1/5, Baikonur Cosmodrome, Kazakhstan                193
                                                        ... 
Jiuquan Satellite Launch Center, China                     1
LP-41, Kauai, Pacific Missile Range Facility               1
Tai Rui Barge, Yellow Sea                                  1
Launch Plateform, Shahrud Missile Test Site                1
K-496 Submarine, Barents Sea Launch Area, Barents Sea      1
Name: count, Length: 137, dtype: int64


In [19]:
# |exporti

st.markdown(' ### How many unique launch Locations are present in the dataframe. ')

# One row per distinct location with its launch count; reset_index() turns the
# location labels into a regular `Location` column.
launch_locations_count = (
    space_missions_df['Location']
    .value_counts()
    .reset_index()
)

 ### How many unique launch Locations are present in the dataframe. 

In [20]:
# |exporti

# Display the per-location launch counts computed in the previous cell.
st.dataframe(launch_locations_count)

Unnamed: 0,Location,count
0,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",235
1,"Site 132/1, Plesetsk Cosmodrome, Russia",216
2,"Site 43/4, Plesetsk Cosmodrome, Russia",202
3,"Site 41/1, Plesetsk Cosmodrome, Russia",198
4,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",193
...,...,...
132,"Jiuquan Satellite Launch Center, China",1
133,"LP-41, Kauai, Pacific Missile Range Facility",1
134,"Tai Rui Barge, Yellow Sea",1
135,"Launch Plateform, Shahrud Missile Test Site",1


#### Get the latitude and longitude coordinates of all the launch locations.

In [21]:
# |exporti


# Tell the reader that the world map is currently switched off.
st.markdown('''
# *** disabled displaying the world map with launch locations temporarily since the loading time is too slow. ***
''')

# Disabled geocoding pass, kept for reference. For each location it drops the
# text before the first comma, looks the remainder up with geocoder.osm, and
# on success stores the shortened name plus Lat/Long back into
# launch_locations_count.
# for i, launch_location in enumerate(launch_locations_count['Location']):
#     split_location_list = launch_location.split(",", 1)
#     # location_coordinates = split_location_list
#     if(len(split_location_list) > 1):
#           location_name = launch_location.split(",", 1)[1]
#           g = geocoder.osm(location_name)
#           if(g.ok):
#                  launch_locations_count.loc[i, 'Location'] = location_name
#                  launch_locations_count.loc[i, 'Lat'] = g.lat
#                  launch_locations_count.loc[i , 'Long'] = g.lng


# *** disabled displaying the world map with launch locations temporarily since the loading time is too slow. ***


In [22]:
# |exporti

# Drop NaN from launch_locations_count
# launch_locations_count.dropna(subset=["Location", "count", "Lat", "Long"], inplace=True)

#### Creating Map with folium

In [23]:
# |exporti

# st.title("Markers on world map for each location")

In [24]:
# |exporti

# create markers on world map for each location. 
# world_map = folium.Map(location=[0,0], zoom_start=2)

# for i,location in launch_locations_count.iterrows():
#     folium.Marker(
#         location=[location["Lat"], location["Long"]],
#         popup=f"Location: {location['Location']}<br>Count: {location['count']}",
#         icon=folium.Icon(icon="shuttle-space", prefix='fa',
#                          max_width=100)
#     ).add_to(world_map)


In [25]:
# |exporti

# st_folium(world_map, width="100%")

In [26]:
# Notebook-only cell: run nbdev's nb_export on this notebook, with lib_path '.'
# and module name 'space_missions'.
from nbdev.export import nb_export
nb_export('space_missions_eda.ipynb', lib_path='.', name='space_missions')