# Successful Space Orbital Launches in 2019

In [1]:
# getting tables from url

In [2]:
url = 'https://en.wikipedia.org/wiki/2019_in_spaceflight#Orbital_launches'

import pandas as pd
tables = pd.read_html(url)

In [3]:
df = tables[3]

In [4]:
df = df[3:]

In [5]:
df.columns = ['Date and time (UTC)','Rocket','Flight number','1','Launch site','2','LS']

In [6]:
df.head()

Unnamed: 0,Date and time (UTC),Rocket,Flight number,1,Launch site,2,LS
3,10 January17:05[20],Long March 3B/E,3B-Y56[21],Xichang LC-2,Xichang LC-2,CASC,CASC
4,10 January17:05[20],Chinasat 2D,CNSA,Geosynchronous,Communications,In orbit,Operational
5,11 January15:31[22],Falcon 9 Block 5,F9-067,Vandenberg SLC-4E,Vandenberg SLC-4E,SpaceX,SpaceX
6,11 January15:31[22],Iridium NEXT 66-75,Iridium,Low Earth,Communications,In orbit,Operational
7,15 January[23][24],Simorgh,,Semnan LP-2,Semnan LP-2,ISA,ISA


# Convert to ISO date format

In [7]:
import dateutil.parser as parser
text = '10 January 2019 17:05'
date = parser.parse(text)
print(date.isoformat())

2019-01-10T17:05:00


In [8]:
from datetime import datetime
date = datetime.strptime('10 January 2019 17:05:00', '%d %B %Y %H:%M:%S')
date.isoformat()

'2019-01-10T17:05:00'

In [9]:
def isodateformat(dt):
    if '['in dt:
        id = dt.index('[')
        dt = dt[:id]
    d = dt.split()
    #print(d)
    cnt=0
    for x in d[1]:
        if x.isdigit():
            idx = d[1].index(x)
            cnt=1
            break
    if cnt==0:
        idx=len(d[1])
        d[1]+='00:00'
    
    time = d[1][idx:]
    d[1] = d[1][:idx]
    d.insert(2,'2019')
    time = time.split(':')
    if len(time)!=3:
        time.append('00')
    
    #print(time)
    date = ' '.join(d[:3])+'T'+':'.join(time)
    #print(date)
    date = datetime.strptime(date, '%d %B %YT%H:%M:%S')
    date.isoformat()
    isodate = 'T'.join(str(date).split(' '))+'+00:00'
    return isodate

In [10]:
dt = df.iloc[140]['Date and time (UTC)']
isodateformat(dt)

'2019-06-05T04:06:00+00:00'

In [11]:
sum(df['Date and time (UTC)'].str.startswith('←'))

22

# Cleaning Data

In [12]:
indextoremove = df[df['Date and time (UTC)'].str.startswith('←')].index

In [13]:
df.drop(indextoremove,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [14]:
df['Date and time (UTC)'] = df['Date and time (UTC)'].apply(lambda dt : isodateformat(dt))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [15]:
df.iloc[5]['Date and time (UTC)']

'2019-01-15T00:00:00+00:00'

In [16]:
df.head()

Unnamed: 0,Date and time (UTC),Rocket,Flight number,1,Launch site,2,LS
3,2019-01-10T17:05:00+00:00,Long March 3B/E,3B-Y56[21],Xichang LC-2,Xichang LC-2,CASC,CASC
4,2019-01-10T17:05:00+00:00,Chinasat 2D,CNSA,Geosynchronous,Communications,In orbit,Operational
5,2019-01-11T15:31:00+00:00,Falcon 9 Block 5,F9-067,Vandenberg SLC-4E,Vandenberg SLC-4E,SpaceX,SpaceX
6,2019-01-11T15:31:00+00:00,Iridium NEXT 66-75,Iridium,Low Earth,Communications,In orbit,Operational
7,2019-01-15T00:00:00+00:00,Simorgh,,Semnan LP-2,Semnan LP-2,ISA,ISA


In [17]:
df = df.reset_index(drop=True)

In [18]:
df.head()

Unnamed: 0,Date and time (UTC),Rocket,Flight number,1,Launch site,2,LS
0,2019-01-10T17:05:00+00:00,Long March 3B/E,3B-Y56[21],Xichang LC-2,Xichang LC-2,CASC,CASC
1,2019-01-10T17:05:00+00:00,Chinasat 2D,CNSA,Geosynchronous,Communications,In orbit,Operational
2,2019-01-11T15:31:00+00:00,Falcon 9 Block 5,F9-067,Vandenberg SLC-4E,Vandenberg SLC-4E,SpaceX,SpaceX
3,2019-01-11T15:31:00+00:00,Iridium NEXT 66-75,Iridium,Low Earth,Communications,In orbit,Operational
4,2019-01-15T00:00:00+00:00,Simorgh,,Semnan LP-2,Semnan LP-2,ISA,ISA


In [19]:
unique_dates = df['Date and time (UTC)'].unique().tolist()

In [20]:
unique_dates[0]

'2019-01-10T17:05:00+00:00'

In [21]:
new_df = {}
for date in unique_dates:
    new_df[date] = df.loc[df['Date and time (UTC)'] == date]['LS'].tolist()
    

In [22]:
list(new_df.values())[:2]

[['CASC', 'Operational'], ['SpaceX', 'Operational']]

# Finding successful launches

In [25]:
successdates = []
for k,v in new_df.items():
    if 'Successful' in v or 'Operational' in v or 'En Route' in v: 
        successdates.append(k)

In [26]:
successdates[:3]

['2019-01-10T17:05:00+00:00',
 '2019-01-11T15:31:00+00:00',
 '2019-01-18T00:50:20+00:00']

In [27]:
unique_dates = {}
for dt in successdates:
    date = dt.split('T')[0]
    if date not in unique_dates:
        unique_dates[date]=1
    else:
        unique_dates[date]+=1
unique_dates.values()

dict_values([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1])

In [28]:
from datetime import date, timedelta

sdate = date(2019, 1, 1)   # start date
edate = date(2019, 12, 31)   # end date

delta = edate - sdate       # as timedelta

required_dict = {}

for i in range(delta.days + 1):
    day = sdate + timedelta(days=i)
    if str(day) in unique_dates.keys():
        required_dict[str(day)+'T00:00:00+00:00'] = unique_dates[str(day)]
    else:
        required_dict[str(day)+'T00:00:00+00:00'] = 0

In [None]:
# Required Values 

In [29]:
required_dict

{'2019-01-01T00:00:00+00:00': 0,
 '2019-01-02T00:00:00+00:00': 0,
 '2019-01-03T00:00:00+00:00': 0,
 '2019-01-04T00:00:00+00:00': 0,
 '2019-01-05T00:00:00+00:00': 0,
 '2019-01-06T00:00:00+00:00': 0,
 '2019-01-07T00:00:00+00:00': 0,
 '2019-01-08T00:00:00+00:00': 0,
 '2019-01-09T00:00:00+00:00': 0,
 '2019-01-10T00:00:00+00:00': 1,
 '2019-01-11T00:00:00+00:00': 1,
 '2019-01-12T00:00:00+00:00': 0,
 '2019-01-13T00:00:00+00:00': 0,
 '2019-01-14T00:00:00+00:00': 0,
 '2019-01-15T00:00:00+00:00': 0,
 '2019-01-16T00:00:00+00:00': 0,
 '2019-01-17T00:00:00+00:00': 0,
 '2019-01-18T00:00:00+00:00': 1,
 '2019-01-19T00:00:00+00:00': 1,
 '2019-01-20T00:00:00+00:00': 0,
 '2019-01-21T00:00:00+00:00': 1,
 '2019-01-22T00:00:00+00:00': 0,
 '2019-01-23T00:00:00+00:00': 0,
 '2019-01-24T00:00:00+00:00': 1,
 '2019-01-25T00:00:00+00:00': 0,
 '2019-01-26T00:00:00+00:00': 0,
 '2019-01-27T00:00:00+00:00': 0,
 '2019-01-28T00:00:00+00:00': 0,
 '2019-01-29T00:00:00+00:00': 0,
 '2019-01-30T00:00:00+00:00': 0,
 '2019-01-

# Writing to CSV

In [31]:
data = pd.DataFrame.from_dict(required_dict, orient='index',columns = None)

In [32]:
data = data.reset_index()

In [33]:
data.columns = ['date', 'value']

In [34]:
data.to_csv('launch.csv')

In [37]:
data.head()

Unnamed: 0,date,value
0,2019-01-01T00:00:00+00:00,0
1,2019-01-02T00:00:00+00:00,0
2,2019-01-03T00:00:00+00:00,0
3,2019-01-04T00:00:00+00:00,0
4,2019-01-05T00:00:00+00:00,0
