In [1]:
import pandas as pd
import datetime, time
import numpy as np
from sqlalchemy import create_engine
import os 

In [2]:
pd.set_option('display.max_columns', 20)

In [3]:
astr_df = pd.read_csv('astronauts.csv')
spacewalks_df = pd.read_csv('space_walks.csv')
spacemissions_df = pd.read_csv('space_missions.csv')
global_launches_df = pd.read_csv('global_space_launches.csv')

In [22]:
astr_df.head(1)

Unnamed: 0,Name,Year,Group,Status,Birth Date,Birth Place,Gender,Alma Mater,Undergraduate Major,Graduate Major,Military Rank,Military Branch,Space Flights,Space Flight (hr),Space Walks,Space Walks (hr),Missions,Death Date,Death Mission
0,Joseph M. Acaba,2004.0,19.0,Active,5/17/1967,"Inglewood, CA",Male,University of California-Santa Barbara; Univer...,Geology,Geology,,,2,3307,2,13.0,"STS-119 (Discovery), ISS-31/32 (Soyuz)",,


In [None]:
spacewalks_df.head(1)

In [None]:
spacemissions_df.head(1)

In [None]:
global_launches_df.head(1)

## Dropping columns

In [4]:
spacemissions_df.drop(labels=['Unnamed: 0', 'Unnamed: 0.1'], axis=1, inplace=True)
global_launches_df.drop(labels=['DateTime','Year','Month','Day','Time'], axis=1, inplace=True)

## Date conversions

In [5]:
date_df = pd.DataFrame({"Global Launches Dates": global_launches_df.Date, "Spacemissions Dates": spacemissions_df.Datum
                       ,"Spacewalks Dates": spacewalks_df.Date})
date_df.head()

Unnamed: 0,Global Launches Dates,Spacemissions Dates,Spacewalks Dates
0,07/08/2020,"Fri Aug 07, 2020 05:12 UTC",06/03/1965
1,06/08/2020,"Thu Aug 06, 2020 04:01 UTC","March 16-17, 1966"
2,04/08/2020,"Tue Aug 04, 2020 23:57 UTC",06/05/1966
3,30/07/2020,"Thu Jul 30, 2020 21:25 UTC",07/19/1966
4,30/07/2020,"Thu Jul 30, 2020 11:50 UTC",07/20/1966


In [6]:
date_df.dtypes

Global Launches Dates    object
Spacemissions Dates      object
Spacewalks Dates         object
dtype: object

In [7]:
## Splicing string to get rid of time and timezone
spacemissions_df.Datum = spacemissions_df.Datum.apply(lambda x: x[0:16])

In [8]:
# Built-in Pandas datetime function

global_launches_df.Date = pd.to_datetime(global_launches_df.Date, format='%d/%m/%Y', errors = "coerce")

spacemissions_df.Datum = pd.to_datetime(spacemissions_df.Datum, format= '%a %b %d, %Y', errors='coerce')

spacewalks_df.Date = pd.to_datetime(spacewalks_df.Date, format= "%m/%d/%Y", errors="coerce")

In [9]:
# Re-running date_df to verify conversions were successful

In [10]:
date_df = pd.DataFrame({"Global Launches Dates": global_launches_df.Date, "Spacemissions Dates": spacemissions_df.Datum
                       ,"Spacewalks Dates": spacewalks_df.Date})
date_df.head()

Unnamed: 0,Global Launches Dates,Spacemissions Dates,Spacewalks Dates
0,2020-08-07,2020-08-07,1965-06-03
1,2020-08-06,2020-08-06,NaT
2,2020-08-04,2020-08-04,1966-06-05
3,2020-07-30,2020-07-30,1966-07-19
4,2020-07-30,2020-07-30,1966-07-20


In [11]:
date_df.dtypes

Global Launches Dates    datetime64[ns]
Spacemissions Dates      datetime64[ns]
Spacewalks Dates         datetime64[ns]
dtype: object

## Renaming columns for ease of access in pgAdmin

In [24]:
global_launches_df.columns = ['company_name', 'location', 'detail', 'status_rocket', 'rocket',
       'status_mission', 'country_of_launch', 'company_country_origin',
       'private_or_state', 'date']

spacemissions_df.columns = ['company_name', 'location', 'date', 'detail', 'status_rocket',
       'rocket', 'status_mission']

spacewalks_df.columns = ['eva#', 'country', 'crew', 'vehicle', 'date', 'duration', 'purpose']

astr_df.columns = ['name', 'year', 'group', 'status', 'birth_date', 'birth_place',
       'gender', 'alma_mater', 'undergraduate_major', 'graduate_major',
       'military_rank', 'military_branch', 'space_flights',
       'space_flight(hr)', 'space_walks', 'space_walks(hr)', 'missions',
       'death_date', 'death_mission']

## Creating connection to space_db  and converting dataframes to sql tables

In [28]:
password = os.environ.get('postgres_password')

In [15]:
engine = create_engine(f"postgresql://postgres:{password}@localhost:5432/space_db")

In [29]:
global_launches_df.to_sql("global_launches", engine)
spacemissions_df.to_sql("space_missions", engine)

In [30]:
spacewalks_df.to_sql("spacewalks", engine)

In [38]:
spacemissions_df.loc[spacemissions_df.date == '1969-07-20']

Unnamed: 0,company_name,location,date,detail,status_rocket,rocket,status_mission


In [37]:
spacewalks_df.head(50)

Unnamed: 0,eva#,country,crew,vehicle,date,duration,purpose
0,1.0,USA,Ed White,Gemini IV,1965-06-03,0:36,First U.S. EVA. Used HHMU and took photos. G...
1,2.0,USA,David Scott,Gemini VIII,NaT,0:00,HHMU EVA cancelled before starting by stuck on...
2,3.0,USA,Eugene Cernan,Gemini IX-A,1966-06-05,2:07,"Inadequate restraints, stiff 25ft umbilical an..."
3,4.0,USA,Mike Collins,Gemini X,1966-07-19,0:50,Standup EVA. UV photos of stars. Ended by ey...
4,5.0,USA,Mike Collins,Gemini X,1966-07-20,0:39,Retrieved MMOD experiment from docked Agena. ...
5,6.0,USA,Richard Gordon,Gemini XI,1966-09-13,0:44,Attached tether between Agena and Gemini. EVA...
6,7.0,USA,Richard Gordon,Gemini XI,1966-09-14,2:10,Standup EVA. Took star photos. Agena tether ops
7,8.0,USA,Buzz Aldrin,Gemini XII,1966-11-12,2:29,Standup EVA. Science tasks. Took star photos
8,9.0,USA,Buzz Aldrin,Gemini XII,1966-11-13,2:06,Attached tether between Agena and Gemini. UV ...
9,10.0,USA,Buzz Aldrin,Gemini XII,1966-11-14,0:55,Standup EVA. Jettisoned equipment. Took photos
