# Objectives:
* Importing raw CSV Master Data Table
* Removing comma seperation for number >1000
* Selectively choosing and exporting data pertinent to the planned visualizations
* Splitting one series to multiple, convert data type
* Performing quick value_counts for some columns

## Import Dependencies

In [1]:
import pandas as pd
import csv
import os
import datetime

## Import CSV

In [2]:
csv_path = os.path.join ("Data", "UCS-Satellite_Master.csv")
print (csv_path)

Data\UCS-Satellite_Master.csv


In [3]:
with open (csv_path, "r", encoding="UTF-8", newline="") as file_handler:
    data = csv.reader(file_handler, delimiter=",")

In [4]:
# unicode_escape is used as there are other non-ASCII characters 
df = pd.read_csv(csv_path, index_col=0, encoding= 'unicode_escape')

In [5]:
# reset default index
# create new columns called Orig_Sequence to make sure even when data is splitting
# the order of the original data is preserved
df.reset_index(inplace=True)
df["Orig_Sequence"] = df.index

In [6]:
df.columns

Index(['Satellite_Names', 'Country_of_Operator_Owner', 'Operator_Owner',
       'Users_Names', 'Purpose', 'Orbit_Classes', 'Orbit_Types', 'Perigee_km',
       'Apogee_km', 'Inclination_degrees', 'Period_minutes', 'Launch_Mass_km',
       'Dry_Mass_kg', 'Power_watts', 'Launch_Date', 'Expected_Lifetime_years',
       'Contractors_Names', 'Contractors_Country_Names', 'Launch_Site',
       'Launch_Vehicle', 'COSPAR_Number', 'NORAD_Number', 'Comments',
       'Orig_Sequence'],
      dtype='object')

In [7]:
# new sliced table called Sat_40_yr :: 
# the historical countries that have satellite in the last 40 years
Sat_40_yr = df[["Orig_Sequence", "Country_of_Operator_Owner"]]

In [8]:
Sat_count_40yr = Sat_40_yr['Country_of_Operator_Owner'].value_counts(dropna=True).to_frame()
Sat_count_40yr.reset_index(inplace=True)

In [9]:
Sat_count_40yr.rename(columns={"index":"Country_of_Operator_Owner", "Country_of_Operator_Owner":"Value_Counts"}, inplace=True)

In [10]:
# export to csv
Sat_count_40yr.to_csv(os.path.join("Data", "satCount40yr.csv"))

In [11]:
# new sliced table to contains launch date and broken down to 3 more columns
# day, month, and year
df['Launch_Date'] = pd.to_datetime(df['Launch_Date'])

In [12]:
df["Launch_Day"] = df["Launch_Date"].dt.day
df["Launch_Month"] = df["Launch_Date"].dt.month
df["Launch_Year"] = df["Launch_Date"].dt.year
Launch_Date = df[["Launch_Date", "Launch_Day", "Launch_Month","Launch_Year"]]

In [13]:
# export to csv
Launch_Date.to_csv(os.path.join("Data", "launchDate.csv"))

In [14]:
# new sliced table for demographic information of each satellite names
demoGData = df[['Orig_Sequence', 'Satellite_Names', 'Launch_Date', 'Country_of_Operator_Owner', 'Launch_Site', 'Operator_Owner',
       'Purpose', 'Orbit_Classes', 'Period_minutes', 'Launch_Mass_km',
       ]]


In [15]:
# remove comma thoudsand seperator, then convert from string to float
demoGData['Period_minutes'] = round(demoGData['Period_minutes'].str.replace(',', '', regex=True).astype('float'),2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [16]:
# create new columns period hours from minutes
demoGData.insert(8, "Period_Hours", round(demoGData['Period_minutes']/60,2), allow_duplicates = False)

In [18]:
# export to csv
demoGData.to_csv(os.path.join("Data", "demoGData.csv"))