# Objectives:
* Importing raw CSV Master Data Table
* Removing comma thousands separators from numbers >= 1,000
* Selectively choosing and exporting data pertinent to the planned visualizations
* Splitting one series into multiple series and converting data types
* Performing quick value_counts for some columns

## Import Dependencies

In [1]:
import pandas as pd
import csv
import os
import datetime
import numpy as np

## Import CSV

In [2]:
# Build the location of the raw master table with os.path.join so the
# separator matches whatever operating system runs the notebook.
csv_path = os.path.join("Data", "UCS_Satellite_Master.csv")
print(csv_path)

Data\UCS_Satellite_Master.csv


In [3]:
# Read the raw rows with the csv module.
# csv.reader is lazy, so the rows are materialised into a list while the
# file is still open; a reader left over after the ``with`` block exits
# can no longer yield anything because its file has been closed.
# cp1252 is the same encoding the pd.read_csv call on this file uses.
with open(csv_path, "r", encoding="cp1252", newline="") as file_handler:
    data = list(csv.reader(file_handler, delimiter=","))

In [4]:
# Load the master table; the first CSV column becomes the index (index_col=0).
# cp1252 decoding is used as there are other non-ASCII characters in the file.
# NOTE(review): the previous comment mentioned unicode_escape, but the call
# below has always passed cp1252.
df = pd.read_csv(csv_path, index_col=0, encoding= 'cp1252')
df

Unnamed: 0_level_0,Country_of_Operator_Owner,Operator_Owner,Users_Names,Purpose,Orbit_Classes,Orbit_Types,Perigee_km,Apogee_km,Inclination_degrees,Period_minutes,...,Power_watts,Launch_Date,Expected_Lifetime_years,Contractors_Names,Contractors_Country_Names,Launch_Site,Launch_Vehicle,COSPAR_Number,NORAD_Number,Comments
Satellite_Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1HOPSAT-TD (1st-generation High Optical Performance Satellite),USA,Hera Systems,Commercial,Earth Observation,LEO,Non-Polar Inclined,566,576,36.90,96.08,...,0,12/11/2019,0.5,Hera Systems,USA,Satish Dhawan Space Centre,PSLV,2019-089H,44589,Pathfinder for planned earth observation const...
"TDRS-3 (Tracking and Data Relay Satellite, TDRS-C)",USA,National Aeronautics and Space Administration ...,Government,Communications,GEO,Not Available,35693,35878,11.53,1436.06,...,1800,9/29/1988,10.0,TRW Defense and Space Systems Group,USA,Cape Canaveral,Space Shuttle (STS 26),1988-091B,19548,Backup; still partially operational.
FLTSATCOM-8 (USA 46),USA,US Navy,Military,Communications,GEO,Not Available,35745,35829,8.97,1436.13,...,0,9/25/1989,5.0,"TRW, Defense and Space Systems Group",USA,Cape Canaveral,Atlas Centaur,1989-077A,20253,Old system replaced by UFO satellites; this sa...
AAUSat-4,Denmark,University of Aalborg,Civil,Earth Observation,LEO,Sun-Synchronous,442,687,98.20,95.9,...,0,4/25/2016,0.0,University of Aalborg,Denmark,Guiana Space Center,Soyuz 2.1a,2016-025E,41460,Carries AIS system.
Skynet 4C,United Kingdom,Intelsat/Paradigm Secure Communications (wholl...,Military,Communications,GEO,Not Available,35775,35797,13.60,1436.07,...,1200,8/30/1990,7.0,Astrium,France/UK/Germany,Guiana Space Center,Ariane 44LP,1990-079A,20776,Spare. In March 2010 it was announced that the...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zhuhai-1-03 (OVS-3),China,Zhuhai Orbita Control Engineering Co. Ltd.,Commercial,Earth Observation,LEO,Sun-Synchronous,494,511,97.40,94.6,...,0,9/19/2019,0.0,Zhuhai Orbita Control Engineering Co. Ltd.,China,Jiuquan Satellite Launch Center,Long March 11,2019-060A,44534,Not Available
Ziyuan 1-02C,China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,763,773,98.56,100.2,...,0,12/22/2011,0.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2011-079A,38038,Can acquire high-resolution data through remot...
Ziyuan 1-2D,China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,748,758,98.50,99.8,...,0,9/14/2019,5.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2019-059A,44528,Hyperspectral imaging
Ziyuan 3 (ZY-3),China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,500,504,97.50,94.7,...,0,1/9/2012,4.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2012-001A,38046,Land survey satellite.


In [5]:
# Turn the current index back into an ordinary column, leaving a fresh
# positional index behind.
df.reset_index(inplace=True)
# Record each row's position as "Orig_Sequence" (first column) so the
# original ordering can still be recovered after the data is split up.
df.insert(loc=0, column="Orig_Sequence", value=df.index)

In [6]:
# Remove the cap on displayed columns so previews show every column.
pd.options.display.max_columns = None
df.head(n=5)

Unnamed: 0,Orig_Sequence,Satellite_Names,Country_of_Operator_Owner,Operator_Owner,Users_Names,Purpose,Orbit_Classes,Orbit_Types,Perigee_km,Apogee_km,Inclination_degrees,Period_minutes,Launch_Mass_km,Dry_Mass_kg,Power_watts,Launch_Date,Expected_Lifetime_years,Contractors_Names,Contractors_Country_Names,Launch_Site,Launch_Vehicle,COSPAR_Number,NORAD_Number,Comments
0,0,1HOPSAT-TD (1st-generation High Optical Perfor...,USA,Hera Systems,Commercial,Earth Observation,LEO,Non-Polar Inclined,566,576,36.9,96.08,22,0,0,12/11/2019,0.5,Hera Systems,USA,Satish Dhawan Space Centre,PSLV,2019-089H,44589,Pathfinder for planned earth observation const...
1,1,"TDRS-3 (Tracking and Data Relay Satellite, TDR...",USA,National Aeronautics and Space Administration ...,Government,Communications,GEO,Not Available,35693,35878,11.53,1436.06,3180,1600,1800,9/29/1988,10.0,TRW Defense and Space Systems Group,USA,Cape Canaveral,Space Shuttle (STS 26),1988-091B,19548,Backup; still partially operational.
2,2,FLTSATCOM-8 (USA 46),USA,US Navy,Military,Communications,GEO,Not Available,35745,35829,8.97,1436.13,2310,1884,0,9/25/1989,5.0,"TRW, Defense and Space Systems Group",USA,Cape Canaveral,Atlas Centaur,1989-077A,20253,Old system replaced by UFO satellites; this sa...
3,3,AAUSat-4,Denmark,University of Aalborg,Civil,Earth Observation,LEO,Sun-Synchronous,442,687,98.2,95.9,1,0,0,4/25/2016,0.0,University of Aalborg,Denmark,Guiana Space Center,Soyuz 2.1a,2016-025E,41460,Carries AIS system.
4,4,Skynet 4C,United Kingdom,Intelsat/Paradigm Secure Communications (wholl...,Military,Communications,GEO,Not Available,35775,35797,13.6,1436.07,1474,850,1200,8/30/1990,7.0,Astrium,France/UK/Germany,Guiana Space Center,Ariane 44LP,1990-079A,20776,Spare. In March 2010 it was announced that the...


In [7]:
# List the column labels (now including Orig_Sequence) for reference below.
df.columns

Index(['Orig_Sequence', 'Satellite_Names', 'Country_of_Operator_Owner',
       'Operator_Owner', 'Users_Names', 'Purpose', 'Orbit_Classes',
       'Orbit_Types', 'Perigee_km', 'Apogee_km', 'Inclination_degrees',
       'Period_minutes', 'Launch_Mass_km', 'Dry_Mass_kg', 'Power_watts',
       'Launch_Date', 'Expected_Lifetime_years', 'Contractors_Names',
       'Contractors_Country_Names', 'Launch_Site', 'Launch_Vehicle',
       'COSPAR_Number', 'NORAD_Number', 'Comments'],
      dtype='object')

In [8]:
# function to remove non-ASCII
def remove_non_ascii(text):
    """Return ``text`` with every non-ASCII character removed.

    Args:
        text: The cell value to clean. Strings are filtered; any other
            value (for example the float NaN pandas uses for an empty
            cell, or None) is passed through untouched so the function
            is safe to use with ``Series.apply`` on columns that contain
            missing values.

    Returns:
        The input string without characters whose code point is 128 or
        above, or the original object when it is not a string.
    """
    if not isinstance(text, str):
        # Iterating a float/None would raise TypeError; leave it as-is.
        return text
    # Encoding to ASCII while ignoring failures drops exactly the
    # characters that fall outside the 7-bit range.
    return text.encode("ascii", errors="ignore").decode("ascii")

In [9]:
# Text columns that get scrubbed of non-ASCII characters.
targ_cols = [
    'Satellite_Names', 'Country_of_Operator_Owner', 'Operator_Owner',
    'Users_Names', 'Purpose', 'Orbit_Classes', 'Orbit_Types',
    'Contractors_Names', 'Contractors_Country_Names', 'Launch_Site',
    'Launch_Vehicle', 'Comments',
]
total_cols = len(targ_cols)

# enumerate(start=1) supplies the running "n/total" progress number without
# a hand-maintained counter.
for col_number, each_col in enumerate(targ_cols, start=1):
    print(f"{('=')*20}\nProcessing columns {each_col}")
    df[each_col] = df[each_col].apply(remove_non_ascii)
    print(f"{col_number}/{total_cols} columns")

# Reported once, after the last column has been handled.
print(f'{("=")*20}\nProcess Completed!')

Processing columns Satellite_Names
1/12 columns
Processing columns Country_of_Operator_Owner
2/12 columns
Processing columns Operator_Owner
3/12 columns
Processing columns Users_Names
4/12 columns
Processing columns Purpose
5/12 columns
Processing columns Orbit_Classes
6/12 columns
Processing columns Orbit_Types
7/12 columns
Processing columns Contractors_Names
8/12 columns
Processing columns Contractors_Country_Names
9/12 columns
Processing columns Launch_Site
10/12 columns
Processing columns Launch_Vehicle
11/12 columns
Processing columns Comments
12/12 columns
Process Completed!


In [10]:
# Write the cleaned master table (with Orig_Sequence) to CSV; index=False
# keeps the positional index out of the file.
master_out_path = os.path.join("Data", "Satellite_Launch_MasterCL.csv")
df.to_csv(master_out_path, index=False)
df

Unnamed: 0,Orig_Sequence,Satellite_Names,Country_of_Operator_Owner,Operator_Owner,Users_Names,Purpose,Orbit_Classes,Orbit_Types,Perigee_km,Apogee_km,Inclination_degrees,Period_minutes,Launch_Mass_km,Dry_Mass_kg,Power_watts,Launch_Date,Expected_Lifetime_years,Contractors_Names,Contractors_Country_Names,Launch_Site,Launch_Vehicle,COSPAR_Number,NORAD_Number,Comments
0,0,1HOPSAT-TD (1st-generation High Optical Perfor...,USA,Hera Systems,Commercial,Earth Observation,LEO,Non-Polar Inclined,566,576,36.90,96.08,22,0,0,12/11/2019,0.5,Hera Systems,USA,Satish Dhawan Space Centre,PSLV,2019-089H,44589,Pathfinder for planned earth observation const...
1,1,"TDRS-3 (Tracking and Data Relay Satellite, TDR...",USA,National Aeronautics and Space Administration ...,Government,Communications,GEO,Not Available,35693,35878,11.53,1436.06,3180,1600,1800,9/29/1988,10.0,TRW Defense and Space Systems Group,USA,Cape Canaveral,Space Shuttle (STS 26),1988-091B,19548,Backup; still partially operational.
2,2,FLTSATCOM-8 (USA 46),USA,US Navy,Military,Communications,GEO,Not Available,35745,35829,8.97,1436.13,2310,1884,0,9/25/1989,5.0,"TRW, Defense and Space Systems Group",USA,Cape Canaveral,Atlas Centaur,1989-077A,20253,Old system replaced by UFO satellites; this sa...
3,3,AAUSat-4,Denmark,University of Aalborg,Civil,Earth Observation,LEO,Sun-Synchronous,442,687,98.20,95.9,1,0,0,4/25/2016,0.0,University of Aalborg,Denmark,Guiana Space Center,Soyuz 2.1a,2016-025E,41460,Carries AIS system.
4,4,Skynet 4C,United Kingdom,Intelsat/Paradigm Secure Communications (wholl...,Military,Communications,GEO,Not Available,35775,35797,13.60,1436.07,1474,850,1200,8/30/1990,7.0,Astrium,France/UK/Germany,Guiana Space Center,Ariane 44LP,1990-079A,20776,Spare. In March 2010 it was announced that the...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2661,2661,Zhuhai-1-03 (OVS-3),China,Zhuhai Orbita Control Engineering Co. Ltd.,Commercial,Earth Observation,LEO,Sun-Synchronous,494,511,97.40,94.6,55,0,0,9/19/2019,0.0,Zhuhai Orbita Control Engineering Co. Ltd.,China,Jiuquan Satellite Launch Center,Long March 11,2019-060A,44534,Not Available
2662,2662,Ziyuan 1-02C,China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,763,773,98.56,100.2,1500,0,0,12/22/2011,0.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2011-079A,38038,Can acquire high-resolution data through remot...
2663,2663,Ziyuan 1-2D,China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,748,758,98.50,99.8,2650,0,0,9/14/2019,5.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2019-059A,44528,Hyperspectral imaging
2664,2664,Ziyuan 3 (ZY-3),China,China Centre for Resources Satellite Data and ...,Government,Earth Observation,LEO,Sun-Synchronous,500,504,97.50,94.7,2650,0,0,1/9/2012,4.0,China Academy of Space Technology (CAST),China,Taiyuan Launch Center,Long March 4B,2012-001A,38046,Land survey satellite.


### Satellite Counts by Country / 46 years

In [11]:
# Sat_46_yr: sliced table holding only the original row sequence and the
# owner/operator country of each satellite, used for the per-country counts
# of this "46 years" section.
country_cols = ["Orig_Sequence", "Country_of_Operator_Owner"]
Sat_46_yr = df[country_cols]

In [12]:
# Count satellites per owner/operator country (missing values excluded),
# then move the country labels out of the index into a regular column.
country_tally = Sat_46_yr['Country_of_Operator_Owner'].value_counts(dropna=True)
Sat_count_46yr = country_tally.to_frame()
Sat_count_46yr.reset_index(inplace=True)

In [13]:
# Label the two columns by position rather than by their current names.
# The names produced by value_counts() + reset_index() differ between pandas
# releases ("index"/<series name> before 2.0, <series name>/"count" from 2.0),
# so a label-based rename can silently mislabel the country column.
Sat_count_46yr.columns = ["Country_of_Operator_Owner", "Value_Counts"]
# Add an explicit running number as the first column, taken from the index.
Sat_count_46yr.insert(0, "Serial_Number", Sat_count_46yr.index)

In [14]:
# Display the per-country satellite counts.
Sat_count_46yr

Unnamed: 0,Serial_Number,Country_of_Operator_Owner,Value_Counts
0,0,USA,1308
1,1,China,356
2,2,Russia,167
3,3,United Kingdom,130
4,4,Japan,78
...,...,...,...
98,98,Iran,1
99,99,Morocco/Germany,1
100,100,Colombia,1
101,101,Ecuador,1


In [15]:
# Export the per-country counts to CSV (positional index left out).
country_out_path = os.path.join("Data", "Launch_by_Country_46yr.csv")
Sat_count_46yr.to_csv(country_out_path, index=False)

### Satellite Launch Date

In [16]:
# Convert the Launch_Date column to pandas datetimes in place so that the
# day, month, and year can be extracted with the .dt accessor.
# NOTE(review): no explicit format is passed, so parsing relies on pandas'
# inference; the raw values shown above look like M/D/YYYY - confirm.
df['Launch_Date'] = pd.to_datetime(df['Launch_Date'])

In [17]:
# Break the launch timestamp into separate day / month / year columns.
launch_parts = df["Launch_Date"].dt
df["Launch_Day"] = launch_parts.day
df["Launch_Month"] = launch_parts.month
df["Launch_Year"] = launch_parts.year

# Sliced table holding the launch date and its three components.
launch_cols = ["Orig_Sequence", "Launch_Date", "Launch_Day", "Launch_Month", "Launch_Year"]
Launch_Date = df[launch_cols]

In [18]:
# Preview the first rows of the launch-date table.
Launch_Date.head()


Unnamed: 0,Orig_Sequence,Launch_Date,Launch_Day,Launch_Month,Launch_Year
0,0,2019-12-11,11,12,2019
1,1,1988-09-29,29,9,1988
2,2,1989-09-25,25,9,1989
3,3,2016-04-25,25,4,2016
4,4,1990-08-30,30,8,1990


In [19]:
# export to csv
# (this line was previously missing its leading "#", which made the cell a
# SyntaxError); index=False keeps the positional index out of the file.
Launch_Date.to_csv(os.path.join("Data", "Launch_Date.csv"), index=False)

In [20]:
# Number of launches per day of the month, ordered by day.
# The output column names are set explicitly via rename_axis / reset_index(name=...)
# instead of renaming whatever labels value_counts() happens to produce,
# because those labels differ between pandas releases.
day_counts = (
    Launch_Date["Launch_Day"]
    .value_counts()
    .sort_index(ascending=True)
    .rename_axis("Day")
    .reset_index(name="Day_Counts")
)
day_counts

Unnamed: 0,Day,Day_Counts
0,1,61
1,2,37
2,3,79
3,4,43
4,5,73
5,6,89
6,7,108
7,8,62
8,9,57
9,10,52


In [21]:
# Number of launches per calendar month, ordered by month.
# The output column names are set explicitly via rename_axis / reset_index(name=...)
# instead of renaming whatever labels value_counts() happens to produce,
# because those labels differ between pandas releases.
month_counts = (
    Launch_Date["Launch_Month"]
    .value_counts()
    .sort_index(ascending=True)
    .rename_axis("Month")
    .reset_index(name="Month_Counts")
)
month_counts

Unnamed: 0,Month,Month_Counts
0,1,287
1,2,302
2,3,205
3,4,141
4,5,168
5,6,222
6,7,235
7,8,134
8,9,175
9,10,180


In [22]:
# Number of launches per year, ordered chronologically.
# The output column names are set explicitly via rename_axis / reset_index(name=...)
# instead of renaming whatever labels value_counts() happens to produce,
# because those labels differ between pandas releases.
year_counts = (
    Launch_Date["Launch_Year"]
    .value_counts()
    .sort_index(ascending=True)
    .rename_axis("Year")
    .reset_index(name="Year_Counts")
)
year_counts

Unnamed: 0,Year,Year_Counts
0,1974,1
1,1988,1
2,1989,1
3,1990,2
4,1991,1
5,1992,1
6,1993,3
7,1994,4
8,1995,4
9,1996,8


### Demographic Data

In [None]:
# new sliced table for demographic information of each satellite name
# .copy() makes this an independent DataFrame: the following cells assign to
# and insert columns in demoGData, which on a plain slice of df raises
# SettingWithCopyWarning and leaves it unclear which object is modified.
demoGData = df[[
    'Orig_Sequence', 'Satellite_Names', 'Launch_Date',
    'Country_of_Operator_Owner', 'Launch_Site', 'Operator_Owner',
    'Purpose', 'Orbit_Classes', 'Period_minutes', 'Launch_Mass_km',
]].copy()


In [None]:
# Strip the comma thousands separator from the period strings, convert the
# result from string to float, and keep two decimal places.
period_text = demoGData['Period_minutes'].str.replace(',', '', regex=True)
demoGData['Period_minutes'] = period_text.astype('float').round(2)

In [None]:
# Derive the orbital period in hours from the minutes column (two decimals)
# and place it at column position 8 as "Period_Hours".
period_hours = (demoGData['Period_minutes'] / 60).round(2)
demoGData.insert(loc=8, column="Period_Hours", value=period_hours, allow_duplicates=False)

In [None]:
# Export the demographic table to CSV (positional index left out).
demographic_out_path = os.path.join("Data", "Demographic_Data.csv")
demoGData.to_csv(demographic_out_path, index=False)