In [86]:
import pandas as pd
import plotly.express as px

In [87]:
# Read the catalog, then discard the row whose index label is 0 (the first row
# after the CSV header). NOTE(review): presumably a non-data descriptor line in
# the export -- confirm against the file.
raw_catalog = pd.read_csv('../data/psatcat.csv')
df = raw_catalog.drop(index=0)

In [88]:
# Summarize the loaded frame: index range, column names, non-null counts, dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19250 entries, 1 to 19250
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #JCAT       19250 non-null  object
 1   Piece       19250 non-null  object
 2   Name        19250 non-null  object
 3   LDate       19250 non-null  object
 4   TLast       19250 non-null  object
 5   TOp         19250 non-null  object
 6   TDate       19250 non-null  object
 7   TF          19250 non-null  object
 8   Program     19250 non-null  object
 9   Plane       19250 non-null  object
 10  Att         19250 non-null  object
 11  Mvr         19250 non-null  object
 12  Class       19250 non-null  object
 13  Category    19250 non-null  object
 14  UNState     19250 non-null  object
 15  UNReg       19250 non-null  object
 16  UNPeriod    19250 non-null  object
 17  UNPerigee   19250 non-null  object
 18  UNApogee    19250 non-null  object
 19  UNInc       19250 non-null  object
 20  Result

In [89]:
# Old column label -> label used by the rest of the notebook.
# Entries that map a label to itself leave that column unchanged.
readable_column_names = {
    # identification
    '#JCAT': 'GCAT',
    'Piece': 'SatPiece',
    'Name': 'SatName',
    # dates and status flags
    'LDate': 'LaunchDate',
    'TLast': 'LastKnownActiveTime',
    'TOp': 'EndOperationDate',
    'TDate': 'EndTransmissionDate',
    'TF': 'EndTransmissionFlag',
    # mission description
    'Program': 'Program',
    'Plane': 'OrbitalPlane',
    'Att': 'AttitudeControl',
    'Mvr': 'ManeuverabilityFlag',
    'Class': 'Class',
    'Category': 'Category',
    # UN* fields
    'UNState': 'UNState',
    'UNReg': 'UNReg',
    'UNPeriod': 'UNPeriod',
    'UNPerigee': 'UNPerigee',
    'UNApogee': 'UNApogee',
    'UNInc': 'UNInc',
    # remaining fields
    'Result': 'Result',
    'Control': 'ControlCenter',
    'Discipline': 'Discipline',
    'Comment': 'Comment',
}
df = df.rename(readable_column_names, axis='columns')

In [90]:
# Show the column labels to confirm the rename took effect.
df.columns

Index(['GCAT', 'SatPiece', 'SatName', 'LaunchDate', 'LastKnownActiveTime',
       'EndOperationDate', 'EndTransmissionDate', 'EndTransmissionFlag',
       'Program', 'OrbitalPlane', 'AttitudeControl', 'ManeuverabilityFlag',
       'Class', 'Category', 'UNState', 'UNReg', 'UNPeriod', 'UNPerigee',
       'UNApogee', 'UNInc', 'Result', 'ControlCenter', 'Discipline',
       'Comment'],
      dtype='object')

In [91]:
# Frequency of each raw Class code, before the codes are mapped to labels.
df['Class'].value_counts()

Class
B     10514
D      5060
C      2889
A       768
CD        7
BD        7
CB        4
BC        1
Name: count, dtype: int64

In [92]:
# Parse both date columns as datetimes; values that cannot be parsed become
# NaT (errors='coerce') instead of raising.
for date_col in ('LaunchDate', 'EndTransmissionDate'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

In [93]:
# Derive grouping keys from the parsed launch date: the calendar year, and the
# monthly period rendered as a string.
launch_dates = df['LaunchDate']
df['LaunchYear'] = launch_dates.dt.year
df['LaunchMonthYear'] = launch_dates.dt.to_period('M').astype(str)

In [94]:
# Discard every row that is missing either its Class or its parsed LaunchDate.
df = df.dropna(axis='index', how='any', subset=['Class', 'LaunchDate'])

In [95]:
# Translate Class codes into readable labels. Two-letter codes are filed
# under the label of their first letter.
col_map = {
    "B": "Commercial",
    "D": "Defense",
    "C": "Civil",
    "A": "Amateur",
    "CD": "Civil",
    "BD": "Commercial",
    "CB": "Civil",
    "BC": "Commercial"
}
# Fall back to the existing value when a code is not in col_map. A plain
# Series.map(dict) would turn such values into NaN, which has two bad effects:
#   * an unexpected code would silently vanish from later group-bys, and
#   * re-running this cell would wipe the already-translated labels.
df['Class'] = df['Class'].map(lambda code: col_map.get(code, code))

In [96]:
# (rows, columns) after dropping rows with a missing Class or LaunchDate.
df.shape

(19250, 26)

In [97]:
# Frequency of each Class label after the codes were collapsed via col_map.
df['Class'].value_counts()

Class
Commercial    10522
Defense        5060
Civil          2900
Amateur         768
Name: count, dtype: int64

In [98]:
# Count rows per (LaunchYear, Class) pair and flatten the result into a
# three-column frame: LaunchYear, Class, LaunchCount.
rows_per_year_and_class = df.groupby(['LaunchYear', 'Class']).size()
launch_count_by_sat_class = rows_per_year_and_class.rename('LaunchCount').reset_index()

In [99]:
# Bar chart of the per-year counts, coloured by Class.
# The plotted quantity is a count of catalog rows. Each row appears to be one
# satellite record (SatName / SatPiece columns), and several satellites can
# share a single launch, so the title and y-axis label say "satellites"
# rather than "launches".
fig = px.bar(
    launch_count_by_sat_class,
    x='LaunchYear',
    y='LaunchCount',
    color='Class',
    title='Annual Number of Satellites Launched',
    labels={'LaunchYear': 'Year', 'LaunchCount': 'Number of Satellites'},
    color_discrete_sequence=['#2c57c9', '#8d50d0', '#c95574', '#0b786c', '#ab7310', '#ca78cc'],
    opacity=0.8,
    width=1100,
)
# Horizontal, untitled legend anchored just above the top-right of the plot area.
fig.update_layout(
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
    legend_title=None,
    template='plotly_dark',
)
fig.show()