# SAAF

In [13]:
import pandas as pd
import numpy as np

# Load the SAAF CSV file
file_path = 'context--2008-08-22_2010-07-10--saaf.csv'  # Replace with your actual file path
saaf_data = pd.read_csv(file_path)

# Parse the epoch-millisecond 'ut_ms' column into datetimes and use it as the index.
# set_index() already removes 'ut_ms' from the columns, so no separate drop is needed.
saaf_data['ut_ms'] = pd.to_datetime(saaf_data['ut_ms'], unit='ms')
saaf_data = saaf_data.set_index('ut_ms')

# Resample to 15-minute bins, aggregating each bin with its mean.
# '15min' replaces the '15T' alias, which pandas has deprecated (it emits a FutureWarning).
saaf_data_resampled = saaf_data.resample('15min').mean()

# Add the cosine of every angle column; np.radians treats the stored values as degrees.
for angle in ['sa', 'sx', 'sy', 'sz']:
    saaf_data_resampled[f'cos_{angle}'] = np.cos(np.radians(saaf_data_resampled[angle]))

# Display the first few rows of the preprocessed data
print(saaf_data_resampled.head())


                        sa      sx      sy          sz   cos_sa    cos_sx  \
ut_ms                                                                       
2010-07-10 00:00:00  0.255  28.145  90.255  118.145000  0.99999  0.881757   
2010-07-10 00:15:00  0.255  28.145  90.255  118.145000  0.99999  0.881757   
2010-07-10 00:30:00  0.255  28.145  90.255  118.145000  0.99999  0.881757   
2010-07-10 00:45:00  0.255  28.145  90.255  118.140667  0.99999  0.881757   
2010-07-10 01:00:00  0.255  28.142  90.255  118.135000  0.99999  0.881781   

                       cos_sy    cos_sz  
ut_ms                                    
2010-07-10 00:00:00 -0.004451 -0.471705  
2010-07-10 00:15:00 -0.004451 -0.471705  
2010-07-10 00:30:00 -0.004451 -0.471705  
2010-07-10 00:45:00 -0.004451 -0.471638  
2010-07-10 01:00:00 -0.004451 -0.471551  


  saaf_data_resampled = saaf_data.resample('15T').mean()


In [14]:
# Show the resampled SAAF frame, including the derived cos_* columns, as the cell's rich output.
saaf_data_resampled

Unnamed: 0_level_0,sa,sx,sy,sz,cos_sa,cos_sx,cos_sy,cos_sz
ut_ms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-07-10 00:00:00,0.255000,28.145000,90.255000,118.145000,0.999990,0.881757,-4.450575e-03,-0.471705
2010-07-10 00:15:00,0.255000,28.145000,90.255000,118.145000,0.999990,0.881757,-4.450575e-03,-0.471705
2010-07-10 00:30:00,0.255000,28.145000,90.255000,118.145000,0.999990,0.881757,-4.450575e-03,-0.471705
2010-07-10 00:45:00,0.255000,28.145000,90.255000,118.140667,0.999990,0.881757,-4.450575e-03,-0.471638
2010-07-10 01:00:00,0.255000,28.142000,90.255000,118.135000,0.999990,0.881781,-4.450575e-03,-0.471551
...,...,...,...,...,...,...,...,...
2012-05-26 22:45:00,2.537692,5.515769,89.990000,95.515769,0.999019,0.995370,1.745329e-04,-0.096120
2012-05-26 23:00:00,3.200000,0.035000,90.000000,90.035000,0.998441,1.000000,6.123234e-17,-0.000611
2012-05-26 23:15:00,0.882500,0.002500,90.000000,89.997500,0.999881,1.000000,6.123234e-17,0.000044
2012-05-26 23:30:00,1.106923,3.511538,89.996538,90.058462,0.999813,0.998122,6.041524e-05,-0.001020


# DMOP

In [15]:
import pandas as pd

# Define the list of all known groups (this also fixes the output column order)
all_groups = ['AXXX', 'AAAA', 'ASEQ', 'ATTT', 'APSF', 'AMMM', 'MOCS', 'PENS', 'PENE', 'MPER',
              'MOCE', 'MAPO', 'SCMN', 'AOOO', 'ASSS', 'AHHH', 'ASXX', 'AVVV', 'ATMB', 'PPNS',
              'PPNE', 'APWF', 'UPBS', 'UPBE', 'PDNS', 'PDNE', 'UDBS', 'UDBE', 'AACF', 'OBCP',
              'ADMC', 'Trig', 'DISA']

# Load the DMOP CSV file (replace the file path with your actual path).
# The raw frame gets its own name so that `dmop_data` only ever refers to the final features.
dmop_raw = pd.read_csv('context--2008-08-22_2010-07-10--dmop.csv')

# Parse the epoch-millisecond 'ut_ms' column into datetimes and use it as the index
dmop_raw['ut_ms'] = pd.to_datetime(dmop_raw['ut_ms'], unit='ms')
dmop_raw = dmop_raw.set_index('ut_ms')

# Extract the first 4 characters as the subsystem group and the rest as the command
dmop_raw['subsystem_group'] = dmop_raw['subsystem'].str[:4]
dmop_raw['subsystem_command'] = dmop_raw['subsystem'].str[4:]

# Count occurrences of each subsystem group per 15-minute bin.
# '15min' replaces the deprecated '15T' alias (pandas emits a FutureWarning for it).
# Bins without any command do not appear as rows in this result.
dmop_data_resampled = (
    dmop_raw.resample('15min').subsystem_group.value_counts().unstack(fill_value=0)
)

# Keep exactly the known groups, in list order; groups absent from the data become
# all-zero columns and groups not listed in `all_groups` are discarded.
dmop_data_resampled = dmop_data_resampled.reindex(columns=all_groups, fill_value=0)

# Define the fixed top 5 pairs manually (as per your given data)
top_5_pairs = [('AMMM', 'ATTT'), ('APSF', 'ATTT'), ('AMMM', 'APSF'), ('AAAA', 'ATTT'), ('ASSS', 'ATTT')]

# For every pair, flag (0/1) the bins in which both groups occurred at least once.
# The comparison is vectorized over whole columns instead of a Python lambda per row.
top_5_pairs_counts = pd.DataFrame(
    {
        f'{first} & {second}': (
            (dmop_data_resampled[first] > 0) & (dmop_data_resampled[second] > 0)
        ).astype(int)
        for first, second in top_5_pairs
    },
    index=dmop_data_resampled.index,
)

# Merge the per-group counts and the pair flags into the final DMOP feature frame
dmop_data = pd.concat([dmop_data_resampled, top_5_pairs_counts], axis=1)

# Display the first few rows of the final feature frame
print(dmop_data.head())



  dmop_data_resampled = dmop_data.resample('15T').subsystem_group.value_counts().unstack(fill_value=0)


                     AXXX  AAAA  ASEQ  ATTT  APSF  AMMM  MOCS  PENS  PENE  \
ut_ms                                                                       
2010-07-10 00:45:00     1     0     0     0     0     0     0     0     0   
2010-07-10 01:00:00     0     0     1     0     0     0     0     0     0   
2010-07-10 02:00:00     0     0     0     0     0     0     0     0     0   
2010-07-10 02:45:00     1     0     0     0     0     0     0     0     0   
2010-07-10 03:00:00     0     5     0     0     0     0     0     0     0   

                     MPER  ...  AACF  OBCP  ADMC  Trig  DISA  AMMM & ATTT  \
ut_ms                      ...                                              
2010-07-10 00:45:00     0  ...     0     0     0     0     0            0   
2010-07-10 01:00:00     0  ...     0     0     0     0     0            0   
2010-07-10 02:00:00     0  ...     0     0     0     0     0            0   
2010-07-10 02:45:00     0  ...     0     0     0     0     0            0  

In [16]:
# Show the DMOP feature frame (per-group counts plus the pair flags) as the cell's rich output.
dmop_data

Unnamed: 0_level_0,AXXX,AAAA,ASEQ,ATTT,APSF,AMMM,MOCS,PENS,PENE,MPER,...,AACF,OBCP,ADMC,Trig,DISA,AMMM & ATTT,APSF & ATTT,AMMM & APSF,AAAA & ATTT,ASSS & ATTT
ut_ms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-07-10 00:45:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2010-07-10 01:00:00,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2010-07-10 02:00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2010-07-10 02:45:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2010-07-10 03:00:00,0,5,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-05-14 04:15:00,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2012-05-14 06:30:00,0,0,0,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
2012-05-14 06:45:00,0,0,0,0,0,0,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
2012-05-14 07:00:00,0,0,0,0,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0


# FTL

In [17]:
import os
import numpy as np
import pandas as pd

# Fixed vocabulary of values expected in the 'type' column; its order defines
# the order of the one-hot columns derived from it.
point_types = [
    'EARTH', 'SLEW', 'ACROSS_TRACK', 'MAINTENANCE', 'NADIR', 'INERTIAL',
    'RADIO_SCIENCE', 'WARMUP', 'D1PVMC', 'SPECULAR', 'D4PNPO', 'D3POCM',
    'D2PLND', 'D7PLTS', 'D8PLTP', 'D5PPHB', 'SPOT', 'D9PSPO',
]

# Read the FTL file, derive a datetime 'ut_ms' column from the epoch-millisecond
# 'utb_ms' column, then discard both original millisecond columns.
ftl_file_path = 'context--2008-08-22_2010-07-10--ftl.csv'  # Replace with your correct path
ftl_df = (
    pd.read_csv(ftl_file_path)
    .assign(ut_ms=lambda raw: pd.to_datetime(raw['utb_ms'], unit='ms'))
    .drop(columns=['ute_ms', 'utb_ms'])
)

# One-hot encoding function using the predefined list
def get_ohe(example, column_names):
    """Return a 0/1 NumPy vector over ``column_names``.

    Position ``i`` holds 1 when ``column_names[i] == example`` and 0 otherwise,
    so a value that matches no entry yields an all-zero vector.
    """
    flags = []
    for name in column_names:
        flags.append(1 if name == example else 0)
    return np.array(flags)

# One-hot encode the 'type' column against the predefined vocabulary.
# Each indicator is a vectorized equality test on the whole column, which avoids
# building one pd.Series per row; values outside `point_types` (or missing) give all zeros.
print('One-hot encoding columns...')
ohe_point_type_cols = ['is_{}'.format(pt.lower()) for pt in point_types]
for ohe_col, point_type in zip(ohe_point_type_cols, point_types):
    ftl_df[ohe_col] = (ftl_df['type'] == point_type).astype(int)

# Drop 'flagcomms' and the now-encoded 'type' column, then index by the 'ut_ms' datetime
ftl_df = ftl_df.drop(columns=['flagcomms', 'type']).set_index('ut_ms')

# Select only numeric columns before resampling
numeric_columns = ftl_df.select_dtypes(include=[np.number])

# Sum the indicators per 15-minute bin. sum() already returns 0 for empty bins, so no
# fillna is needed; '15min' replaces the deprecated '15T' alias (FutureWarning).
ftl_df_resampled = numeric_columns.resample('15min').sum()

# Display the processed data
print(ftl_df_resampled.head())




One-hot encoding columns...
                     is_earth  is_slew  is_across_track  is_maintenance  \
ut_ms                                                                     
2010-07-10 00:45:00         1        0                0               0   
2010-07-10 01:00:00         0        0                0               0   
2010-07-10 01:15:00         0        0                0               0   
2010-07-10 01:30:00         0        0                0               0   
2010-07-10 01:45:00         0        0                0               0   

                     is_nadir  is_inertial  is_radio_science  is_warmup  \
ut_ms                                                                     
2010-07-10 00:45:00         0            0                 0          0   
2010-07-10 01:00:00         0            0                 0          0   
2010-07-10 01:15:00         0            0                 0          0   
2010-07-10 01:30:00         0            0                 0          0

  ftl_df_resampled = numeric_columns.resample('15T').sum().fillna(0.0)


In [7]:
# Show the resampled FTL indicator frame as the cell's rich output.
# NOTE(review): this cell's execution count (In [7]) is out of sequence with the
# surrounding cells, so its stored output may be stale — re-run after the FTL cell.
ftl_df_resampled

Unnamed: 0_level_0,is_earth,is_slew,is_across_track,is_maintenance,is_nadir,is_inertial,is_radio_science,is_warmup,is_d1pvmc,is_specular,is_d4pnpo,is_d3pocm,is_d2plnd,is_d7plts,is_d8pltp,is_d5pphb,is_spot,is_d9pspo
ut_ms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2008-08-22 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2008-08-22 00:15:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2008-08-22 00:30:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2008-08-22 00:45:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2008-08-22 01:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2010-07-09 22:30:00,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2010-07-09 22:45:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2010-07-09 23:00:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2010-07-09 23:15:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# EVTF

In [18]:
import pandas as pd

# Load the EVTF CSV file
evtf_file_path = 'context--2008-08-22_2010-07-10--evtf.csv'  # Replace with your correct file path
evtf_data = pd.read_csv(evtf_file_path)

# Step 1: Convert ut_ms (epoch milliseconds) to a timestamp
evtf_data['ut_ms'] = pd.to_datetime(evtf_data['ut_ms'], unit='ms')

# Step 2: Keywords to look for in the description column
keywords = [
    'LOS', 'AOS', 'PHO_PENUMBRA_START', 'PHO_PENUMBRA_END', 'PHO_UMBRA_START', 'PHO_UMBRA_END',
    'MAR_PENUMBRA_START', 'MAR_PENUMBRA_END', 'MAR_UMBRA_START', 'MAR_UMBRA_END',
    'OCC_PHOBOS_START', 'OCC_PHOBOS_END', 'OCC_DEIMOS_START', 'OCC_DEIMOS_END',
    'DEI_PENUMBRA_START', 'DEI_PENUMBRA_END', 'ASCEND', 'DESCEND', 'PERICENTER', 'APOCENTER'
]

# Step 3: Create a 0/1 flag column per keyword: 1 when the keyword occurs in the row's
# description as a literal, case-sensitive substring. regex=False keeps the match literal,
# and na=False maps a missing description to 0 instead of raising a TypeError.
for keyword in keywords:
    evtf_data[keyword] = (
        evtf_data['description'].str.contains(keyword, regex=False, na=False).astype(int)
    )

# Step 4: Set the timestamp as the index
evtf_data = evtf_data.set_index('ut_ms')

# Step 5: Align the timestamps to the nearest lower 15-minute mark (e.g., 00:00:00, 00:15:00, etc.)
# '15min' replaces the deprecated '15T' alias (pandas emits a FutureWarning for it).
evtf_data.index = evtf_data.index.floor('15min')

# Step 6: Resample into 15-minute intervals, summing the flags so each cell holds the
# number of matching events in that interval
evtf_resampled = evtf_data[keywords].resample('15min').sum()

# Display the preprocessed and resampled data
print(evtf_resampled.head())


                     LOS  AOS  PHO_PENUMBRA_START  PHO_PENUMBRA_END  \
ut_ms                                                                 
2010-07-10 00:00:00    0    1                   0                 0   
2010-07-10 00:15:00    0    0                   0                 0   
2010-07-10 00:30:00    0    0                   0                 0   
2010-07-10 00:45:00    0    1                   0                 0   
2010-07-10 01:00:00    0    0                   0                 0   

                     PHO_UMBRA_START  PHO_UMBRA_END  MAR_PENUMBRA_START  \
ut_ms                                                                     
2010-07-10 00:00:00                0              0                   0   
2010-07-10 00:15:00                0              0                   0   
2010-07-10 00:30:00                0              0                   0   
2010-07-10 00:45:00                0              0                   0   
2010-07-10 01:00:00                0              0 

  evtf_data.index = evtf_data.index.floor('15T')
  evtf_resampled = evtf_data[keywords].resample('15T').sum()


# LTDATA

In [19]:
import pandas as pd

# Load the LTDATA CSV file
ltdata_file_path = 'context--2008-08-22_2010-07-10--ltdata.csv'  # Replace with your correct file path
ltdata_data = pd.read_csv(ltdata_file_path)

# Step 1: Convert ut_ms (epoch milliseconds) to a timestamp in datetime format
ltdata_data['ut_ms'] = pd.to_datetime(ltdata_data['ut_ms'], unit='ms')

# Step 2: Set the timestamp as the index. set_index() already removes 'ut_ms' from the
# columns, so no separate drop is needed.
ltdata_data = ltdata_data.set_index('ut_ms')

# Step 3: Resample onto a 15-minute grid, forward filling so each new row repeats the
# most recent earlier sample. '15min' replaces the deprecated '15T' alias (FutureWarning).
ltdata_resampled = ltdata_data.resample('15min').ffill()

# Display the resampled data (optional)
print(ltdata_resampled.head())

# Save the processed data to a CSV file (optional)
ltdata_resampled.to_csv('ltdata_processed.csv')


  ltdata_resampled = ltdata_data.resample('15T').ffill()


                       sunmars_km  earthmars_km  sunmarsearthangle_deg  \
ut_ms                                                                    
2010-07-10 00:00:00  2.419318e+08  2.775835e+08              33.150188   
2010-07-10 00:15:00  2.419318e+08  2.775835e+08              33.150188   
2010-07-10 00:30:00  2.419318e+08  2.775835e+08              33.150188   
2010-07-10 00:45:00  2.419318e+08  2.775835e+08              33.150188   
2010-07-10 01:00:00  2.419318e+08  2.775835e+08              33.150188   

                     solarconstantmars  eclipseduration_min  \
ut_ms                                                         
2010-07-10 00:00:00         522.294605                  0.0   
2010-07-10 00:15:00         522.294605                  0.0   
2010-07-10 00:30:00         522.294605                  0.0   
2010-07-10 00:45:00         522.294605                  0.0   
2010-07-10 01:00:00         522.294605                  0.0   

                     occultationduratio

# Energy Received (feature)

In [20]:
# Calculate the energy_received features:
#     energy_received_<angle> = p_max * cos(<angle>) / sunmars_km ** 2
p_max = 200_000_000_000_000_000  # given value of p_max (2e17)

energy_rece = pd.DataFrame(index=saaf_data_resampled.index)

angle_columns = ['sa', 'sx', 'sy', 'sz']

# Squared 'sunmars_km' column; the division below aligns it with the SAAF rows by timestamp.
sunmars_km_squared = ltdata_resampled['sunmars_km'] ** 2

# The cosine is computed here from the angle columns rather than read from the cos_*
# columns. Those columns are removed below, so reading them would make a second run of
# this cell fail with a KeyError.
for angle in angle_columns:
    cos_angle = np.cos(np.radians(saaf_data_resampled[angle]))
    energy_rece[f'energy_received_{angle}'] = (p_max * cos_angle) / sunmars_km_squared

# Remove the intermediate cos_* columns from the SAAF frame if they are still present;
# errors='ignore' keeps the cell re-runnable once they are gone.
saaf_data_resampled = saaf_data_resampled.drop(
    columns=[f'cos_{angle}' for angle in angle_columns], errors='ignore'
)

# Display the resulting dataframe with the new feature
print(energy_rece.head())


                     energy_received_sa  energy_received_sx  \
ut_ms                                                         
2010-07-10 00:00:00            3.416959            3.012956   
2010-07-10 00:15:00            3.416959            3.012956   
2010-07-10 00:30:00            3.416959            3.012956   
2010-07-10 00:45:00            3.416959            3.012956   
2010-07-10 01:00:00            3.416959            3.013040   

                     energy_received_sy  energy_received_sz  
ut_ms                                                        
2010-07-10 00:00:00           -0.015208           -1.611811  
2010-07-10 00:15:00           -0.015208           -1.611811  
2010-07-10 00:30:00           -0.015208           -1.611811  
2010-07-10 00:45:00           -0.015208           -1.611583  
2010-07-10 01:00:00           -0.015208           -1.611285  


# Powerline

In [47]:
# Load the power measurements and index them by the 'ut_ms' timestamp (epoch milliseconds)
power_data = pd.read_csv('power--2008-08-22_2010-07-10.csv')

power_data['ut_ms'] = pd.to_datetime(power_data['ut_ms'], unit='ms')
power_data = power_data.set_index('ut_ms')

# Average onto a 15-minute grid, then interpolate to fill bins that had no samples.
# '15min' replaces the deprecated '15T' alias (pandas emits a FutureWarning for it).
power_data = power_data.resample('15min').mean().interpolate()

power_data.to_csv('Test_Power_Satellite.csv')

# Kept as the last expression so the preview is actually rendered; in the middle of the
# cell its result was silently discarded.
power_data.head()

  power_data = power_data.resample('15T').mean().interpolate()


# Merge Data

In [22]:
# Merge all datasets on the timestamp (outer join keeps every 15-minute slot seen in any frame)
data = saaf_data_resampled
for frame in (energy_rece, dmop_data, evtf_resampled, ftl_df_resampled, ltdata_resampled):
    data = pd.merge(data, frame, on='ut_ms', how='outer')

# Sort by timestamp
data = data.sort_values('ut_ms')

# Event-count features (DMOP, EVTF, FTL): a slot with no row in the source frame means
# no event was recorded there, so it must be 0. A blanket forward-fill would instead copy
# the previous slot's counts into slots where nothing happened.
event_count_columns = [
    column
    for frame in (dmop_data, evtf_resampled, ftl_df_resampled)
    for column in frame.columns
    if column in data.columns
]
data = data.fillna({column: 0 for column in event_count_columns})

# Remaining (continuous) features: carry the last known value forward, then zero-fill
# anything still missing at the start. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') form, which emits a FutureWarning.
data = data.ffill().fillna(0)

data.to_csv('Preprocess_Feature_MEX.CSV')

  data = data.fillna(method='ffill').fillna(0)
