In [49]:
# Show the Accuracy of the Project-3 predictions

import numpy as np
import pandas as pd

# Read the true eclipse labels for the testing data
true_labels_df = pd.read_csv("Eclipse_Pred\True_Eclipse_Test_Label.csv", index_col="Catalog Number")

# Read the predicted eclipse types from Project3_pred.csv
predicted_labels_df = pd.read_csv("Project3_pred.csv", index_col="Catalog Number")

# Ensure that both DataFrames have the same index
true_labels_df.index = true_labels_df.index.astype(str)
predicted_labels_df.index = predicted_labels_df.index.astype(str)

# Merge the DataFrames on the index (Catalog Number)
merged_df = pd.merge(true_labels_df, predicted_labels_df, left_index=True, right_index=True)

# Calculate test accuracy by comparing predicted and true labels
test_accuracy = np.mean(merged_df["Eclipse Type"] == merged_df["Pred_Eclipse_Type"]) * 100
print("Test Accuracy based on True Labels: {:.2f}%".format(test_accuracy))


Test Accuracy based on True Labels: 74.40%


In [52]:
# Show the accuracy of Project-3 Mapped

import numpy as np
import pandas as pd

# Read the true eclipse labels for the testing data
true_labels_df = pd.read_csv("Eclipse_Pred\True_Mapped_Eclipse_Test_Label.csv", index_col="Catalog Number")

# Read the predicted eclipse types from Project3_pred.csv
predicted_labels_df = pd.read_csv("Project3_pred.csv", index_col="Catalog Number")

# Ensure that both DataFrames have the same index
true_labels_df.index = true_labels_df.index.astype(str)
predicted_labels_df.index = predicted_labels_df.index.astype(str)

# Merge the DataFrames on the index (Catalog Number)
merged_df = pd.merge(true_labels_df, predicted_labels_df, left_index=True, right_index=True)

# Calculate test accuracy by comparing predicted and true labels
test_accuracy = np.mean(merged_df["Eclipse Type"] == merged_df["Pred_Eclipse_Type"]) * 100
print("Test Accuracy based on True Labels: {:.2f}%".format(test_accuracy))


Test Accuracy based on True Labels: 67.14%


In [50]:
# Show the divergence of Project-3 predictions

import numpy as np
import pandas as pd

# Kullback-Leibler (KL) divergence between the predicted probability and the observed target
def KL_div(P, Q):
    if len(P) != len(Q):
        return 100
    else:
        p = np.array(P) + 0.000001   # adding an epsilon to avoid log(0) 
        q = np.array(Q) + 0.000001   # adding an epsilon to avoid log(0)    
        KL = np.dot(p, np.log(p) - np.log(q)) / p.sum()
    return KL

# Read the true eclipse labels for the testing data
true_labels_df = pd.read_csv("Eclipse_Pred\True_Eclipse_Test_Label.csv", index_col="Catalog Number")

# Read the predicted eclipse types from Project3_pred.csv
predicted_labels_df = pd.read_csv("Project3_pred.csv", index_col="Catalog Number")

# Ensure that both DataFrames have the same index
true_labels_df.index = true_labels_df.index.astype(str)
predicted_labels_df.index = predicted_labels_df.index.astype(str)

# Merge the DataFrames on the index (Catalog Number)
merged_df = pd.merge(true_labels_df, predicted_labels_df, left_index=True, right_index=True)

# Calculate the KL divergence score
KL_div_score = KL_div(merged_df["Eclipse Type"], merged_df["Pred_Eclipse_Type"])

print("KL Divergence Score: ", KL_div_score)


KL Divergence Score:  0.07891437071695853


In [51]:
# Show the divergence of project-3 mapped

import numpy as np
import pandas as pd

# Kullback-Leibler (KL) divergence between the predicted probability and the observed target
def KL_div(P, Q):
    if len(P) != len(Q):
        return 100
    else:
        p = np.array(P) + 0.000001   # adding an epsilon to avoid log(0) 
        q = np.array(Q) + 0.000001   # adding an epsilon to avoid log(0)    
        KL = np.dot(p, np.log(p) - np.log(q)) / p.sum()
    return KL

# Read the true eclipse labels for the testing data
true_labels_df = pd.read_csv("Eclipse_Pred\True_Mapped_Eclipse_Test_Label.csv", index_col="Catalog Number")

# Read the predicted eclipse types from Project3_pred.csv
predicted_labels_df = pd.read_csv("Project3_pred.csv", index_col="Catalog Number")

# Ensure that both DataFrames have the same index
true_labels_df.index = true_labels_df.index.astype(str)
predicted_labels_df.index = predicted_labels_df.index.astype(str)

# Merge the DataFrames on the index (Catalog Number)
merged_df = pd.merge(true_labels_df, predicted_labels_df, left_index=True, right_index=True)

# Calculate the KL divergence score
KL_div_score = KL_div(merged_df["Eclipse Type"], merged_df["Pred_Eclipse_Type"])

print("KL Divergence Score: ", KL_div_score)


KL Divergence Score:  0.33639244728777884


In [None]:
# Adjust the file True_Eclipse_Test_Label.csv to remove Nan Categories

import pandas as pd

# Read in the Eclipse_Test.csv file
eclipse_test_df = pd.read_csv("Eclipse_Pred\Eclipse_Test.csv")

# Drop rows with missing values in the 'Central Duration Seconds' column
eclipse_test_df.dropna(subset=['Central Duration Seconds'], inplace=True)

# Keep only the 'Catalog Number' and 'Eclipse Type' columns
eclipse_test_df = eclipse_test_df[['Catalog Number', 'Eclipse Type']]

# Write the modified DataFrame to the True_Eclipse_Test_Label.csv file
eclipse_test_df.to_csv("Eclipse_Pred\True_Eclipse_Test_Label.csv", index=False)

# Print the size of the modified file
print("Size of the file:", len(eclipse_test_df))

In [48]:
import pandas as pd

# Read in the Eclipse_Test.csv file
eclipse_test_df = pd.read_csv("Eclipse_Pred\Eclipse_Test.csv")

# Drop rows with missing values in the 'Central Duration Seconds' column
eclipse_test_df.dropna(subset=['Central Duration Seconds'], inplace=True)

# Replace Eclipse types based on the specified mappings
eclipse_test_df['Eclipse Type'] = eclipse_test_df['Eclipse Type'].replace({
    0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0,     # Map 0-5 to 0
    6: 6, 7: 6, 8: 6, 9: 6, 10: 6, 11: 6,   # Map 6-11 to 6
    12: 12, 13: 12, 14: 12,                 # Map 12-14 to 12
    15: 15, 16: 15, 17: 15, 18: 15          # Map 15-18 to 15
})

# Keep only the 'Catalog Number' and 'Eclipse Type' columns
eclipse_test_df = eclipse_test_df[['Catalog Number', 'Eclipse Type']]

# Write the modified DataFrame to the True_Eclipse_Test_Label.csv file
eclipse_test_df.to_csv("Eclipse_Pred\True_Mapped_Eclipse_Test_Label.csv", index=False)

# Print the size of the modified file
print("Size of the file:", len(eclipse_test_df))

Size of the file: 1488
