In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the tab-separated CANOPUS compound summary (SIRIUS output) into a DataFrame.
df = pd.read_csv(
    'Data/cap_manu/input/canopus_compound_summary_40_varieties.tsv',
    sep='\t',
)
# Bare last expression so the notebook renders the loaded frame.
df

In [None]:
# Per-class tally: number of non-null "featureId" entries for each "NPC#class",
# ordered from the most frequent class to the least frequent one.
class_counts = (
    df.groupby("NPC#class")["featureId"]
    .count()
    .reset_index()
    .sort_values("featureId", ascending=False)
)

# Echo the tally as plain text in the cell output.
print(class_counts)

# Save the same table as a spreadsheet, leaving out the row index.
class_counts.to_excel(
    "Data/cap_manu/output/allchilis_reanal_classs_counts.xlsx",
    index=False,
)

In [None]:
# Frequency table for "NPC#class": one row per distinct class with its number of
# occurrences, relabelled to ("NPC#class", "Total") whatever names pandas assigns.
class_counts = (
    df["NPC#class"]
    .value_counts()
    .reset_index()
    .set_axis(["NPC#class", "Total"], axis=1)
)

# Grand total across all classes; used as the denominator for the shares below.
total_count = class_counts["Total"].sum()

# Share of each class, expressed in percent of the grand total.
class_counts["Percentage"] = class_counts["Total"].div(total_count).mul(100)

# Echo the resulting table as plain text in the cell output.
print(class_counts)

In [None]:
# Keep only the five classes with the highest percentage share.
top_5_classes = class_counts.nlargest(5, 'Percentage')

# Draw the bar chart through an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 8))
ax.bar(top_5_classes['NPC#class'], top_5_classes['Percentage'], color='skyblue')

# Axis labels and title.
ax.set_xlabel('NPC Class')
ax.set_ylabel('Percentage')
ax.set_title('Top 5 NPC Classes by Percentage')

# Tilt the class names so long labels stay readable.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(40)

# Single legend entry describing the bars.
ax.legend(['Percentage'])

# Fit everything inside the canvas, then render.
fig.tight_layout()
plt.show()