In [None]:
import pandas as pd

# Read the tab-separated export of the submissions into a DataFrame
file_path = 'submissions.tsv'
df = pd.read_csv(file_path, sep="\t")

# Preview the leading rows as a quick sanity check
df.head()

In [None]:
# Bucket each submission into its calendar month (a monthly Period, not a datetime).
# The guard keeps this cell safe to re-run: once the column is Period-typed,
# pd.to_datetime(..., errors='coerce') can no longer parse it and would coerce
# every value to NaT, silently emptying the aggregation below.
if not isinstance(df['Submitted'].dtype, pd.PeriodDtype):
    df['Submitted'] = pd.to_datetime(df['Submitted'], errors='coerce').dt.to_period('M')

# Sum samples and libraries per (Type, month). groupby sorts the group keys by
# default and leaves out rows with a missing key (e.g. an unparseable date -> NaT).
sample_count = df.groupby(['Type', 'Submitted'])[['samples', 'libraries']].sum()

# Get combined samples and library numbers for each type
sample_count['all'] = sample_count['samples'] + sample_count['libraries']
sample_count.to_csv('samplecount.csv')

In [None]:
# One bar chart per submission type, stacked vertically: pivot the "Type" index
# level of the combined counts into columns so every type becomes its own subplot
monthly_totals = sample_count['all'].unstack(level=0)
plot = monthly_totals.plot.bar(subplots=True, figsize=(15, 100), layout=(-1, 1))

In [None]:
# Plot sample count for each submission type as individual plot
for t in sample_count.index.get_level_values('Type').unique():
    # Monthly aggregates for this type only, indexed by the "Submitted" month
    temp_df = sample_count.xs(t, level='Type')
    # Reindex onto every month between the first and last submission of this
    # type, so that months without any submission show up with a count of 0
    all_months = pd.period_range(
        start=temp_df.index.min(),
        end=temp_df.index.max(),
        freq='M',
        name=temp_df.index.name,
    )
    temp_df = temp_df.reindex(all_months, fill_value=0)
    # Plot
    plot = temp_df.plot(kind='bar', y='all', title=t, figsize=(15, 10))