In [None]:
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the database parquet file into `df`; the path is relative to the notebook's working directory.
df = pd.read_parquet(path='../../output/database_creation/database.parquet')

In [None]:
# Bar chart of how many distinct light regimes appear for each plate.

regimes_per_plate = df.groupby('plate')['light_regime'].nunique()
regimes_per_plate.plot(
    kind='bar',
    ylabel='Number of light regimes',
    title='Number of light regimes per plate',
)

In [None]:
# Report how many distinct mutant_ID values the database contains
# (nunique counts unique values and ignores NaN).

n_mutant_ids = df["mutant_ID"].nunique()
print(f'Total number of mutant_ID values: {n_mutant_ids}')

In [None]:
# Plot every y2 time series (one translucent line per row), one panel per light regime.

# Raw string: `\d` must reach the regex engine as a digit class instead of
# being parsed as an (invalid) string escape sequence.
df_y2 = df.filter(regex=r'y2_\d+')

print(f'There are {len(df_y2)} rows')
# dropna(how="all") only discards rows in which *every* y2 value is NaN,
# so the rows counted here may still contain some NaNs.
print(f'Number of rows with at least one non-NaN y2 value: {len(df_y2.dropna(how="all"))}')

light_regimes = df['light_regime'].unique()

# The 2 x 3 grid below provides exactly one panel per light regime.
assert len(light_regimes) == 6, f'Expected 6 light regimes, found {len(light_regimes)}'

fig, ax = plt.subplots(2, 3, figsize=(15, 10))

# ax.flat walks the grid row by row, i.e. in the same order as ax[i//3, i%3].
for panel, light_regime in zip(ax.flat, light_regimes):
    panel.set_title(f'Light regime: {light_regime}')
    panel.set_xticks([])

    regime_rows = df_y2[df['light_regime'] == light_regime].dropna(how='all')
    if regime_rows.empty:
        # Nothing to draw for this regime; leave the titled panel blank.
        continue

    # A 2D array is drawn as one line per column, so transposing turns every
    # row (time series) into its own line with a single plot call instead of
    # one call per row.
    panel.plot(regime_rows.to_numpy().T, color='black', alpha=0.1)

plt.show()



In [None]:
# Plot, for each plate - light regime combination, the number of rows whose y2 values are all NaN.

# Select the y2 measurement columns by name instead of by position, so that
# every y2 column is checked regardless of how many there are, and the helper
# columns added below can never be included in the NaN check.
y2_values = df_y2.filter(regex=r'y2_\d+')

# Label combining plate and light regime, used as the grouping key.
df_y2['plate id'] = df['plate'].astype(str) + ' - ' + df['light_regime']

# True for rows in which every y2 measurement is missing.
df_y2['y2_all_nan'] = y2_values.isna().all(axis=1)

# Summing the boolean flag counts the all-NaN rows within each group.
all_nan_rows_per_group = df_y2.groupby('plate id')['y2_all_nan'].sum()
all_nan_rows_per_group.plot(
    kind='bar',
    ylabel='Number of rows with all NaN y2 values',
    title='Number of rows with all NaN y2 values per plate - light regime combination',
    figsize=(15, 5),
)
