In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
# Build, for each mouse, the sorted array of sampling times listed in the
# metadata table. Rows whose time is exactly 0.0 or 0.5 are excluded.
raw_data_dir = Path().resolve().parent / "_data" / "healthy" / "raw"
real_mouse_timepoints = {}
mouse_metadata = pd.read_csv(raw_data_dir / "metadata.tsv", sep='\t')
for mouse in ['2', '3', '4', '5']:
    # A row belongs to this mouse when its sampleID starts with "<mouse>-".
    mouse_slice = mouse_metadata.loc[
        mouse_metadata['sampleID'].str.startswith(f'{mouse}-')
        & (mouse_metadata['time'] != 0.0)
        & (mouse_metadata['time'] != 0.5)
    ]
    # np.sort returns a new ascending ndarray; the metadata frame is not modified.
    real_mouse_timepoints[mouse] = np.sort(mouse_slice['time'])
real_mouse_timepoints

{'2': array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  6. ,  7. ,
         8. ,  9. , 10. , 11. , 14. , 16. , 18. , 21. , 21.5, 22. , 22.5,
        23. , 23.5, 24. , 25. , 28. , 28.5, 29. , 29.5, 30. , 30.5, 31. ,
        31.5, 32. , 33. , 35. , 35.5, 36. , 36.5, 37. , 37.5, 38. , 39. ,
        42. , 42.5, 43. , 43.5, 44. , 44.5, 45. , 45.5, 46. , 47. , 50. ,
        50.5, 51. , 51.5, 52. , 52.5, 53. , 54. , 57. , 57.5, 58. , 58.5,
        59. , 59.5, 60. , 60.5, 61. , 62. , 63. , 64. , 64.5]),
 '3': array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  6. ,  7. ,
         8. ,  9. , 10. , 11. , 14. , 16. , 18. , 21. , 21.5, 22. , 22.5,
        23. , 23.5, 24. , 25. , 28. , 29. , 29.5, 30. , 30.5, 31. , 31.5,
        32. , 33. , 35. , 35.5, 36. , 36.5, 37. , 38. , 39. , 42. , 42.5,
        43. , 43.5, 44. , 44.5, 45. , 45.5, 46. , 47. , 50. , 50.5, 51. ,
        51.5, 52. , 52.5, 53. , 54. , 57. , 57.5, 58. , 58.5, 59. , 59.5,
        60. , 60.5, 61. , 62. , 63. , 

In [3]:
# Directory containing the R output matrices: "_outputs" next to the
# current working directory (i.e. under its parent).
output_dir = Path().resolve().parent.joinpath("_outputs")
# Destination directory, expressed relative to the current working directory.
target_dir = Path("mbtransfer")

In [5]:
# Convert the per-mouse forecast / ground-truth matrices found under
# output_dir/<subdir>/ into one long-format table per sub-directory and write
# it to target_dir/<subdir>/relative_cv.tsv.
#
# A plain list of directory names; the previous `('log')` spelling was just a
# parenthesised string, not a tuple.
subdirs = ['log']

for subdir in subdirs:
    print(f'subdir: {subdir}')
    df_entries = []
    for mouse_idx, mouse_id in enumerate(['2', '3', '4', '5']):
        mouse_pred = pd.read_csv(output_dir / subdir / f"m{mouse_id}_forecast.csv", index_col=0)
        mouse_input = pd.read_csv(output_dir / subdir / f"m{mouse_id}_ground_truth.csv", index_col=0)
        for t_idx, t in enumerate(real_mouse_timepoints[mouse_id]):
            # Only timepoints within 0.01 of an integer are exported; all
            # others (e.g. half-day samples) are skipped.
            if not (np.abs(np.round(t, 0) - t) < 0.01):
                continue
            t_integer = int(np.round(t, 0))

            # The column label and both column lookups do not depend on the
            # taxon, so resolve them once per timepoint rather than per row.
            column = f'm{mouse_id}_T{t_integer}'
            truth_column = mouse_input[column]
            pred_column = mouse_pred[column]

            for taxon_idx, taxon in enumerate(mouse_pred.index):
                df_entries.append({
                    'HeldoutSubjectId': mouse_id,
                    'HeldoutSubjectIdx': mouse_idx,
                    'Method': 'mbtransfer',
                    'TaxonIdx': taxon_idx,
                    # Position of t in the mouse's full timepoint array, so
                    # skipped non-integer times still consume an index.
                    'TimePoint': t_idx,
                    # Truth is looked up by taxon label, so the ground-truth
                    # file does not need the same row order as the forecast.
                    'Truth': truth_column.loc[taxon],
                    'Pred': pred_column.loc[taxon]
                })

    heldout_rel_df = pd.DataFrame(df_entries)
    # Release the list of row dicts once the frame has been built.
    del df_entries
    (target_dir / subdir).mkdir(exist_ok=True, parents=True)
    heldout_rel_df.sort_values(['HeldoutSubjectId', 'TaxonIdx']).to_csv(target_dir / subdir / "relative_cv.tsv", sep='\t', index=False)

subdir: log
