In [1]:
# Standard imports
import pandas as pd
import numpy as np
import time

# For multiindex slicing
idx = pd.IndexSlice

In [2]:
# Load counts df
ct_df = pd.read_csv('csv_results/oh_counts_table.csv', index_col=[0,1], usecols=range(1,9))
ct_df = ct_df.reorder_levels([1,0])

# Get rid of NpN entries
ix = ct_df.index.drop('NpN', level=1) 
ct_df = ct_df.loc[ix,:]

# Regularize counts
ct_df = ct_df.where(ct_df>0,0.0)+1
ct_df['total'] = ct_df.sum(axis=1)
ct_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,4,5,6,7,8,9,total
replicate,primer,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
rep1,ApA,1.153971,271.469849,3207.209324,2703.402852,1097.586942,556.130115,7836.953055
rep2,ApA,55.45368,246.168443,4052.189959,4813.025962,3522.058243,646.429137,13335.325424
rep3,ApA,41.758572,205.025771,3232.493927,2683.40106,1356.395298,28.347419,7547.422046
rep1,ApC,18.88195,159.577889,3050.709273,5012.971234,130.377103,1.0,8373.51745
rep2,ApC,38.747765,300.890478,3749.77525,7071.63493,2677.158825,197.929395,14036.136644


In [3]:
# Compute percentages for 
pct_df = ct_df.copy()
pct_df.loc[:,:] = 0

# Compute percent for each replicate
for rep in ['rep1', 'rep2', 'rep3']:
    rep_sum = ct_df.loc[idx[rep,:],:].sum(axis=0)
    N = rep_sum.sum()
    pct_df.loc[idx[rep,:],:] = 100*ct_df.loc[idx[rep,:],:]/N
pct_df['total'] = pct_df.sum(axis=1)

# Save
file_name = 'csv_results/oh_pct_by_rep.csv'
pct_df.to_csv(file_name)
print(f'Wrote {file_name}')

# Show
pct_df.loc[idx['rep2',:],:]

Wrote csv_results/oh_pct_by_rep.csv


Unnamed: 0_level_0,Unnamed: 1_level_0,4,5,6,7,8,9,total
replicate,primer,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
rep2,ApA,0.005944,0.026387,0.434361,0.515916,0.377535,0.069292,2.858869
rep2,ApC,0.004153,0.032253,0.401944,0.75802,0.286969,0.021216,3.009112
rep2,ApG,0.001582,0.022282,0.696035,1.149085,0.635407,0.165566,5.339915
rep2,ApU,0.003422,0.030519,0.255056,0.330341,0.282238,0.090531,1.984214
rep2,CpA,0.000807,0.028474,0.396422,0.467051,0.153149,0.000107,2.092021
rep2,CpC,0.000886,0.020373,0.525909,0.755182,0.114552,0.000107,2.834018
rep2,CpG,0.000837,0.027037,0.350925,0.457366,0.077637,0.009568,1.846742
rep2,CpU,0.003502,0.030714,0.509172,0.508497,0.145262,0.006628,2.407551
rep2,GpA,0.005612,0.025486,1.164661,1.340979,0.785299,0.142648,6.929373
rep2,GpC,0.002172,0.008267,0.856043,1.377772,0.720098,0.035305,5.999314


In [4]:
# Compute and save mean counts
mean_pct_df = pct_df.groupby(level=1).mean()

# Save
file_name = 'csv_results/oh_pct_mean.csv'
mean_pct_df.to_csv(file_name)
print(f'Wrote {file_name}')

# Show
mean_pct_df

Wrote csv_results/oh_pct_mean.csv


Unnamed: 0_level_0,4,5,6,7,8,9,total
primer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ApA,0.003884,0.028645,0.415321,0.398172,0.229551,0.046346,2.243837
ApC,0.002281,0.026573,0.387288,0.647415,0.133066,0.007156,2.40756
ApG,0.002715,0.023282,0.685212,1.035211,0.603771,0.14979,4.999961
ApU,0.003881,0.024835,0.232888,0.209582,0.14514,0.067785,1.368222
CpA,0.001293,0.028121,0.411853,0.486201,0.139861,0.00012,2.134898
CpC,0.001422,0.021094,0.539921,0.77929,0.122883,0.00012,2.92946
CpG,0.001028,0.030974,0.363309,0.467835,0.079011,0.008081,1.900476
CpU,0.00371,0.032454,0.513388,0.483922,0.127227,0.002461,2.326323
GpA,0.004935,0.023797,1.190992,1.243117,0.636192,0.100547,6.399163
GpC,0.001536,0.010175,0.932157,1.448766,0.600877,0.019446,6.025912


In [5]:
# Compute and save mean counts
std_pct_df = pct_df.groupby(level=1).std()

# Save
file_name = 'csv_results/oh_pct_std.csv'
std_pct_df.to_csv(file_name)
print(f'Wrote {file_name}')

# Show
std_pct_df

Wrote csv_results/oh_pct_std.csv


Unnamed: 0_level_0,4,5,6,7,8,9,total
primer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ApA,0.00325,0.003116,0.030261,0.103658,0.130644,0.0369,0.538191
ApC,0.001852,0.006876,0.022104,0.095819,0.139345,0.012177,0.534672
ApG,0.000987,0.001982,0.02697,0.111219,0.042747,0.01774,0.324797
ApU,0.002527,0.006027,0.022722,0.105104,0.122561,0.036129,0.539148
CpA,0.000424,0.001604,0.034442,0.034043,0.012172,1.3e-05,0.123966
CpC,0.000826,0.001975,0.033725,0.04559,0.021503,1.3e-05,0.13648
CpG,0.000243,0.003728,0.027301,0.026456,0.008754,0.001689,0.090595
CpU,0.001088,0.001634,0.036015,0.023624,0.015633,0.003618,0.119371
GpA,0.001025,0.005025,0.070455,0.136177,0.132756,0.038335,0.48535
GpC,0.000586,0.001828,0.073668,0.091786,0.113762,0.017838,0.188261
