In [2]:
import pandas as pd

from utils.prepare_data import save_data

In [3]:
# Bounds of the sweep region, taken from the Snakemake workflow configuration.
sweep_cfg = snakemake.config
start, end = (sweep_cfg[key] for key in ("sweep_region_start", "sweep_region_end"))

In [4]:
# Load the space-separated map table and index it by physical position.
# Dividing by 100 converts centiMorgans to the number of recombination events.
maps = pd.read_table(snakemake.input[0], sep=" ").set_index('Physical_Pos') / 100

# Keep only the rows whose position lies inside [start, end] (both inclusive).
in_sweep = (maps.index >= start) & (maps.index <= end)
region = maps.loc[in_sweep]

### Rates for the sweep region

In [6]:
# Per-column rate over the sweep region: difference between the last and the
# first map row inside the region, divided by the configured region length.
span = end - start
rates = (region.iloc[-1] - region.iloc[0]) / span
rates["start"], rates["end"] = start, end

# Written as headerless "label<TAB>value" rows (the Series index is the label).
rates.to_csv(
    snakemake.output["recombination_at_sweep"],
    sep='\t',
    header=False,
    index=True,
)

### Rates across the chromosome

In [27]:
# Rates in consecutive windows of `ws` positions along the chromosome.
ws = 1_000_000
# NOTE(review): `increment` is never used below; windows advance by a full
# `ws`, so they do not overlap. Confirm whether a sliding step was intended.
increment = 100_000
# Note: rebinding `start`/`end` here overwrites the sweep-region bounds that
# were read from the config earlier in the notebook.
start = 1
end = ws

result = []

# The largest position does not change inside the loop, so look it up once.
last_position = maps.index.max()

while end < last_position:
    region = maps.loc[maps.index.to_series().between(start, end)]
    # A window without any map row has no rate to report. Skip it explicitly
    # instead of hiding the resulting IndexError behind a bare `except`.
    if not region.empty:
        rates = (region.iloc[-1] - region.iloc[0])/(end - start)
        rates["start"] = start
        rates["end"] = end
        result.append(rates)
    start = start + ws
    end = end + ws

# One row per non-empty window, tagged with the window size that produced it.
df_large = pd.DataFrame.from_records(result).assign(window_size=ws)

In [28]:
# Rates in consecutive windows of `ws` positions along the chromosome.
ws = 10_000
# NOTE(review): `increment` is never used below; windows advance by a full
# `ws`, so they do not overlap. Confirm whether a sliding step was intended.
increment = 1_000
# Note: rebinding `start`/`end` here overwrites the sweep-region bounds that
# were read from the config earlier in the notebook.
start = 1
end = ws

result = []

# The largest position does not change inside the loop, so look it up once.
last_position = maps.index.max()

while end < last_position:
    region = maps.loc[maps.index.to_series().between(start, end)]
    # A window without any map row has no rate to report. Skip it explicitly
    # instead of hiding the resulting IndexError behind a bare `except`.
    if not region.empty:
        rates = (region.iloc[-1] - region.iloc[0])/(end - start)
        rates["start"] = start
        rates["end"] = end
        result.append(rates)
    start = start + ws
    end = end + ws

# One row per non-empty window, tagged with the window size that produced it.
df_fine = pd.DataFrame.from_records(result).assign(window_size=ws)

In [30]:
# Stack the two window tables row-wise; the original row labels are kept.
window_tables = [df_large, df_fine]
df = pd.concat(window_tables)

In [31]:
# Hand the combined window table to the project's save helper.
chromosome_rates_path = snakemake.output["chromosome_recombinations"]
save_data(df, chromosome_rates_path)

### Recombination map for Relate

Format described in https://myersgroup.github.io/relate/input_data.html#.map

In [103]:
# Re-read the map table from disk: the `maps` built earlier was divided by 100,
# whereas this section works with the values exactly as stored in the file.
genetic_map_path = snakemake.input[0]
maps = pd.read_table(genetic_map_path, sep=" ").set_index('Physical_Pos')

In [108]:
# Display the available map columns (one of them can be picked in the next cell).
maps.columns

In [114]:
# What map to use?
# Currently: the row-wise mean over every column of `maps`.
# Alternative (single map): centimorgans = maps['AA_Map']
centimorgans = maps.mean(axis=1)

In [115]:
# Differences between consecutive map rows, labelled by the left-hand position
# of each interval (hence `index[:-1]`, and one row fewer than `maps`).
# Slicing is done on the underlying arrays so it is always positional:
# `centimorgans[1:]` on the Series itself is positional or label-based
# depending on the dtype of the index.
interval_starts = maps.index[:-1]
cM_values = centimorgans.values
positions = maps.index.values

cM_diff = pd.Series(cM_values[1:] - cM_values[:-1], index=interval_starts)

pos_diff = pd.Series(positions[1:] - positions[:-1], index=interval_starts)

# cM per position unit, scaled by 1e6 (cM/Mb when positions are base pairs).
rates = (cM_diff/pos_diff)*1e6

In [116]:
# Assemble the output table; dict order fixes the column order: rate first,
# then the map value at the start of each interval (last map row dropped).
relate_columns = {
    'cM/Mb': rates,
    'cM': centimorgans[:-1],
}
relate_map = pd.DataFrame(relate_columns)
relate_map.index.name = 'position'

In [117]:
# Display the assembled table for a visual check before it is written out.
relate_map

In [118]:
# Write the table space-separated, with the `position` index as first column.
relate_map_path = snakemake.output['relate']
relate_map.to_csv(relate_map_path, index=True, sep=' ')