## Mutations and recombination using OLSLR

Do linear regression of SNV densities against recombination rates using ordinary least squares linear regression (OLSLR). This is undertaken for comparison purposes only.

In [2]:
import numpy as np
import pandas as pd
import os
import gzip, pickle
from time import time
import statsmodels.api as sm
import warnings


print(os.environ['CONDA_DEFAULT_ENV'])
print(np.__version__)

projdir = "/Users/helmutsimon/repos/ProbPolymorphism"        #path to repository
if not os.getcwd() == projdir:
    os.chdir(projdir)
from shared import recombination

base
1.16.3


The following cell calculates results using OLSLR by chromosome. This is used for the Supplementary Table in the manuscript.

In [7]:
chroms = np.arange(1, 23).astype(str).tolist()
mrates = [1.1045541764661985e-08, 1.2481509352581898e-08, 1.254443516411994e-08, 1.2609734521720365e-08,
              1.216379148788216e-08, 1.2228991967962778e-08, 1.2298304077726808e-08, 1.3325693328599174e-08,
              1.0711369887343474e-08, 1.238059175011868e-08, 1.2241940318060874e-08, 1.2117457093135447e-08,
              1.0174746106096945e-08, 1.0146311894484388e-08, 1.0516600482736078e-08, 1.2597261162425896e-08,
              1.1681529656302903e-08, 1.1855256275211491e-08, 1.214570124735936e-08, 1.1756514975959873e-08,
              8.965863348091259e-09, 9.024242643357694e-09]
path = "/Users/helmutsimon/Google Drive/Genetics/Neighbourhood Effects Project"
rows = list()
sex = 'sexav'
for i, chrom in enumerate(chroms):
    csv_filename = path + '/Recombination_data/recomb_table_all_sexes_ch' + str(chrom) + '.csv'
    data_table = pd.read_csv(csv_filename, sep=',', index_col=0)
    data_table = recombination.correct_missing_data(data_table, 'LOCF', 'sexav')
    print(data_table)
    variants_profiled = data_table.iloc[:, np.arange(5, 17)]
    variant_counts = variants_profiled.sum(axis=1)
    var_rates = variant_counts / 10000
    std_rates = data_table['stdrate_' + sex].values
    snv_dens = np.mean(var_rates)
    std_rates = sm.add_constant(std_rates)
    ols = sm.OLS(var_rates, std_rates)
    ols_result = ols.fit()
    intercept = ols_result.params[0]
    rfunc = lambda x: (snv_dens - x) / snv_dens
    recomb_rate = np.mean(std_rates) * 0.0116 / (100 * 1e4)
    mutation_rate = mrates[i]
    mutsper = (rfunc(intercept) * mutation_rate) / recomb_rate
    rows.append((chrom, np.mean(var_rates), ols_result.params[1], ols_result.params[0],\
                 ols_result.rsquared, mutsper))

Number of rows altered (no variants or adjacent) =  439
        chr        pos  seqbin_sexav  stdrate_sexav      pos38   C->T  C->A  \
0      chr1    6000669             1       0.797296    6018022   62.0   7.0   
1      chr1    6010669             1       2.160694    6028022   53.0  12.0   
2      chr1    6020669             1       1.562406    6038022   84.0   9.0   
3      chr1    6030669             1       0.000014    6048022   72.0  11.0   
4      chr1    6040669             1       3.466929    6058022   90.0  13.0   
5      chr1    6050669             1       5.865215    6068022   94.0  16.0   
6      chr1    6060669             1       5.968401    6078022  119.0  10.0   
7      chr1    6070669             1       5.319290    6088022  105.0  15.0   
8      chr1    6080669             1       3.590860    6098022   91.0  16.0   
9      chr1    6090669             1       2.610914    6108022   67.0  13.0   
10     chr1    6100669             1       0.000000    6118022   81.0  19.0

Number of rows altered (no variants or adjacent) =  342
        chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
1      chr2    5094443             1       0.000027    5036859  70.0  13.0   
2      chr2    5104443             1       0.000018    5046859  42.0  12.0   
3      chr2    5114443             1       0.000018    5056859  50.0  13.0   
4      chr2    5124443             1       0.000002    5066859  43.0   8.0   
5      chr2    5134443             1       0.000000    5076859  60.0  19.0   
6      chr2    5144443             1       0.261157    5086859  56.0  27.0   
7      chr2    5154443             1       2.056664    5096859  62.0  10.0   
8      chr2    5164443             1       0.000000    5106859  48.0  11.0   
9      chr2    5174443             1       2.012934    5116859  49.0  15.0   
10     chr2    5184443             1       5.776606    5126859  48.0   9.0   
11     chr2    5194443             1       6.707109    5136859  78.0  14.0   
12     c

Number of rows altered (no variants or adjacent) =  122
        chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr3    5046894             1      10.755282    5030209  76.0  17.0   
1      chr3    5056894             1       9.448957    5040209  75.0  12.0   
2      chr3    5066894             1       3.093787    5050209  70.0  15.0   
3      chr3    5076894             1       1.801007    5060209  77.0  17.0   
4      chr3    5086894             1      27.447894    5070209  69.0  20.0   
5      chr3    5096894             1       2.677556    5080209  91.0  21.0   
6      chr3    5106894             1       1.798722    5090209  65.0  10.0   
7      chr3    5116894             1       2.663115    5100209  60.0  18.0   
8      chr3    5126894             1       0.773205    5110209  88.0  15.0   
9      chr3    5136894             1       0.773205    5120209  85.0  17.0   
10     chr3    5146894             1       0.709156    5130209  68.0  11.0   
11     c

Number of rows altered (no variants or adjacent) =  149
        chr        pos  seqbin_sexav  stdrate_sexav      pos38   C->T  C->A  \
0      chr4    5068508             1       3.926023    5015880   64.0  16.0   
1      chr4    5078508             1       3.044575    5025880   77.0   9.0   
2      chr4    5088508             1       0.186635    5035880   66.0  10.0   
3      chr4    5098508             1       6.390760    5045880   55.0  10.0   
4      chr4    5108508             1      20.214660    5055880   75.0  15.0   
5      chr4    5118508             1       1.187931    5065880   58.0   8.0   
6      chr4    5128508             1       7.322015    5075880   62.0  14.0   
7      chr4    5138508             1       0.002567    5085880   50.0  13.0   
8      chr4    5148508             1       0.000000    5095880   68.0   9.0   
9      chr4    5158508             1       0.000000    5105880   46.0   6.0   
10     chr4    5168508             1      12.642522    5115880   56.0   9.0

Number of rows altered (no variants or adjacent) =  163
        chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr5    5096139             1       0.653780    5043026  57.0  17.0   
1      chr5    5106139             1       1.149681    5053026  61.0  10.0   
2      chr5    5116139             1       0.579235    5063026  48.0  13.0   
3      chr5    5126139             1       0.000330    5073026  68.0  17.0   
4      chr5    5136139             1       0.000000    5083026  72.0  10.0   
5      chr5    5146139             1       0.000000    5093026  63.0  12.0   
6      chr5    5156139             1       0.000000    5103026  58.0  13.0   
7      chr5    5166139             1      16.930562    5113026  65.0  12.0   
8      chr5    5176139             1       0.000000    5123026  54.0  10.0   
9      chr5    5186139             1      17.210712    5133026  63.0   9.0   
10     chr5    5196139             1       7.846918    5143026  70.0   9.0   
11     c

Number of rows altered (no variants or adjacent) =  90
        chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr6    5125522             1       0.000000    5180289  53.0  14.0   
1      chr6    5135522             1       0.000000    5190289  62.0   7.0   
2      chr6    5145522             1       0.000000    5200289  64.0   7.0   
3      chr6    5155522             1       0.000000    5210289  52.0   7.0   
4      chr6    5165522             1       0.000000    5220290  61.0   7.0   
5      chr6    5175522             1       0.000000    5230290  72.0  11.0   
6      chr6    5185522             1       0.000000    5240290  57.0  17.0   
7      chr6    5195522             1       0.000000    5250290  74.0  14.0   
8      chr6    5205522             1       0.000000    5260290  62.0  13.0   
9      chr6    5215522             1       0.000000    5270290  41.0  10.0   
10     chr6    5225522             1       0.000000    5280290  50.0  13.0   
11     ch

Number of rows altered (no variants or adjacent) =  357
        chr        pos  seqbin_sexav  stdrate_sexav      pos38   C->T  C->A  \
0      chr7    5154081             1       0.000000    5147924   64.0  12.0   
1      chr7    5164081             1       0.000079    5157924   77.0  11.0   
2      chr7    5174081             1       0.000187    5167924   74.0  11.0   
3      chr7    5184081             1       4.793148    5177924   72.0  14.0   
4      chr7    5194081             1       0.286785    5187924   89.0   7.0   
5      chr7    5204081             1       0.000000    5197924   65.0   7.0   
6      chr7    5214081             1       0.000000    5207924   84.0  19.0   
7      chr7    5224081             1       0.000000    5217924  102.0   7.0   
8      chr7    5234081             1       0.000000    5227924  110.0  12.0   
9      chr7    5244081             1       7.984839    5237924   88.0  23.0   
10     chr7    5254081             1       0.170400    5247924   90.0  13.0

Number of rows altered (no variants or adjacent) =  280
        chr        pos  seqbin_sexav  stdrate_sexav      pos38   C->T  C->A  \
0      chr8    5171818             1       0.000000    5326888   69.0  24.0   
1      chr8    5181818             1       0.000000    5336888   55.0  16.0   
2      chr8    5191818             1       0.000000    5346888   66.0  27.0   
3      chr8    5201818             1       1.070388    5356888   89.0  26.0   
4      chr8    5211818             1       0.000000    5366888   81.0  26.0   
5      chr8    5221818             1       0.000000    5376888   65.0  22.0   
6      chr8    5231818             1       0.000000    5386888  108.0  24.0   
7      chr8    5241818             1       0.265880    5396888   74.0  28.0   
8      chr8    5251818             1       2.629691    5406888   53.0  20.0   
9      chr8    5261818             1       1.383527    5416888   61.0  20.0   
10     chr8    5271818             1       0.000000    5426888   70.0  24.0

Number of rows altered (no variants or adjacent) =  465
        chr        pos  seqbin_sexav  stdrate_sexav      pos38   C->T  C->A  \
0      chr9    5199201             1       0.449859    5209201   50.0  14.0   
1      chr9    5209201             1       0.000000    5219201   48.0  24.0   
2      chr9    5219201             1       0.000000    5229201   46.0  27.0   
3      chr9    5229201             1       0.000000    5239201   40.0  12.0   
4      chr9    5239201             1       0.000000    5249201   51.0  16.0   
5      chr9    5249201             1       0.000000    5259201   71.0  14.0   
6      chr9    5259201             1       3.239415    5269201   48.0  23.0   
7      chr9    5269201             1       4.689344    5279201   63.0  22.0   
8      chr9    5279201             1       0.323732    5289201   47.0  15.0   
9      chr9    5289201             1       1.664054    5299201   48.0  19.0   
10     chr9    5299201             1       4.014807    5309201   50.0  15.0

Number of rows altered (no variants or adjacent) =  250
         chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr10    5130853             1       0.000000    5098661  75.0  18.0   
1      chr10    5140853             1       0.000000    5108661  90.0  28.0   
2      chr10    5150853             1       0.000000    5118661  61.0  22.0   
3      chr10    5160853             1       0.000000    5128661  58.0  17.0   
4      chr10    5170853             1       0.000000    5138661  70.0  27.0   
5      chr10    5180853             1       0.000000    5148662  55.0  18.0   
6      chr10    5190853             1       0.000000    5158655  43.0  11.0   
7      chr10    5200853             1       0.000000    5168890  25.0  10.0   
8      chr10    5210853             1       0.000000    5178890   8.0   0.0   
9      chr10    5220853             1       0.915510    5188890  33.0  14.0   
10     chr10    5230853             1       0.821688    5198890  40.0   4.0

Number of rows altered (no variants or adjacent) =  238
         chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr11    5194256             1       8.674812    5216450  75.0  19.0   
1      chr11    5204256             1       8.674812    5226450  56.0  13.0   
2      chr11    5214256             1       2.803699    5236450  44.0  11.0   
3      chr11    5224256             1       0.000000    5246450  76.0  23.0   
4      chr11    5234256             1       0.177461    5256450  74.0  24.0   
5      chr11    5244256             1       0.533718    5266450  63.0  16.0   
6      chr11    5254256             1       0.285714    5276450  58.0  14.0   
7      chr11    5264256             1       0.060721    5286450  45.0  11.0   
8      chr11    5274256             1       2.166931    5296450  61.0  13.0   
9      chr11    5284256             1      10.075029    5306450  52.0   8.0   
10     chr11    5294256             1       0.000000    5316450  51.0  14.0

Number of rows altered (no variants or adjacent) =  411
         chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0      chr12    5069079             1       8.356125    5089652  76.0  12.0   
1      chr12    5079079             1       1.319354    5099652  59.0  13.0   
2      chr12    5089079             1       3.482438    5109652  52.0   9.0   
3      chr12    5099079             1       0.000000    5119652  38.0  11.0   
4      chr12    5109079             1       0.000000    5129652  50.0  14.0   
5      chr12    5119079             1       0.000000    5139652  65.0   9.0   
6      chr12    5129079             1       0.000000    5149652  57.0  12.0   
7      chr12    5139079             1       0.000000    5159652  60.0   7.0   
8      chr12    5149079             1       7.362189    5169652  75.0  19.0   
9      chr12    5159079             1       0.000000    5179652  52.0  10.0   
10     chr12    5169079             1       0.000000    5189652  56.0  16.0

        chr        pos  seqbin_sexav  stdrate_sexav      pos38  C->T  C->A  \
0     chr13   23143775             1       0.000000   23671636  55.0  13.0   
1     chr13   23153775             1       0.000000   23681636  97.0  12.0   
2     chr13   23163775             1       3.578962   23691636  67.0  22.0   
3     chr13   23173775             1       1.471687   23701636  86.0  16.0   
4     chr13   23183775             1       3.531287   23711636  68.0  19.0   
5     chr13   23193775             1       0.389501   23721636  67.0  12.0   
6     chr13   23203775             1       0.000000   23731636  54.0  11.0   
7     chr13   23213775             1       0.000000   23741636  52.0   7.0   
8     chr13   23223775             1       0.000000   23751636  60.0  16.0   
9     chr13   23233775             1       0.000000   23761636  61.0   8.0   
10    chr13   23243775             1       0.000000   23771636  53.0  18.0   
11    chr13   23253775             1       0.000000   23781636  

Number of rows altered (no variants or adjacent) =  189
        chr       pos  seqbin_sexav  stdrate_sexav     pos38  C->T  C->A  \
0     chr15  23426386             1       0.371557  25630146  51.0  15.0   
1     chr15  23436386             1       0.158811  25640146  66.0  16.0   
2     chr15  23446386             1       1.517456  25650146  57.0  11.0   
3     chr15  23456386             1       1.127358  25660146  51.0  14.0   
4     chr15  23466386             1      20.641971  25670146  75.0  11.0   
5     chr15  23476386             1       8.614193  25680146  71.0   9.0   
6     chr15  23486386             1       0.000000  25690146  89.0   9.0   
7     chr15  23496386             1       0.000000  25700146  59.0  15.0   
8     chr15  23506386             1       0.006882  25710146  75.0   9.0   
9     chr15  23516386             1       8.383866  25720146  88.0  15.0   
10    chr15  23526386             1       3.244950  25730146  91.0  17.0   
11    chr15  23536386           

Number of rows altered (no variants or adjacent) =  320
        chr       pos  seqbin_sexav  stdrate_sexav     pos38   C->T  C->A  \
0     chr16   5042354             1       0.782797   5052352   68.0  14.0   
1     chr16   5052354             1       2.035367   5062352  110.0  16.0   
2     chr16   5062354             1       2.874000   5072352   89.0  17.0   
3     chr16   5072354             1       2.874000   5082352  103.0   9.0   
4     chr16   5082354             1       2.874000   5092352  118.0  22.0   
5     chr16   5092354             1       2.874000   5102352   85.0  16.0   
6     chr16   5102354             1       2.874000   5112352   99.0  24.0   
7     chr16   5112354             1       2.874000   5122352  103.0  25.0   
8     chr16   5122354             1       2.564419   5132352   77.0  15.0   
9     chr16   5132354             1       1.606265   5142352  107.0  24.0   
10    chr16   5142354             1       1.606265   5152352   93.0  17.0   
11    chr16   515235

Number of rows altered (no variants or adjacent) =  224
        chr       pos  seqbin_sexav  stdrate_sexav     pos38   C->T  C->A  \
0     chr17   5058011             1       0.438115   5213992   77.0   9.0   
1     chr17   5068011             1       0.116363   5223992   79.0  14.0   
2     chr17   5078011             1       0.000000   5233992   97.0  10.0   
3     chr17   5088011             1       0.200393   5243992   56.0  10.0   
4     chr17   5098011             1       0.116581   5253992   73.0   7.0   
5     chr17   5108011             1       0.059993   5263992   83.0  15.0   
6     chr17   5118011             1       0.059993   5273992   66.0   6.0   
7     chr17   5128011             1       0.059993   5283992   55.0  12.0   
8     chr17   5138011             1       0.040873   5293992   78.0   6.0   
9     chr17   5148011             1       0.000000   5303992   45.0   6.0   
10    chr17   5158011             1       0.000000   5313992   74.0   5.0   
11    chr17   516801

Number of rows altered (no variants or adjacent) =  264
        chr       pos  seqbin_sexav  stdrate_sexav     pos38  C->T  C->A  \
0     chr19   5222034             1       9.044262   5271023  99.0   6.0   
1     chr19   5232034             1       0.000000   5281023  87.0  12.0   
2     chr19   5242034             1       1.598940   5291023  65.0  25.0   
3     chr19   5252034             1       1.848918   5301023  82.0  10.0   
4     chr19   5262034             1       0.794519   5311023  67.0  14.0   
5     chr19   5272034             1       0.037916   5321023  76.0  18.0   
6     chr19   5282034             1       6.314572   5331023  90.0  15.0   
7     chr19   5292034             1       3.023406   5341023  53.0  13.0   
8     chr19   5302034             1       0.000000   5351023  78.0  11.0   
9     chr19   5312034             1       2.284232   5361023  68.0  10.0   
10    chr19   5322034             1       3.347841   5371023  82.0  21.0   
11    chr19   5332034           

Number of rows altered (no variants or adjacent) =  63
        chr       pos  seqbin_sexav  stdrate_sexav     pos38  C->T  C->A  \
0     chr20   5016799             1       0.000000   5088153  76.0  13.0   
1     chr20   5026799             1       0.607111   5098153  70.0  10.0   
2     chr20   5036799             1       3.260531   5108153  55.0   5.0   
3     chr20   5046799             1       2.678647   5118153  52.0  10.0   
4     chr20   5056799             1       1.901941   5128153  58.0   7.0   
5     chr20   5066799             1       2.147762   5138153  57.0   3.0   
6     chr20   5076799             1       2.499381   5148153  53.0   9.0   
7     chr20   5086799             1       1.026286   5158153  50.0  10.0   
8     chr20   5096799             1       0.144836   5168153  79.0   9.0   
9     chr20   5106799             1       0.176774   5178153  71.0   8.0   
10    chr20   5116799             1       0.001471   5188153  57.0  16.0   
11    chr20   5126799            

        chr       pos  seqbin_sexav  stdrate_sexav     pos38   C->T  C->A  \
0     chr22  20417698             1       1.576316  21733409   54.0  14.0   
1     chr22  20427698             1       2.138331  21743409   84.0  11.0   
2     chr22  20437698             1       0.635161  21753409   82.0  16.0   
3     chr22  20447698             1       0.000000  21763409   70.0  17.0   
4     chr22  20457698             1       0.000000  21773409   78.0  10.0   
5     chr22  20467698             1       0.000000  21783409   69.0  15.0   
6     chr22  20477698             1       0.000000  21793409   60.0  16.0   
7     chr22  20487698             1       0.000000  21803409   68.0   9.0   
8     chr22  20497698             1       0.000000  21813409   59.0   7.0   
9     chr22  20507698             1       0.000000  21823409   37.0  12.0   
10    chr22  20517698             1       0.000000  21833409   53.0  15.0   
11    chr22  20527698             1       0.000000  21843409   64.0  11.0   

In [9]:
columns = ['chrom', 'SNV density', 'Slope', 'Intercept', 'Variance', 'Rate']
results_table = pd.DataFrame(rows, columns=columns)
results_table.set_index(['chrom'], inplace=True)
results_table.to_csv(path + '/Article_references/OLSLR_supplementary.csv', sep=',')
results_table

Unnamed: 0_level_0,SNV density,Slope,Intercept,Variance,Rate
chrom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.025188,0.000289,0.024902,0.022531,0.010845
2,0.026017,0.000273,0.025779,0.016917,0.010528
3,0.026216,0.000211,0.026024,0.019305,0.008311
4,0.026745,0.000207,0.026558,0.014499,0.00798
5,0.026069,0.000199,0.025887,0.012299,0.007642
6,0.026345,0.000137,0.026225,0.005215,0.00511
7,0.026218,0.000288,0.025949,0.018673,0.011257
8,0.027383,0.000231,0.02717,0.011269,0.009315
9,0.025376,0.0005,0.024874,0.026225,0.018223
10,0.026308,0.000286,0.02602,0.018848,0.01163
