### Table 1 and Figure 3 in CNV in schizophrenia in Sweden

In [1]:
import pandas as pd
import numpy as np
from pprint import pprint
from collections import Counter
from pandasql import sqldf
from scipy import stats
import statsmodels.api as sm

#### Table 1

In [2]:
sw_pheno = pd.read_table("../data/swcnv/swcnv.pheno", sep="\s+")
sw_sum = pd.read_table("../data/swcnv/swcnv.qc6.cnv.summary", sep="\s+") # "CHR"
sw_fam = pd.read_table("../data/swcnv/swcnv.qc6.fam", sep="\s+", header = None, 
                       usecols = (0,1,4,5), names = ["FID", "IID", "sex", "Case-Ctrl"]) # sex and case-ctrl
sw_cnv = pd.read_table("../data/swcnv/swcnv.qc6.cnv", sep="\s+")
sw_cluster = pd.read_table("../data/swcnv/swcnv.clusters", sep="\s+") # Sw1, Sw2-4, Sw5-6
sw_cluster = sw_cluster.rename(columns={"uFID": "FID", "uIID": "IID"})
sw_indiv = pd.read_table("../data/swcnv/swcnv.qc6.cnv.indiv", sep="\s+") # "NSEG": number of CNV in each sample
sw_cluster_indiv = pd.merge(sw_cluster, sw_indiv, how="inner", on=["FID", "IID"])
sw_cluster_cnv = pd.merge(sw_cluster, sw_cnv, how="inner", on=["FID", "IID"])
# One line for each individual with at least one CNV
sw_cluster_cnv_indiv = pd.merge(sw_cluster_cnv, sw_indiv, how="inner", on=["FID", "IID"])
sw_cluster_cnv_indiv["CNV_LEN"] = (sw_cluster_cnv_indiv["BP2"] - sw_cluster_cnv_indiv["BP1"])/1000

In [3]:
# Number of Cases and Controls, 2: cases, 1: controls
sw_fam.groupby("Case-Ctrl").size()

Case-Ctrl
1    5917
2    4719
dtype: int64

In [4]:
# sex by status
sex_by_casectrl = sw_fam.groupby(["Case-Ctrl", "sex"]).size()
print (sex_by_casectrl)

Case-Ctrl  sex
1          1      3034
           2      2883
2          1      2826
           2      1893
dtype: int64


In [5]:
prp_male_case = sex_by_casectrl[2][1] / (sex_by_casectrl[2][1] + sex_by_casectrl[2][2])
print (prp_male_case)
prp_male_ctrl = sex_by_casectrl[1][1] / (sex_by_casectrl[1][1] + sex_by_casectrl[1][2])
print (prp_male_ctrl)

0.598855689765
0.512759844516


In [6]:
print (sw_cluster_indiv.groupby(["tr", "PHE"]).size())

tr            PHE
scz_s234_eur  1      2137
              2      1847
scz_swe1_eur  1       206
              2       207
scz_swe5_eur  1      2456
              2      1674
scz_swe6_eur  1      1118
              2       991
dtype: int64


In [7]:
# "NSEG": number of CNV in each subject
mean_cnv_case_sw1 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_swe1_eur"]
                            [sw_cluster_indiv["PHE"]==2].tolist())
mean_cnv_ctrl_sw1 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_swe1_eur"]
                            [sw_cluster_indiv["PHE"]==1].tolist())
print (mean_cnv_case_sw1, mean_cnv_ctrl_sw1)
mean_cnv_case_sw234 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_s234_eur"]
                              [sw_cluster_indiv["PHE"]==2].tolist())
mean_cnv_ctrl_sw234 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_s234_eur"]
                              [sw_cluster_indiv["PHE"]==1].tolist())
print (mean_cnv_case_sw234, mean_cnv_ctrl_sw234)
mean_cnv_case_sw56 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_swe5_eur"]
                            [sw_cluster_indiv["PHE"]==2].tolist()
                            + sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_swe6_eur"]
                            [sw_cluster_indiv["PHE"]==2].tolist())
mean_cnv_ctrl_sw56 = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]==("scz_swe5_eur")]
                             [sw_cluster_indiv["PHE"]==1].tolist()
                            + sw_cluster_indiv["NSEG"][sw_cluster_indiv["tr"]=="scz_swe6_eur"]
                            [sw_cluster_indiv["PHE"]==1].tolist())
print (mean_cnv_case_sw56, mean_cnv_ctrl_sw56)
mean_cnv_case = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["PHE"]==2].tolist())
mean_cnv_ctrl = np.mean(sw_cluster_indiv["NSEG"][sw_cluster_indiv["PHE"]==1].tolist())
print (mean_cnv_case, mean_cnv_ctrl)

1.05797101449 0.844660194175
1.2360584732 1.20870379036
0.758348968105 0.683547845551
0.958465776648 0.878823728241


In [8]:
prp_del_case = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==2].tolist()
prp_del_ctrl = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==1].tolist()
# 1 is deletion, 3 is duplication
print (prp_del_case.count(1)/len(prp_del_case))
print (prp_del_ctrl.count(1)/len(prp_del_ctrl))

0.38182622153437984
0.38211538461538463


In [9]:
prp_del_case_sw1 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe1_eur"].tolist()
prp_del_ctrl_sw1 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe1_eur"].tolist()
print (prp_del_case_sw1.count(1)/len(prp_del_case_sw1))
print (prp_del_ctrl_sw1.count(1)/len(prp_del_ctrl_sw1))

0.4794520547945205
0.39080459770114945


In [10]:
prp_del_case_sw234 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_s234_eur"].tolist()
prp_del_ctrl_sw234 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_s234_eur"].tolist()
print (prp_del_case_sw234.count(1)/len(prp_del_case_sw234))
print (prp_del_ctrl_sw234.count(1)/len(prp_del_ctrl_sw234))

0.3587385019710907
0.36624080526519553


In [11]:
prp_del_case_sw56 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe5_eur"].tolist() + sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe6_eur"].tolist()
prp_del_ctrl_sw56 = sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe5_eur"].tolist() + sw_cluster_cnv_indiv["TYPE"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe6_eur"].tolist()
print (prp_del_case_sw56.count(1)/len(prp_del_case_sw56))
print (prp_del_ctrl_sw56.count(1)/len(prp_del_ctrl_sw56))

0.39732805541810984
0.3982808022922636


In [12]:
median_cnv_len_case = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==2].tolist())
median_cnv_len_ctrl = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==1].tolist())
print (median_cnv_len_case)
print (median_cnv_len_ctrl)

188.797
182.711


In [13]:
median_cnv_len_case_sw1 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe1_eur"].tolist())
median_cnv_len_ctrl_sw1 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe1_eur"].tolist())
print (median_cnv_len_case_sw1)
print (median_cnv_len_ctrl_sw1)

178.016
181.313


In [14]:
median_cnv_len_case_sw234 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_s234_eur"].tolist())
median_cnv_len_ctrl_sw234 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_s234_eur"].tolist())
print (median_cnv_len_case_sw234)
print (median_cnv_len_ctrl_sw234)

188.213
182.211


In [15]:
median_cnv_len_case_sw56 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe5_eur"].tolist() + sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==2][sw_cluster_cnv_indiv["tr"]=="scz_swe6_eur"].tolist())
median_cnv_len_ctrl_sw56 = np.median(sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe5_eur"].tolist() + sw_cluster_cnv_indiv["CNV_LEN"][sw_cluster_cnv_indiv["PHE"]==1][sw_cluster_cnv_indiv["tr"]=="scz_swe6_eur"].tolist())
print (median_cnv_len_case_sw56)
print (median_cnv_len_ctrl_sw56)

190.739
183.127


#### Figure 3

In [42]:
sw_cluster_indiv = pd.merge(sw_cluster, sw_indiv, how="inner", on=["FID", "IID"])
sw_cluster_indiv_cnv = pd.merge(sw_cluster_indiv, sw_cnv, how="outer", on=["FID", "IID"])
sw_cluster_indiv_cnv["PHE"] = sw_cluster_indiv_cnv.apply(lambda row: 1 if row["PHE"]==2 else 0, axis=1)
# sw_cluster_indiv_cnv

In [18]:
# One line for each individual, with CNV or without CNV
query = '''
SELECT FID, IID, PHE, NSEG, TYPE, tr
FROM sw_cluster_indiv_cnv
GROUP BY FID, IID, PHE, NSEG, tr
ORDER BY PHE DESC
'''
# FIXME: needs to combine TYPE
sw_indiv_adj = sqldf(query)
# sw_indiv_adj = sw_indiv_adj.rename(columns={"PHE_NEW": "PHE"})
sw_indiv_adj["hasCNV"] = sw_indiv_adj.apply(lambda row: 1 if row["NSEG"]>0 else 0, axis=1)
sw_indiv_adj["tr_adj"] = sw_indiv_adj.apply(lambda row: 1 if row["tr"]=="scz_swe1_eur" 
                                else 2 if row["tr"]=="scz_s234_eur" else 3, axis=1)
sw_indiv_adj

Unnamed: 0,FID,IID,PHE,NSEG,TYPE,tr,hasCNV,tr_adj
0,PT-1RTZ,1,1,0,,scz_swe1_eur,0,1
1,PT-1RU7,1,1,0,,scz_swe1_eur,0,1
2,PT-1RUJ,1,1,1,3.0,scz_swe1_eur,1,1
3,PT-1RUO,1,1,1,3.0,scz_swe1_eur,1,1
4,PT-1RV2,1,1,3,3.0,scz_swe1_eur,1,1
5,PT-1RV4,1,1,1,3.0,scz_swe1_eur,1,1
6,PT-1RVE,1,1,0,,scz_swe1_eur,0,1
7,PT-1RVH,1,1,0,,scz_swe1_eur,0,1
8,PT-1RVS,1,1,0,,scz_swe1_eur,0,1
9,PT-1RW3,1,1,1,1.0,scz_swe1_eur,1,1


In [19]:
# all CNVs logistic regression
indep_cols = sw_indiv_adj[["NSEG", "tr_adj"]]
all_CNV_logit = sm.Logit(sw_indiv_adj["PHE"], indep_cols)
res = all_CNV_logit.fit()
print (res.summary())
print (np.exp(res.params))

Optimization terminated successfully.
         Current function value: 0.685450
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:                    PHE   No. Observations:                10636
Model:                          Logit   Df Residuals:                    10634
Method:                           MLE   Df Model:                            1
Date:                Mon, 17 Jul 2017   Pseudo R-squ.:                0.001951
Time:                        18:04:21   Log-Likelihood:                -7290.4
converged:                       True   LL-Null:                       -7304.7
                                        LLR p-value:                 9.354e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
NSEG           0.0634      0.018      3.543      0.000       0.028       0.098
tr_adj        -0.1124      0.

In [20]:
# all CNV OR, Fisher's test
query = '''
SELECT PHE, TYPE, count(FID)
FROM sw_indiv_adj
GROUP BY PHE, TYPE
ORDER BY count(FID) DESC
'''
print (sqldf(query))
print (sw_indiv_adj.groupby(["PHE", "TYPE"]).size())
stats.fisher_exact([[1659+1083+50+5, 1969+1288+46+4], [1922, 2610]])[0]

   PHE  TYPE  count(FID)
0    0   NaN        2610
1    0   3.0        1969
2    1   NaN        1922
3    1   3.0        1659
4    0   1.0        1288
5    1   1.0        1083
6    1   4.0          50
7    0   4.0          46
8    1   0.0           5
9    0   0.0           4
PHE  TYPE
0    0.0        4
     1.0     1288
     3.0     1969
     4.0       46
1    0.0        5
     1.0     1083
     3.0     1659
     4.0       50
dtype: int64


1.1485380709477924

In [43]:
sw_cluster_indiv_cnv

Unnamed: 0,FID,IID,tr,PHE,NSEG,KB,KBAVG,CHR,BP1,BP2,TYPE,SCORE,SITES
0,PT-BSLD,1,scz_s234_eur,1,0,0.000,0.000,,,,,,
1,PT-BPTP,1,scz_s234_eur,1,1,101.080,101.080,7.0,9128802.0,9229882.0,1.0,65.010,86.0
2,PT-BPAI,1,scz_s234_eur,1,1,499.114,499.114,16.0,18228118.0,18727232.0,3.0,13.130,41.0
3,PT-BSLJ,1,scz_s234_eur,1,1,136.615,136.615,1.0,16869363.0,17005978.0,3.0,38.680,55.0
4,PT-BPIS,1,scz_s234_eur,1,0,0.000,0.000,,,,,,
5,PT-BQP5,1,scz_s234_eur,1,1,244.253,244.253,16.0,18830938.0,19075191.0,3.0,97.890,113.0
6,PT-BSXQ,1,scz_s234_eur,1,0,0.000,0.000,,,,,,
7,PT-BP9I,1,scz_s234_eur,1,1,137.559,137.559,21.0,37481955.0,37619514.0,4.0,147.000,100.0
8,PT-BR4P,1,scz_s234_eur,1,0,0.000,0.000,,,,,,
9,PT-BQGK,1,scz_s234_eur,1,1,1191.880,1191.880,15.0,97984319.0,99176197.0,3.0,811.310,934.0


In [62]:
# the occurrence of each CNV, no matter in cases or controls
query = '''
SELECT FID, CHR, BP1, BP2, PHE, count(FID)
FROM sw_cluster_indiv_cnv
WHERE NSEG != 0
GROUP BY CHR, BP1, BP2
ORDER BY count(FID) DESC
'''
n_CNV_occur = sqldf(query)
n_CNV_occur = n_CNV_occur.rename(columns={"count(FID)": "n_CNV_occur"})
print (n_CNV_occur)
### single occurrence CNVs
single_CNV = n_CNV_occur[n_CNV_occur["n_CNV_occur"]==1]
print (single_CNV)
# stats.fisher_exact([[2541, 2832], [4719-2541, 5917-2832]])

          FID   CHR          BP1          BP2  PHE  n_CNV_occur
0     PT-P29N   3.0   35826707.0   35938795.0    0           51
1     PT-OPVY   7.0    9128070.0    9229882.0    0           40
2     PT-CDFS   1.0  104109238.0  104268222.0    0           35
3     PT-1TB8  22.0   24283097.0   24396622.0    0           35
4     PT-CDH4  12.0   19474770.0   19576936.0    0           34
5     PT-OQ36   7.0   64679561.0   65088807.0    0           33
6     PT-P298   8.0    2346867.0    2582764.0    0           32
7     PT-3MID   1.0  161496900.0  161638530.0    0           31
8     PT-8W3I   3.0  100340068.0  100442478.0    0           30
9     PT-BQG2   9.0   43315670.0   43800186.0    0           29
10    PT-OQ4D   8.0   16416409.0   16526958.0    0           28
11    PT-BSWG   3.0   35803775.0   35941770.0    0           27
12    PT-8VX8   1.0  104155643.0  104268222.0    0           24
13    PT-OO6K   6.0  162724247.0  162914986.0    0           24
14    PT-OPJ6   7.0    9127173.0    9229

In [22]:
single_CNV_allindiv = pd.merge(sw_indiv_adj, single_CNV, how="outer", on=["FID", "PHE"])
# print (single_CNV_allindiv)
query = '''
SELECT FID, PHE, NSEG, tr_adj, count(n_CNV_occur)
FROM single_CNV_allindiv
GROUP BY FID
ORDER BY count(n_CNV_occur) DESC
'''
single_CNV_indiv = sqldf(query)
single_CNV_indiv = single_CNV_indiv.rename(columns={"count(n_CNV_occur)": "single_CNV_count"})
single_CNV_indiv["tmp"] = single_CNV_indiv.apply(lambda row: 1 if row["single_CNV_count"]>0 else 0, axis=1)
print (single_CNV_indiv)

                FID  PHE  NSEG  tr_adj  single_CNV_count  tmp
0           PT-1S18    1    11       1                10    1
1           PT-ES6N    0     7       3                 7    1
2           PT-OQ3N    1     8       3                 7    1
3           PT-9Z95    1     6       2                 6    1
4           PT-BPTG    1     6       2                 6    1
5           PT-BR5H    0     7       2                 6    1
6           PT-2867    1     5       2                 5    1
7           PT-286L    0     5       2                 5    1
8           PT-3MNV    1     6       2                 5    1
9      PT-8TV3_dup1    1     6       3                 5    1
10          PT-8UZK    1     5       2                 5    1
11          PT-8WE3    1     8       2                 5    1
12          PT-FFLV    0     5       3                 5    1
13          PT-ITLG    0     5       3                 5    1
14          PT-1RWE    0     4       1                 4    1
15      

In [23]:
indep_cols1 = single_CNV_indiv[["single_CNV_count", "tr_adj"]]
single_CNV_logit = sm.Logit(single_CNV_indiv["PHE"], indep_cols1)
res1 = single_CNV_logit.fit()
print (res1.summary())
print (np.exp(res1.params))

Optimization terminated successfully.
         Current function value: 0.685408
         Iterations 4
                           Logit Regression Results                           
Dep. Variable:                    PHE   No. Observations:                10636
Model:                          Logit   Df Residuals:                    10634
Method:                           MLE   Df Model:                            1
Date:                Mon, 17 Jul 2017   Pseudo R-squ.:                0.002013
Time:                        18:04:22   Log-Likelihood:                -7290.0
converged:                       True   LL-Null:                       -7304.7
                                        LLR p-value:                 5.873e-08
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
single_CNV_count     0.0901      0.025      3.667      0.000       0.042       0.138
tr_adj     

In [59]:
n_CNV_occur.groupby(["n_CNV_occur"]).size()

n_CNV_occur
1     5373
2      529
3      167
4       92
5       44
6       43
7       25
8       24
9       14
10      13
11       5
12      11
13       3
14       9
15       5
16       1
17       3
18       5
19       5
20       5
22       3
24       3
27       1
28       1
29       1
30       1
31       1
32       1
33       1
34       1
35       2
40       1
51       1
dtype: int64

In [60]:
n_CNV_occur["n_CNV_occur"].sum()

9723

In [37]:
# the occurrence of each CNV, in cases or controls separately
query = '''
SELECT FID, CHR, BP1, BP2, PHE, count(FID)
FROM sw_cluster_indiv_cnv
WHERE NSEG != 0
GROUP BY CHR, BP1, BP2, PHE
ORDER BY count(FID) DESC
'''
n_CNV_occur_sep = sqldf(query)
n_CNV_occur_sep = n_CNV_occur_sep.rename(columns={"count(FID)": "n_CNV_occur"})
print (n_CNV_occur_sep)
# 2-6 occurrence CNVs
query = '''
SELECT PHE, CHR, BP1, BP2, count(n_CNV_occur)
FROM n_CNV_occur_sep
WHERE n_CNV_occur >= 2 AND n_CNV_occur <= 6
GROUP BY PHE, CHR, BP1, BP2
ORDER BY count(n_CNV_occur) DESC
'''
sqldf(query)

          FID   CHR          BP1          BP2  PHE  n_CNV_occur
0     PT-P29N   3.0   35826707.0   35938795.0    0           30
1     PT-OPVY   7.0    9128070.0    9229882.0    0           26
2     PT-OQ36   7.0   64679561.0   65088807.0    0           24
3     PT-CDFS   1.0  104109238.0  104268222.0    0           21
4     PT-3MID   1.0  161496900.0  161638530.0    0           21
5     PT-P29T   3.0   35826707.0   35938795.0    1           21
6     PT-1TC3  22.0   24283097.0   24396622.0    1           20
7     PT-CDH4  12.0   19474770.0   19576936.0    0           19
8     PT-P298   8.0    2346867.0    2582764.0    0           18
9     PT-29CN   3.0  100340068.0  100442478.0    1           17
10    PT-OPJ6   7.0    9127173.0    9229882.0    0           17
11    PT-OQ4D   8.0   16416409.0   16526958.0    0           17
12    PT-BQG2   9.0   43315670.0   43800186.0    0           17
13    PT-BQOS   1.0  104155643.0  104268222.0    1           15
14    PT-9ZDN  12.0   19474770.0   19576

Unnamed: 0,PHE,CHR,BP1,BP2,count(n_CNV_occur)
0,0,1.0,12852748.0,13015495.0,1
1,0,1.0,12858053.0,12961019.0,1
2,0,1.0,12867090.0,13015495.0,1
3,0,1.0,16844932.0,17262247.0,1
4,0,1.0,16869363.0,16970456.0,1
5,0,1.0,16869363.0,16986851.0,1
6,0,1.0,16869363.0,17005978.0,1
7,0,1.0,16886135.0,16986851.0,1
8,0,1.0,16886135.0,17082591.0,1
9,0,1.0,16886135.0,17114712.0,1


In [25]:
# res1 = pd.DataFrame(columns=("FID", "IID", "PHE", "n_cnv"))
# i = 0
# for tup in sw_indiv.itertuples():
#     if tup.NSEG>0:
#         for d in range(tup.NSEG):
#             res1.loc[i] = [tup.FID, tup.IID, tup.PHE, 1]
#             i += 1
#     else:
#         res1.loc[i] = [tup.FID, tup.IID, tup.PHE, 0]
#         i += 1
# print (res1)

In [26]:
sw_indiv["NSEG"][sw_indiv["NSEG"]!=0].count()

6104

In [27]:
sw_indiv["NSEG"][sw_indiv["NSEG"]==0].count()

4532

In [28]:
sw_mds = pd.read_table("../data/swcnv/swcnv.mds", sep="\s+")

In [29]:
sw_mds["C_sum"] = sw_mds["C1"] + sw_mds["C2"] + sw_mds["C3"] + sw_mds["C4"] + sw_mds["C5"] + sw_mds["C6"] + sw_mds["C7"] + sw_mds["C8"] + sw_mds["C9"] + sw_mds["C10"]

In [30]:
print (np.mean(np.exp(sw_mds["C_sum"].tolist())))

1.00015782373
