# Create miRNASNP-v3 index for BIGD

Guide to creating the index file (index.bs) for the [NGDC](https://bigd.big.ac.cn/standards/dis) Bigsearch system.

The **index.bs** file is structured as follows:

```
DB JSON_ENCODED_STRING
ENTRY JSON_ENCODED_STRING
```

In [1]:
import json
import uuid
from pymongo import MongoClient

In [2]:
# Database-level metadata; serialized to JSON and written as the single
# "DB" header line of index.bs.
db_dict = dict(
    id="mirnasnp",
    title="miRNASNP-v3",
    url="http://bioinfo.life.hust.edu.cn/miRNASNP/",
    description="miRNASNP-v3 is a comprehensive database for SNPs and disease-related variations in miRNAs and miRNA targets",
    basicInfo="In miRNASNP-v3, 46,826 SNPs in human 1,897 pre-miRNAs (2,624 mature miRNAs) and 7,115,796  SNPs in 3'UTRs of 18,151 genes were characterized. Besides, 505,417 disease-related variations (DRVs) from GWAS, ClinVar and COSMIC were identified in miRNA and gene 3'UTR. Gene enrichment of target gain/loss by variations in miRNA seed region was provided.",
    categories=["miRNA", "mutation", "disease"],
    species=["Homo Sapiens"],
    updatedAt="2015-08-30 11:11:11",
)

In [3]:
class MongoMir:
    """Accessor for the ``mirnasnp`` MongoDB database.

    All instances share one ``MongoClient`` that is created when the class
    body is executed. ``col_name`` selects the collection that
    :meth:`get_data` queries; the other methods read fixed collections.
    """

    # NOTE: the connection string is a template. ``username``, ``passwd``,
    # ``ip``, ``port`` and ``dbname`` must be replaced with real values;
    # with the literal ``port`` pymongo raises
    # "ValueError: Port must be an integer between 0 and 65535".
    __mongo = MongoClient("mongodb://username:passwd@ip:port/dbname")

    def __init__(self, col_name = 'mirinfo'):
        self.__col_name = col_name

    def get_data(self, output=None, condition=None):
        """Return the number of documents in the collection matching *condition*.

        Args:
            output: Optional projection mapping. It is copied before ``_id``
                is excluded, so the caller's dict is never modified.
            condition: Optional query filter; ``None`` matches every document.

        Returns:
            The value of ``Cursor.count()`` for the query.
        """
        # Build fresh dicts on every call: the previous mutable defaults were
        # shared between calls and ``output`` was mutated in place.
        projection = dict(output) if output else {}
        projection['_id'] = 0
        query = {} if condition is None else condition

        mcur = self.__mongo.mirnasnp[self.__col_name].find(
            query, projection, no_cursor_timeout=True
        )
        # NOTE(review): Cursor.count() was removed in PyMongo 4.0; when the
        # driver is upgraded, switch to Collection.count_documents(query).
        return mcur.count()

    def get_mirnas(self):
        """Return the ``mir_id`` of every document in ``pri_mir_summary``."""
        # The chromosome/start/end fields are projected too, but only the
        # miRNA ID is returned.
        mcur = self.__mongo.mirnasnp.pri_mir_summary.find(
            {}, {'_id': 0, 'mir_id': 1, 'mir_chr': 1, 'mir_start': 1, 'mir_end': 1}
        )
        return [item['mir_id'] for item in mcur]

    def get_genes(self):
        """Return the distinct gene symbols found in either gene-list collection.

        The symbols of ``mutation_summary_genelist`` and
        ``snp_summary_genelist`` are merged through a set union, so the
        order of the returned list is unspecified.
        """
        projection = {'_id': 0, 'gene_symbol': 1}

        mcur = self.__mongo.mirnasnp.mutation_summary_genelist.find({}, projection)
        m_symbol = {item['gene_symbol'] for item in mcur}

        mcur = self.__mongo.mirnasnp.snp_summary_genelist.find({}, projection)
        s_symbol = {item['gene_symbol'] for item in mcur}

        return list(m_symbol | s_symbol)


ValueError: Port must be an integer between 0 and 65535: port

In [4]:
class ENTRY(object):
    """One ENTRY record of the index.bs file.

    The instance attributes set in ``__init__`` form the record; the
    instance ``__dict__`` is what gets JSON-encoded for the ENTRY line.

    Args:
        type: Entry type label (e.g. ``'miRNA ID'``).
        title: Entry title; also stored as ``attrs['symbol']``.
        url: Link to the entry's page.
    """

    def __init__(self, type, title, url):
        # A fresh random identifier is generated for every entry.
        self.id = str(uuid.uuid4())
        self.type = type
        self.title = title
        self.url = url
        self.dbId = "mirnasnp"
        self.updatedAt = "2015-08-30 11:11:11"
        self.description = ""
        self.basicInfo = ""
        self.species = ["Homo Sapiens"]
        self.attrs = {
            "symbol": title,
        }

    def __getattr__(self, attr):
        """Signal a missing attribute with ``AttributeError``.

        ``__getattr__`` is only invoked after normal lookup has failed. The
        previous ``self[attr]`` raised ``TypeError`` (the object is not
        subscriptable), which broke ``hasattr()`` and
        ``getattr(obj, name, default)``.
        """
        raise AttributeError(
            f"{self.__class__.__name__!r} object has no attribute {attr!r}"
        )

In [5]:
def get_entry(it, type = 'miRNA ID'):
    """Return the JSON-encoded ENTRY record for identifier *it*.

    A *type* beginning with ``'miRNA'`` links to the miRNA page; any other
    type links to the gene page with the SNP and phenotype flags set.
    """
    is_mirna = type.startswith('miRNA')
    url = (
        f'http://bioinfo.life.hust.edu.cn/miRNASNP/#!/mirna?mirna_id={it}'
        if is_mirna
        else f'http://bioinfo.life.hust.edu.cn/miRNASNP/#!/gene?query_gene={it}&has_snp=1&has_phenotype=1'
    )

    # The instance attributes of ENTRY are the payload of the record.
    entry = ENTRY(type, it, url)
    return json.dumps(vars(entry))

In [6]:
# Collect the identifiers that become ENTRY lines of index.bs:
# miRNA IDs and gene symbols, both read through MongoMir.
mongo_mirnasnp = MongoMir()
mirna_ids = mongo_mirnasnp.get_mirnas()
gene_ids = mongo_mirnasnp.get_genes()

NameError: name 'MongoMir' is not defined

In [7]:
# Write index.bs: one tab-separated DB header line, then one ENTRY line per
# miRNA ID followed by one ENTRY line per gene symbol.
with open('/home/liucj/tmp/index.bs', 'w') as fh:
    fh.write('\t'.join(['DB', json.dumps(db_dict)]) + '\n')

    fh.writelines(
        '\t'.join(['ENTRY', get_entry(it = mirna_id, type = 'miRNA ID')]) + '\n'
        for mirna_id in mirna_ids
    )

    fh.writelines(
        '\t'.join(['ENTRY', get_entry(it = gene_id, type = 'Official gene symbol')]) + '\n'
        for gene_id in gene_ids
    )

NameError: name 'mirna_ids' is not defined

Validate the generated index.bs file with BSChecker:

```
/home/miaoyr/software/BSChecker/bschecker-1.1.4-bin/bin/bschecker /home/liucj/tmp/index.bs
```