# Create miRNASNP-v3 index for BIGD

Guide to creating the index file (index.bs) for the [NGDC](https://bigd.big.ac.cn/standards/dis) Bigsearch system.

The **index.bs** file is structured as follows:

```
DB JSON_ENCODED_STRING
ENTRY JSON_ENCODED_STRING
```

In [57]:
import json
import uuid
from pymongo import MongoClient

In [58]:
# Database-level metadata describing miRNASNP-v3 itself.
# NOTE(review): presumably this is what gets JSON-encoded into the `DB` line
# of index.bs; the code that writes the file is not visible here.
db_dict = dict(
    id="mirnasnp",
    title="miRNASNP-v3",
    url="http://bioinfo.life.hust.edu.cn/miRNASNP/",
    description="miRNASNP-v3 is a comprehensive database for SNPs and disease-related variations in miRNAs and miRNA targets",
    basicInfo="In miRNASNP-v3, 46,826 SNPs in human 1,897 pre-miRNAs (2,624 mature miRNAs) and 7,115,796  SNPs in 3'UTRs of 18,151 genes were characterized. Besides, 505,417 disease-related variations (DRVs) from GWAS, ClinVar and COSMIC were identified in miRNA and gene 3'UTR. Gene enrichment of target gain/loss by variations in miRNA seed region was provided.",
    categories=["miRNA", "mutation", "disease"],
    species=["Homo Sapiens"],
    updateAt="2020-09-02 11:11:11",
)

In [157]:
class MongoMir:
    """Small read-only helper around the ``mirnasnp`` MongoDB database.

    A client must be supplied before any query is made, either per instance
    via the ``client`` argument or for all instances by assigning the
    class-level ``__mongo`` attribute below.
    """

    # Shared client used when no per-instance client is given. Credentials are
    # deliberately not stored here; replace ``None`` with a real client, e.g.
    # __mongo = MongoClient("mongodb://username:passwd@ip:port/dbname")
    __mongo = None

    def __init__(self, col_name='mirinfo', client=None):
        """Bind the helper to one collection.

        Args:
            col_name: Name of the collection that ``get_data`` counts.
            client: Optional MongoClient-like object; when given it takes
                precedence over the class-level client.
        """
        self.__col_name = col_name
        if client is not None:
            # The instance attribute shadows the class-level default.
            self.__mongo = client

    def _database(self):
        """Return the ``mirnasnp`` database, failing clearly if unconfigured.

        Raises:
            RuntimeError: If neither a per-instance nor a class-level client
                has been configured.
        """
        if self.__mongo is None:
            raise RuntimeError(
                "MongoMir has no MongoDB client configured; pass client=... "
                "or set the class-level __mongo attribute"
            )
        return self.__mongo.mirnasnp

    def get_data(self, output=None, condition=None):
        """Return the number of documents in the bound collection.

        Args:
            output: Projection accepted for backward compatibility. It cannot
                affect a count, so it is ignored and never modified.
            condition: Query filter; ``None`` matches every document.

        Returns:
            The number of documents matching ``condition``.
        """
        query = {} if condition is None else condition
        # count_documents() replaces Cursor.count(), which PyMongo 4 removed.
        return self._database()[self.__col_name].count_documents(query)

    def get_mirnas(self):
        """Return the ``mir_id`` values of the first 2 ``pri_mir_summary`` rows.

        The projection also fetches ``mir_chr``, ``mir_start`` and ``mir_end``
        so a ``chr:start-end`` locus string can be built from the same query
        if it is needed later.
        """
        cursor = self._database().pri_mir_summary.find(
            {}, {'_id': 0, 'mir_id': 1, 'mir_chr': 1, 'mir_start': 1, 'mir_end': 1}
        ).limit(2)
        return [doc['mir_id'] for doc in cursor]

    @staticmethod
    def _gene_symbols(collection):
        """Return the distinct ``gene_symbol`` values of the first 10 rows."""
        cursor = collection.find({}, {'_id': 0, 'gene_symbol': 1}).limit(10)
        return {doc['gene_symbol'] for doc in cursor}

    def get_genes(self):
        """Return the union of gene symbols from both gene-list collections.

        Each of ``mutation_summary_genelist`` and ``snp_summary_genelist``
        contributes at most its first 10 rows.
        """
        database = self._database()
        mutation_symbols = self._gene_symbols(database.mutation_summary_genelist)
        snp_symbols = self._gene_symbols(database.snp_summary_genelist)
        return mutation_symbols | snp_symbols


In [158]:
# Preview the miRNA ids read from pri_mir_summary (get_mirnas caps the query at 2 rows).
m1 = MongoMir()
m1.get_mirnas()

['hsa-miR-6859-5p', 'hsa-miR-6859-3p']

In [159]:
# Preview the merged gene symbols (get_genes reads at most 10 rows from each gene-list collection).
mm = MongoMir()
mm.get_genes()

{'A1BG',
 'A1CF',
 'A2M',
 'A2ML1',
 'A4GALT',
 'A4GNT',
 'AAAS',
 'AACS',
 'AADAC',
 'AADACL2'}

In [84]:
# Count the documents in the mutation_summary_genelist collection.
mm = MongoMir('mutation_summary_genelist')
mm.get_data()

19889

In [86]:
# Count the documents in the snp_summary_genelist collection.
mm = MongoMir('snp_summary_genelist')
mm.get_data()

21895

In [59]:
class ENTRY(object):
    """One index record, created with default field values.

    NOTE(review): presumably each instance is JSON-encoded into an ``ENTRY``
    line of index.bs; the code that does so is not visible here — confirm.
    """
    def __init__(self):
        """Populate every field with its default value."""
        # Fresh random UUID4 string for each instance.
        self.id = str(uuid.uuid4())
        self.type = "gene"
        self.title = ""
        # Default URL is the hsa-miR-10b-5p miRNA page; presumably overwritten
        # per entry by the caller — TODO confirm.
        self.url = "http://bioinfo.life.hust.edu.cn/miRNASNP/#!/mirna?mirna_id=hsa-miR-10b-5p"
        # Same value as db_dict["id"].
        self.dbId = "mirnasnp"
        # NOTE(review): db_dict spells its timestamp key "updateAt" while this
        # attribute is "updatedAt" — confirm which one the index format expects.
        self.updatedAt = "2020-09-02 11:11:11"
        self.description = ""
        self.basicInfo = ""
        self.species = ["Homo Sapiens"]
        self.attrs = {
            "accession": "",
        }
    def __getattr__(self, attr):
        """Fallback for names normal attribute lookup cannot find.

        Delegates to ``self[attr]``.

        NOTE(review): subscripting requires ``__getitem__``, which is not
        defined in the visible part of this class. If it is missing, looking
        up any unset attribute (including via ``hasattr``) raises TypeError
        instead of AttributeError — confirm.
        """
        return self[attr]