In [1]:
import csv
import os, django

# Tell Django which settings module to load; setdefault() leaves the variable untouched if it is already set.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gene_search.settings')
# Works around Django's SynchronousOnlyOperation error ("you cannot call this from an async context"), see
# https://stackoverflow.com/questions/61926359/django-synchronousonlyoperation-you-cannot-call-this-from-an-async-context-u
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# Must run after the environment variables above are set and before any model is imported in later cells.
django.setup()

## Details are in https://docs.djangoproject.com/en/5.1/topics/db/queries/

In [2]:
from search.models import BigTableV2

### count all rows

In [15]:
# count() lets the database do the counting (SELECT COUNT(*)) instead of
# loading every row into Python just to take the length of the result.
BigTableV2.objects.count()

65338

### find a specific row

In [4]:
# get() returns a single model instance (compare with filter(), which returns a QuerySet).
b = BigTableV2.objects.get(gene_name="aap-1")
b  # last expression of the cell, so the notebook displays the instance

<BigTableV2: 
            id:2
            gene_id:WBGene00000001
            status:Live
            sequence_name:Y110A7A.10
            gene_name:aap-1
            other_name:CELE_Y110A7A.10
            transcript_name:Y110A7A.10.1
            type:coding_transcript
        >

### filter rows by column value

In [5]:
# filter() returns a QuerySet with every matching row; "aat-3" matches two transcripts in the output below.
b = BigTableV2.objects.filter(gene_name="aat-3")
b  # last expression of the cell, so the notebook displays the QuerySet

<QuerySet [<BigTableV2: 
            id:5
            gene_id:WBGene00000004
            status:Live
            sequence_name:F52H2.2
            gene_name:aat-3
            other_name:CELE_F52H2.2
            transcript_name:F52H2.2a.1
            type:coding_transcript
        >, <BigTableV2: 
            id:6
            gene_id:WBGene00000004
            status:Live
            sequence_name:F52H2.2
            gene_name:aat-3
            other_name:CELE_F52H2.2
            transcript_name:F52H2.2b.1
            type:coding_transcript
        >]>

### delete rows (this example deletes ALL rows in the table)

In [13]:
# WARNING: destructive. all() selects every row, so the delete() below empties the whole table.
# To delete only specific rows, build the QuerySet with filter(...) instead of all().
b_list = BigTableV2.objects.all()
b_list.delete() # delete all rows :) -- the displayed result is (rows deleted, {model label: rows deleted})

(65336, {'search.BigTableV2': 65336})

### Insert rows into database (inefficient: rows are saved one by one)

This took 5m 24.5s to insert 65338 rows.

In [8]:
path = "big_table_v2.csv"

# Expected CSV layout (header row first, then one transcript per line):
#   Gene_ID       , Status, Sequence_Name, Gene_Name, Other_Name     , Transcript_Name, Type
#   WBGene00000001, Live  , Y110A7A.10   , aap-1    , CELE_Y110A7A.10, Y110A7A.10.1   , coding_transcript
#   ......
# newline='' is what the csv module asks for when it is handed a file object.
with open(path, 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)  # handles quoted fields, unlike a manual split(",")
    next(reader, None)  # skip the header row (no error if the file is empty)
    for row in reader:
        if not row:  # csv.reader yields [] for blank lines, e.g. a trailing empty line
            continue
        # `type_` instead of `type`: a cell-level variable named `type` would replace
        # the builtin type() for every cell executed afterwards in this kernel.
        gene_id, status, sequence_name, gene_name, other_name, transcript_name, type_ = row
        b = BigTableV2(
            gene_id = gene_id,
            status = status,
            sequence_name = sequence_name,
            gene_name = gene_name,
            other_name = other_name,
            transcript_name = transcript_name,
            type = type_,
        )
        # Saving each instance separately is what makes this cell slow (kept on purpose for the comparison).
        b.save() # can use "BigTableV2.objects.create(...)" instead

### Insert rows into database (efficient: bulk insert)

See https://docs.djangoproject.com/en/5.1/ref/models/querysets/#bulk-create

This takes only about 2s.

In [14]:
from search.models import BigTableV2
path = "big_table_v2.csv"

# Expected CSV layout (header row first, then one transcript per line):
#   Gene_ID       , Status, Sequence_Name, Gene_Name, Other_Name     , Transcript_Name, Type
#   WBGene00000001, Live  , Y110A7A.10   , aap-1    , CELE_Y110A7A.10, Y110A7A.10.1   , coding_transcript
#   ......
data_list = []
# newline='' is what the csv module asks for when it is handed a file object.
with open(path, 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)  # handles quoted fields, unlike a manual split(",")
    next(reader, None)  # skip the header row (no error if the file is empty)
    for row in reader:
        if not row:  # csv.reader yields [] for blank lines, e.g. a trailing empty line
            continue
        # `type_` instead of `type`: a cell-level variable named `type` would replace
        # the builtin type() for every cell executed afterwards in this kernel.
        gene_id, status, sequence_name, gene_name, other_name, transcript_name, type_ = row
        # Only build the unsaved instance here; nothing is written until bulk_create below.
        data_list.append(
            BigTableV2(
                gene_id = gene_id,
                status = status,
                sequence_name = sequence_name,
                gene_name = gene_name,
                other_name = other_name,
                transcript_name = transcript_name,
                type = type_,
            )
        )

# Insert many many data in only one query (or less queries, if you set a batch size)
# bulk_create returns the list of created objects, which the notebook displays as this cell's output.
BigTableV2.objects.bulk_create(data_list, batch_size=10000)

[<BigTableV2: 
             id:65340
             gene_id:WBGene00000001
             status:Live
             sequence_name:Y110A7A.10
             gene_name:aap-1
             other_name:CELE_Y110A7A.10
             transcript_name:Y110A7A.10.1
             type:coding_transcript
         >,
 <BigTableV2: 
             id:65341
             gene_id:WBGene00000002
             status:Live
             sequence_name:F27C8.1
             gene_name:aat-1
             other_name:CELE_F27C8.1
             transcript_name:F27C8.1.1
             type:coding_transcript
         >,
 <BigTableV2: 
             id:65342
             gene_id:WBGene00000003
             status:Live
             sequence_name:F07C3.7
             gene_name:aat-2
             other_name:CELE_F07C3.7
             transcript_name:F07C3.7.1
             type:coding_transcript
         >,
 <BigTableV2: 
             id:65343
             gene_id:WBGene00000004
             status:Live
             sequence_name:F52H2.2
