# Adding instances to postgres database

First of all, let us recall the database schema so that we have a clearer picture of the commands we need to design.

![title](../Images/DB_Design.png)

Once more, we will use the `psycopg2` library to connect to the postgres database.

In [4]:
import json

import psycopg2 as psql

## Adding percentile instance

To insert a percentile into the database, we need two commands. The first one inserts a percentile group:

```postgresql
INSERT INTO percentile_groups(project_id, 
                              metadata,
                              number_genes,
                              number_cells)
VALUES (project_id_value, 
        '{ "metadata0": "metadata0_value", ... , "metadataN": "metadataN_value"}',
        number_genes_value,
        number_cells_value)
        
RETURNING id;
```

The second one inserts the percentile of a specific gene belonging to that group:

```postgresql
INSERT INTO percentile(gene_name,
                       percentile,
                       percentile_group)
VALUES (gene_name_value,
        percentile_value,
        percentile_group_value)
        
RETURNING id;
```

As we can see, the JSON object is inserted as a string. However, postgres will treat it as a JSON object. Another important thing is that we need to return the **id** in the transaction because we may want to use it later on.

We are going to design a function that, given a percentile (the values for each variable), builds the command, inserts the percentile into the database and returns the corresponding id.

In [5]:
from Postgres_connection import PostgresConnection

In [7]:
def get_percentile_group_id(project_id, metadata, number_genes, number_cells):
    """Look up the id of an existing percentile group.

    A group matches when its project id, gene count and cell count are equal
    to the given values and its metadata is the same JSON document (checked
    with jsonb containment in both directions, so key order is irrelevant).

    Args:
        project_id: Project identifier; compared as text.
        metadata: JSON-serialisable mapping describing the group.
        number_genes: Number of genes of the group.
        number_cells: Number of cells of the group.

    Returns:
        The first matching row as returned by ``cursor.fetchone()`` (the id
        is its first element), or ``None`` when no group matches.

    Raises:
        TypeError: If ``metadata`` cannot be serialised to JSON.
    """
    # json.dumps always yields valid JSON; rewriting the dict repr by swapping
    # quotes breaks on True/False/None and on values containing apostrophes.
    metadata_json = json.dumps(metadata)

    # Values are passed as query parameters so the driver quotes them,
    # instead of being interpolated into the SQL text.
    command = """
        SELECT id
        FROM percentil_groups
        WHERE
            project_id = %s AND
            metadata::jsonb @> %s::jsonb AND
            %s::jsonb @> metadata::jsonb AND
            number_genes = %s AND
            number_cells = %s
    """
    params = (
        str(project_id),
        metadata_json,
        metadata_json,
        number_genes,
        number_cells,
    )

    with PostgresConnection() as conn:
        cur = conn.cursor()
        try:
            cur.execute(command, params)
            percentile_group_id = cur.fetchone()
        finally:
            # release the cursor even when the query fails
            cur.close()
        # kept from the original flow; this query does not modify any row
        conn.commit()

    return percentile_group_id

In [8]:
def add_percentile_group(project_id, metadata, number_genes, number_cells):
    """Return the id row of a percentile group, inserting it if needed.

    The group is first searched with ``get_percentile_group_id``; only when
    no matching group exists is a new row inserted.

    Args:
        project_id: Project identifier; stored as text.
        metadata: JSON-serialisable mapping describing the group.
        number_genes: Number of genes of the group.
        number_cells: Number of cells of the group.

    Returns:
        The row holding the group id, as returned by ``cursor.fetchone()``
        (the id is its first element).

    Raises:
        TypeError: If ``metadata`` cannot be serialised to JSON.
    """
    percentile_group_id = get_percentile_group_id(project_id, metadata, number_genes, number_cells)

    if percentile_group_id is not None:
        return percentile_group_id

    # The original referenced an undefined ``metadata_str`` here, so the
    # insert path always failed with NameError. Serialise the metadata locally.
    metadata_json = json.dumps(metadata)

    # Values are passed as query parameters so the driver quotes them.
    command = """
        INSERT INTO percentil_groups (project_id,
                                 metadata,
                                 number_genes,
                                 number_cells)
        VALUES (%s, %s, %s, %s)

        RETURNING id;
    """
    params = (str(project_id), metadata_json, number_genes, number_cells)

    with PostgresConnection() as conn:
        cur = conn.cursor()
        try:
            # insert the group and read back the generated id
            cur.execute(command, params)
            percentile_group_id = cur.fetchone()
        finally:
            # release the cursor even when the insert fails
            cur.close()
        # commit the changes
        conn.commit()

    return percentile_group_id

In [15]:
def add_percentile_with_group(project_id, gene_name, percentile, number_genes, number_cells, metadata=None):
    """Insert one gene percentile, creating its percentile group if needed.

    Args:
        project_id: Project identifier of the group.
        gene_name: Name of the gene; stored as text.
        percentile: Percentile value of the gene.
        number_genes: Number of genes of the group.
        number_cells: Number of cells of the group.
        metadata: Optional mapping describing the group. Defaults to an
            empty mapping.

    Returns:
        A ``(percentile_id, percentile_group_id)`` tuple.
    """
    # None replaces the former mutable ``{}`` default; the effective default
    # is still an empty mapping.
    if metadata is None:
        metadata = {}

    percentile_group_id = add_percentile_group(project_id, metadata, number_genes, number_cells)[0]

    # Values are passed as query parameters so the driver quotes them,
    # instead of being interpolated into the SQL text.
    command = """
        INSERT INTO percentiles (gene_name,
                                 percentile,
                                 percentil_group)
        VALUES (%s, %s, %s)

        RETURNING id;
    """
    params = (str(gene_name), percentile, percentile_group_id)

    with PostgresConnection() as conn:
        cur = conn.cursor()
        try:
            # insert the percentile and read back the generated id
            cur.execute(command, params)
            percentile_id = cur.fetchone()
        finally:
            # release the cursor even when the insert fails
            cur.close()
        # commit the changes
        conn.commit()

    return percentile_id[0], percentile_group_id

In [16]:
# Example call: inserts one percentile together with its group; running this
# cell writes to the database.
add_percentile_with_group("E-CURD-55", "ENDG000001", 36.5, 1254, 7654, metadata={'m': 'n'})

(6, 1)

In [17]:
def add_percentile(gene_name, percentile, percentile_group_id):
    """Insert the percentile of one gene into an existing percentile group.

    Args:
        gene_name: Name of the gene; stored as text.
        percentile: Percentile value of the gene.
        percentile_group_id: Id of the group the percentile belongs to.

    Returns:
        The id of the newly inserted percentile row.
    """
    # Values are passed as query parameters so the driver quotes them;
    # a gene name containing a quote no longer breaks the statement.
    command = """
        INSERT INTO percentiles (gene_name,
                                 percentile,
                                 percentil_group)
        VALUES (%s, %s, %s)

        RETURNING id;
    """
    params = (str(gene_name), percentile, percentile_group_id)

    with PostgresConnection() as conn:
        cur = conn.cursor()
        try:
            # insert the percentile and read back the generated id
            cur.execute(command, params)
            percentile_id = cur.fetchone()
        finally:
            # release the cursor even when the insert fails
            cur.close()
        # commit the changes
        conn.commit()

    return percentile_id[0]

## Add sampling percentiles

In [None]:
def add_sampling_percentiles(percentiles, sampling_info):
    """Insert every percentile of a sampling under a single percentile group.

    Args:
        percentiles: Table of percentiles exposing ``iterrows()`` whose rows
            provide ``'gene_name'`` and ``'percentile'`` (presumably a pandas
            DataFrame; confirm against the caller).
        sampling_info: Mapping with the keys ``'project_id'``,
            ``'metadata'``, ``'number_genes'`` and ``'number_cells'``
            describing the group.

    Returns:
        The group row returned by ``add_percentile_group`` (the group id is
        its first element).
    """
    project_id = sampling_info['project_id']
    metadata = sampling_info['metadata']
    number_genes = sampling_info['number_genes']
    number_cells = sampling_info['number_cells']

    percentile_group_row = add_percentile_group(project_id, metadata, number_genes, number_cells)
    # add_percentile expects the scalar id, not the whole returned row.
    percentile_group_id = percentile_group_row[0]

    # Iterate the ``percentiles`` argument; the original looped over an
    # undefined name ``df``.
    for _, row in percentiles.iterrows():
        add_percentile(row['gene_name'],
                       row['percentile'],
                       percentile_group_id)

    return percentile_group_row