In [35]:
import psycopg2
from psycopg2 import sql
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import time
import torch
from torch_geometric.utils import sort_edge_index
import os

In [36]:
def extract_execution_time(explain_output):
    """
    Extracts the total execution time from the given query plan.

    The function parses the query plan to find the execution time, which is typically
    represented in the format 'Execution Time: X ms'. It returns the execution time
    in seconds.

    Parameters:
    query_plan (list): A list of strings representing the lines of the query plan output.

    Returns:
    float: The total execution time in ms. If the execution time cannot be found,
           returns None.
    """

    execution_time = 0.0
    pattern = re.compile(r"Execution Time: (\d+\.\d+) ms")
    for row in explain_output:
        match = pattern.search(row[0])
        if match:
            execution_time += float(match.group(1))
    return execution_time
    
def connect_to_postgres(dbname = "postgres"):
    """Connect to the PostgreSQL database server."""
    try:
        conn = psycopg2.connect(
            dbname=dbname,  # Connect to default db to create new db
            user='postgres',
            password='password',
            host='localhost'
        )
        print("Connection successful.")
        return conn
    except Exception as e:
        print(f"Error connecting to database: {e}")
        raise

def create_database(conn, new_db_name):
    """Create a new database."""
    try:
        conn.autocommit = True
        with conn.cursor() as cursor:
            cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(new_db_name)))
            print(f"Database '{new_db_name}' created successfully.")
    except Exception as e:
        print(f"Error creating database: {e}")
        raise
    finally:
        conn.autocommit = False

def create_schema(conn, schema_sql):
    """Create the database schema."""
    try:
        with conn.cursor() as cursor:
            cursor.execute(schema_sql)
            conn.commit()
            print("Schema created successfully.")
    except Exception as e:
        conn.rollback()
        print(f"Error creating schema: {e}")
        raise

def upload_csv_to_table(conn, csv_file_path, table_name):
    """Upload a CSV file to a table using the COPY command."""
    try:
        with conn.cursor() as cursor:
            with open(csv_file_path, 'r') as f:
                cursor.copy_expert(
                    sql.SQL("""
                        COPY {} FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',');
                    """).format(sql.Identifier(table_name)), f
                )
            conn.commit()
            print(f"Data from '{csv_file_path}' uploaded to table '{table_name}' successfully using COPY.")
    except Exception as e:
        conn.rollback()
        print(f"Error uploading CSV data using COPY: {e}")
        raise

def create_index(conn, table_name, column_name):
    """Create an index on a specific column."""
    try:
        with conn.cursor() as cursor:
            index_name = f"{table_name}_{column_name}_idx"
            cursor.execute(sql.SQL("CREATE INDEX {} ON {} ({});").format(
                sql.Identifier(index_name),
                sql.Identifier(table_name),
                sql.Identifier(column_name)
            ))
            conn.commit()
            print(f"Index '{index_name}' created successfully.")
    except Exception as e:
        conn.rollback()
        print(f"Error creating index: {e}")
        raise

def read_data(conn, query):
    """Read data from the database."""
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            results = cursor.fetchall()
            print("Data read successfully.")
            return results
    except Exception as e:
        print(f"Error reading data: {e}")
        raise

def update_data(conn, query, params):
    """Update data in the database."""
    try:
        with conn.cursor() as cursor:
            cursor.execute(query, params)
            conn.commit()
            print("Data updated successfully.")
    except Exception as e:
        conn.rollback()
        print(f"Error updating data: {e}")
        raise

def delete_database(conn, db_name):
    """Delete the specified database including all its schemas, tables, and indexes."""
    try:
        conn.autocommit = True
        with conn.cursor() as cursor:
            cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {};").format(sql.Identifier(db_name)))
            print(f"Database '{db_name}' deleted successfully.")
    except Exception as e:
        print(f"Error deleting database: {e}")
        raise
    finally:
        conn.autocommit = False

def create_edges_table(conn):
    """Create an edges table with foreign key constraints to the main table."""
    edges_schema = """
    CREATE TABLE edges (
        source_id INTEGER NOT NULL,
        target_id INTEGER NOT NULL,
        FOREIGN KEY (source_id) REFERENCES nodes(id) ON DELETE CASCADE,
        FOREIGN KEY (target_id) REFERENCES nodes(id) ON DELETE CASCADE
    );
    """
    try:
        with conn.cursor() as cursor:
            cursor.execute(edges_schema)
            conn.commit()
            print("Edges table created successfully.")
    except Exception as e:
        conn.rollback()
        print(f"Error creating edges table: {e}")
        raise

def upload_edges_csv_to_table(conn, csv_file_path):
    """Upload a CSV file to the edges table using the COPY command."""
    try:
        with conn.cursor() as cursor:
            with open(csv_file_path, 'r') as f:
                cursor.copy_expert(
                    sql.SQL("""
                        COPY edges (source_id, target_id) FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',');
                    """), f
                )
            conn.commit()
            print(f"Edges data from '{csv_file_path}' uploaded successfully.")
    except Exception as e:
        conn.rollback()
        print(f"Error uploading edges CSV data: {e}")
        raise

def recurse_edge_index_iterative(source_nodes, edge_index, max_depth):
    """
    Optimized function to compute the subgraph around the source nodes up to a given depth.
    Uses an iterative approach instead of recursion.
    """
    visited_nodes = set(source_nodes)
    current_frontier = np.array(source_nodes)
    
    subgraph_edges = []

    for _ in range(max_depth):
        # Find edges where the target node is in the current frontier
        target_mask = np.isin(edge_index[1], current_frontier)
        subgraph_edge_index = edge_index[:, target_mask]
        subgraph_edges.append(subgraph_edge_index)

        # Update the current frontier with the source nodes of these edges
        current_frontier = np.setdiff1d(subgraph_edge_index[0], list(visited_nodes))
        visited_nodes.update(current_frontier)
        
        if len(current_frontier) == 0:
            break

    # Combine edges from all hops
    return np.concatenate(subgraph_edges, axis=1) if subgraph_edges else np.empty((2, 0), dtype=edge_index.dtype)


def get_subgraph_from_in_mem_graph_optimized(X, y, i, edge_index, hops):
    """
    Optimized version of subgraph extraction.
    """
    subgraph_edge_index = recurse_edge_index_iterative([i], edge_index, hops)
    unique_node_ids, remapping = np.unique(subgraph_edge_index, return_inverse=True)
    
    # Extract features and labels
    features = X.iloc[unique_node_ids, :].values
    labels = y.iloc[unique_node_ids, :].values.squeeze()

    # Remap edge indices
    remapped_edge_index = remapping.reshape(2, -1)
    return remapped_edge_index, features, labels, unique_node_ids

def create_db(node_file_name):
    # Create a new database
    conn = connect_to_postgres(dbname = "postgres")
    new_db_name = node_file_name.split(".")[0]
    create_database(conn, new_db_name)
    conn.close()
    conn = connect_to_postgres(new_db_name)
    return conn, new_db_name

def create(conn, node_file_name, edge_file_name, X_and_y):
    column_types = ["id SERIAL PRIMARY KEY"]
    for col in X_and_y.columns:
        if col == "y":
            column_types.append(f"{col} INTEGER[]")
            continue
        column_types.append(f"{col} REAL[]")
        
    node_schema = f"""
    CREATE TABLE nodes (
        {",".join(column_types)}
    );
    """
    start = time.time()
    create_schema(conn, node_schema)
    create_edges_table(conn)
    
    csv_file_path = f"syn_data/{node_file_name}"  # Replace with your CSV file path
    upload_csv_to_table(conn, csv_file_path, "nodes")
    create_index(conn, "nodes", "id")
    upload_edges_csv_to_table(conn, f"syn_data/{edge_file_name}")
    create_index(conn, "edges", "target_id")
    creation_time = time.time() - start
    return creation_time

def read(conn, hops, X_and_y, X, y, edge_index, random_sample_size = 1_000):
    np.random.seed(42)
    seed_node_ids = np.random.choice(np.arange(X_and_y.shape[0]), size = random_sample_size, replace = False)
    
    with conn.cursor() as cursor:
        complete_time = 0
        complete_test_time = 0
        for seed_node_id in tqdm(seed_node_ids):
            try:
                start = time.time()
                cursor.execute(f"""
        WITH RECURSIVE NestedTargets AS (
            SELECT 0 AS depth, source_id, target_id
            FROM edges
            WHERE target_id = {seed_node_id}
            
            UNION ALL
            
            SELECT nt.depth + 1, e.source_id, e.target_id
            FROM edges e
            JOIN NestedTargets nt ON e.target_id = nt.source_id
            WHERE nt.depth < {hops - 1}
        ),
        
        node_ids AS (
            SELECT DISTINCT id FROM (
                SELECT source_id AS id FROM NestedTargets
                UNION
                SELECT target_id AS id FROM NestedTargets
            ) AS combined_ids
        ),
        
        node_data AS (
            SELECT 
                id, 
                {", ".join(X_and_y.columns)}
            FROM nodes
            WHERE id IN (SELECT id FROM node_ids)
            ORDER BY id
        )
        
        SELECT
            (SELECT array_agg({", ".join(X_and_y.columns[:-1])}) FROM node_data) AS node_table,
            (SELECT array_agg({X_and_y.columns[-1]}) FROM node_data) AS label_table,
            (SELECT array_agg(array[source_id, target_id])
             FROM (SELECT DISTINCT source_id, target_id FROM NestedTargets) AS edges) AS edge_table,
             (SELECT array_agg(id) FROM node_data) AS node_ids;
    """)
                results = cursor.fetchall()[0]
                labels = np.array(results[1])
                subgraph_node_features = np.array(results[0])
                if results[0] is None:
                    continue
                
                subgraph_edges = np.array(results[2]).transpose()
                
                node_ids = np.array(results[-1]) #subgraph_node_features[:, 0]
                _, cols_source = np.nonzero((subgraph_edges[0] == node_ids[:, None]).transpose())
                _, cols_target = np.nonzero((subgraph_edges[1] == node_ids[:, None]).transpose())
                remapped_edge_index = np.concatenate([np.expand_dims(cols_source, axis = 0), np.expand_dims(cols_target, axis = 0)], axis = 0)
                features = subgraph_node_features #[:, 1:]
                overall_run_time = time.time() - start 
                
                complete_time += overall_run_time
                # Testing
                test_time = time.time()
                remapped_edge_index_test, features_test, labels_test, unique_node_ids = get_subgraph_from_in_mem_graph_optimized(X, y, seed_node_id, edge_index, hops)                    
                complete_test_time += time.time() - test_time
                assert (sort_edge_index(torch.from_numpy(remapped_edge_index_test)) == sort_edge_index(torch.from_numpy(remapped_edge_index))).sum() / (remapped_edge_index_test.shape[-1] * remapped_edge_index_test.shape[0]), "Edges doesnt match"
                assert np.allclose(features, features_test), "features doe not match"
                assert np.allclose(labels_test, labels), "Labels does not match"
                # print(f"Fetched {remapped_edge_index.shape} edges, {labels.shape} labels, {features.shape} features in ({overall_run_time} s)")
            except Exception as e:
                conn.rollback()
                print(f"Error uploading edges CSV data: {e}")
                raise   
    return (complete_time, complete_test_time)

def update_nodes(conn, X_and_y, X, y, random_sample_size = 1000):
    with conn.cursor() as cursor:
        np.random.seed(42)
        node_ids = np.random.choice(np.arange(X_and_y.shape[0]), size = random_sample_size, replace = False).tolist()
        start = time.time()
        for node_id in tqdm(node_ids):
            np.random.seed(42)
            features = np.random.rand(X.shape[-1]).tolist()  # Random values between 0 and 1
            labels = np.random.randint(0, 2, size=y.shape[-1]).tolist()  # Adjust label range as needed            
            sql_query = f"""
            UPDATE nodes 
            SET X = %s, y = %s
            WHERE id = %s;
            """
            values = (features, labels, node_id)
            cursor.execute(sql_query, values)
        return time.time() - start

def update_edges(conn, edge_index, X_and_y, random_sample_size = 1000):
    with conn.cursor() as cursor:
        np.random.seed(42)
        edge_ids = np.random.choice(np.arange(edge_index.shape[-1]), size = random_sample_size, replace = False).tolist()
        selected_edges = edge_index[:, edge_ids].transpose(-1, 0 )
        start = time.time()
        for selected_edge in tqdm(selected_edges):
            source_id, target_id = selected_edge
            np.random.seed(42)
            new_target_id = int(np.random.randint(0, X_and_y.shape[0]))
            sql_query = "UPDATE edges SET target_id = %s WHERE source_id = %s AND target_id = %s;"
            values = (new_target_id, int(source_id), int(target_id))
            cursor.execute(sql_query, values)
        return time.time() - start

def delete(conn, new_db_name):
    start = time.time()
    conn.close()
    conn = connect_to_postgres()
    delete_database(conn, new_db_name)
    conn.close()
    return time.time() - start

In [37]:
edge_file_name = "ppi_edge_index.csv"
node_file_name = "X_y_ppi.csv"

X = pd.read_csv(f"data/ppi_x.csv")
y = pd.read_csv(f"data/ppi_y.csv")
edges = pd.read_csv("data/" + edge_file_name)
edges.columns = ["source_id", "target_id"]
edges.to_csv("syn_data/" + edge_file_name, index = False)
edge_index = edges.values.transpose(-1, 0 )

if not os.path.exists(f"syn_data/{node_file_name}"):
    X_and_y = pd.DataFrame()
    X_and_y["X"] = X.apply(lambda row: [row[column] for column in X.columns], axis=1)
    X_and_y["y"] = y.apply(lambda row: [int(row[column]) for column in y.columns], axis=1)
    
    X_and_y["X"] = X_and_y["X"].apply(lambda x: f"{{{','.join(map(str, x))}}}")
    X_and_y["y"] = X_and_y["y"].apply(lambda x: f"{{{','.join(map(str, x))}}}")
    
    
    X_and_y.to_csv(f"syn_data/{node_file_name}", sep = ",", index = True)
else: 
    X_and_y = pd.read_csv(f"syn_data/{node_file_name}", sep = ",", index_col = 0)

## TODO need to copy ppi file in /syn_data

In [38]:
delete_time = delete(conn, new_db_name)

Connection successful.
Database 'X_y_ppi' deleted successfully.


In [34]:
conn, new_db_name = create_db(node_file_name)
create_time = create(conn, node_file_name, edge_file_name, X_and_y)
read_times = dict()
read_times_mem = dict()
for hops in tqdm(range(1, 4)):
    read_time, read_time_mem = read(conn, hops, X_and_y, X, y, edge_index, 1_000)
    read_times[hops] = read_time
    read_times_mem[hops] = read_time_mem
update_time_nodes = update_nodes(conn, X_and_y, X, y, 1000)
update_time_edges = update_edges(conn, edge_index, X_and_y, 1000)
delete_time = delete(conn, new_db_name)

Connection successful.
Database 'X_y_ppi' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_y_ppi.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/ppi_edge_index.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_y_ppi' deleted successfully.


In [39]:
output_df = pd.DataFrame(columns = ["name", "create", "read", "read_mem", "update_nodes", "update_edges", "delete"])

for num_nodes in tqdm([1_000, 10_000, 100_000]):
    for num_edges in tqdm(["5_edges", "10_edges", "20_edges", "scale_free"]):
        feature_file_name = f"X_{str(num_nodes)}_nodes_{num_edges}.csv"
        label_file_name = f"y_{str(num_nodes)}_nodes_{num_edges}.csv"
        edge_file_name = f"edge_index_{str(num_nodes)}_nodes_{num_edges}.csv"
        assert os.path.exists(f"syn_data/{feature_file_name}"), "Feature file does not exist"
        assert os.path.exists(f"syn_data/{label_file_name}"), "Label file does not exist"
        assert os.path.exists(f"syn_data/{edge_file_name}"), "Edge file does not exist"
        
        X = pd.read_csv(f"syn_data/{feature_file_name}")
        y = pd.read_csv(f"syn_data/{label_file_name}")
        edges = pd.read_csv(f"syn_data/{edge_file_name}")
        edges.columns = ["source_id", "target_id"]
        edge_index = edges.values.transpose(-1, 0)
        node_file_name = f"X_and_y_{str(num_nodes)}_nodes_{num_edges}.csv"
        if not os.path.exists(f"syn_data/{node_file_name}"):
            X_and_y = pd.DataFrame()
            X_and_y["X"] = X.apply(lambda row: [row[column] for column in X.columns], axis=1)
            X_and_y["y"] = y.apply(lambda row: [int(row[column]) for column in y.columns], axis=1)
            
            X_and_y["X"] = X_and_y["X"].apply(lambda x: f"{{{','.join(map(str, x))}}}")
            X_and_y["y"] = X_and_y["y"].apply(lambda x: f"{{{','.join(map(str, x))}}}")
            X_and_y.to_csv(f"syn_data/{node_file_name}", sep = ",", index = True)
        else:
            X_and_y = pd.read_csv(f"syn_data/{node_file_name}", sep = ",", index_col = 0)
        conn, new_db_name = create_db(node_file_name)
        create_time = create(conn, node_file_name, edge_file_name, X_and_y)
        read_times = dict()
        read_times_mem = dict()
        for hops in tqdm(range(1, 4)):
            read_time, read_time_mem = read(conn, hops, X_and_y, X, y, edge_index, 1_000)
            read_times[hops] = read_time
            read_times_mem[hops] = read_time_mem
        update_time_nodes = update_nodes(conn, X_and_y, X, y, 1000)
        update_time_edges = update_edges(conn, edge_index, X_and_y, 1000)
        delete_time = delete(conn, new_db_name)
        new_row_dict = {"name": f"{str(num_nodes)}_nodes_{num_edges}", "create": create_time, "update_nodes": update_time_nodes, "update_edges": update_time_edges, "delete": delete_time}
        for hops in read_times:
            new_row_dict[f"read_{hops}"] = read_times[hops]
            new_row_dict[f"read_in_mem_{hops}"] = read_times_mem[hops]
        new_row = pd.DataFrame([new_row_dict])
        output_df = pd.concat((output_df, new_row), ignore_index=True)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_1000_nodes_5_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_1000_nodes_5_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_1000_nodes_5_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_1000_nodes_5_edges' deleted successfully.
Connection successful.


  output_df = pd.concat((output_df, new_row), ignore_index=True)


Database 'X_and_y_1000_nodes_10_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_1000_nodes_10_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_1000_nodes_10_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_1000_nodes_10_edges' deleted successfully.
Connection successful.
Database 'X_and_y_1000_nodes_20_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_1000_nodes_20_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_1000_nodes_20_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_1000_nodes_20_edges' deleted successfully.
Connection successful.
Database 'X_and_y_1000_nodes_scale_free' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_1000_nodes_scale_free.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_1000_nodes_scale_free.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_1000_nodes_scale_free' deleted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_10000_nodes_5_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_10000_nodes_5_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_10000_nodes_5_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_10000_nodes_5_edges' deleted successfully.
Connection successful.
Database 'X_and_y_10000_nodes_10_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_10000_nodes_10_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_10000_nodes_10_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_10000_nodes_10_edges' deleted successfully.
Connection successful.
Database 'X_and_y_10000_nodes_20_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_10000_nodes_20_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_10000_nodes_20_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_10000_nodes_20_edges' deleted successfully.
Connection successful.
Database 'X_and_y_10000_nodes_scale_free' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_10000_nodes_scale_free.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_10000_nodes_scale_free.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_10000_nodes_scale_free' deleted successfully.


  0%|          | 0/4 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_100000_nodes_5_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_100000_nodes_5_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_100000_nodes_5_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_100000_nodes_5_edges' deleted successfully.
Connection successful.
Database 'X_and_y_100000_nodes_10_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_100000_nodes_10_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_100000_nodes_10_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_100000_nodes_10_edges' deleted successfully.
Connection successful.
Database 'X_and_y_100000_nodes_20_edges' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_100000_nodes_20_edges.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_100000_nodes_20_edges.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_100000_nodes_20_edges' deleted successfully.
Connection successful.
Database 'X_and_y_100000_nodes_scale_free' created successfully.
Connection successful.
Schema created successfully.
Edges table created successfully.
Data from 'syn_data/X_and_y_100000_nodes_scale_free.csv' uploaded to table 'nodes' successfully using COPY.
Index 'nodes_id_idx' created successfully.
Edges data from 'syn_data/edge_index_100000_nodes_scale_free.csv' uploaded successfully.
Index 'edges_target_id_idx' created successfully.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

Connection successful.
Database 'X_and_y_100000_nodes_scale_free' deleted successfully.


In [40]:
output_df.to_csv("first_postgres_list_res.csv")

In [41]:
output_df

Unnamed: 0,name,create,read,read_mem,update_nodes,update_edges,delete
0,1000_nodes_5_edges,0.15362,25.124993,3.252326,1.851639,0.790025,0.132719
1,1000_nodes_10_edges,0.294178,56.117448,2.029108,1.793453,0.862554,0.10287
2,1000_nodes_20_edges,0.33031,134.906644,2.317524,1.351911,0.799714,0.137017
3,1000_nodes_scale_free,0.155881,4.66358,0.479488,1.82582,0.906387,0.115398
4,10000_nodes_5_edges,1.290384,20.726816,6.666162,1.839293,0.829,0.121861
5,10000_nodes_10_edges,1.366571,63.288304,4.445678,1.285792,0.874628,0.107782
6,10000_nodes_20_edges,2.338412,528.357733,7.987612,1.107436,0.690643,0.115146
7,10000_nodes_scale_free,0.717312,31.601307,0.661709,1.809194,1.261678,0.093916
8,100000_nodes_5_edges,7.65208,13.582478,16.421966,1.247169,0.82224,0.229198
9,100000_nodes_10_edges,12.684927,63.763234,25.867091,1.244511,0.884679,0.214767


In [None]:
## TODO one million

In [3]:
conn = connect_to_postgres(dbname = 'X_and_y_1000_nodes_5_edges')
delete_time = delete(conn, new_db_name = "X_and_y_1000_nodes_5_edges")

Connection successful.
Connection successful.
Database 'X_and_y_1000_nodes_5_edges' deleted successfully.
