In [36]:
import pandas as pd
import numpy as np
import random
import networkx as nx
import csv

# Seed every random number generator this notebook draws from, so that a
# fresh Restart & Run All regenerates identical output files.
# NumPy's global generator and the stdlib `random` module keep independent
# state: np.random.seed() does not affect random.shuffle / random.randint /
# random.sample, so both must be seeded explicitly.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

In [None]:
# Create fake expression data for testing:
#   * 6 columns named t0, t1, ..., t5
#   * 1000 rows of uniform random values in [0, 1)
#   * index labels of the form "<gene>_<celltype>", one row per
#     (gene, cell type) combination
#
# Every label must be unique. Cycling a 100-long gene list against a 5-long
# cell-type list does not achieve that: because 100 is a multiple of 5, each
# gene is always paired with the same cell type and only 100 distinct labels
# are produced. The labels are therefore built as a full cross product.

n_rows = 1000
n_cols = 6
cell_type_names = ["Bcell", "Tcell", "NKcell", "Monocyte", "Dendritic"]

if n_rows % len(cell_type_names) != 0:
    raise ValueError(
        f"n_rows ({n_rows}) must be a multiple of the number of cell types "
        f"({len(cell_type_names)}) to form a full gene x cell-type grid"
    )
n_genes = n_rows // len(cell_type_names)

data = np.random.rand(n_rows, n_cols)
columns = [f"t{i}" for i in range(n_cols)]

# Genes are numbered from 1 to match the node names (gene1, gene2, ...) used
# by the PPI edge list generated in this notebook.
gene_names = [f"gene{i}" for i in range(1, n_genes + 1)]

# Per-row labels, gene-major: gene1 x all cell types, then gene2, and so on.
genes = [gene for gene in gene_names for _ in cell_type_names]
cell_types = cell_type_names * n_genes
index = [f"{gene}_{cell_type}" for gene, cell_type in zip(genes, cell_types)]

df = pd.DataFrame(data, index=index, columns=columns)
assert df.index.is_unique, "each gene/cell-type label must appear exactly once"
df.to_csv("random_data.csv")

In [38]:
# Node set: gene1 ... gene<gene_count>
gene_count = 100
genes = ["gene" + str(number) for number in range(1, gene_count + 1)]

# Start from an edgeless graph holding every gene.
G = nx.Graph()
G.add_nodes_from(genes)

# Guarantee connectivity: link consecutive entries of a random permutation of
# the nodes, giving a single path that visits every gene.
nodes = list(genes)
random.shuffle(nodes)
G.add_edges_from(zip(nodes, nodes[1:]))

# Add random extra interactions on top of the backbone. The number of
# attempts is drawn between gene_count and 2 * gene_count; a pair that is
# already connected leaves the graph unchanged.
extra_edges = random.randint(gene_count, 2 * gene_count)
for _ in range(extra_edges):
    G.add_edge(*random.sample(genes, 2))

# Dump the edge list with a header row. Note the file is tab-delimited even
# though it carries a .csv extension.
with open("random_ppi.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter="\t")
    writer.writerow(["gene1", "gene2"])
    writer.writerows(G.edges())