## RData to CSV

In [2]:
import pyreadr

# Load every object stored in the .RData file.
# The returned container is keyed by the name each object had in R.
result = pyreadr.read_r('../data/Data.RData')

# List the names of the objects that were found
print("Objects in the file:")
print(result.keys())

# Summarise each object in turn: name, Python type, shape (when it has one)
# and a short preview (the full value when there is no .head()).
for obj_name in result:
    obj_data = result[obj_name]
    print(f"\nObject: {obj_name}")
    print(f"Type: {type(obj_data)}")
    if hasattr(obj_data, 'shape'):
        print(f"Shape: {obj_data.shape}")
    preview = obj_data.head() if hasattr(obj_data, 'head') else obj_data
    print(preview)

Objects in the file:
odict_keys(['NeuronID', 'NPPpairs', 'Neuropeptides', 'ReceptorsNPP', 'NPPpairsbyneuron_sr', 'NPPpairsbyneuron_mr', 'NPPpairsbyneuron_lr'])

Object: NeuronID
Type: <class 'pandas.core.frame.DataFrame'>
Shape: (302, 4)
   nodeID nodeLabel nodetype  Cell.Class
0       1       I1L  Pharynx           1
1       2       I1R  Pharynx           1
2       3       I2L  Pharynx           2
3       4       I2R  Pharynx           2
4       5        I3  Pharynx           3

Object: NPPpairs
Type: <class 'pandas.core.frame.DataFrame'>
Shape: (92, 1)
              V1
0   NLP-40 AEX-2
1   NLP-12 CKR-1
2   NLP-12 CKR-2
3  FLP-10 DMSR-1
4  FLP-11 DMSR-1

Object: Neuropeptides
Type: <class 'pandas.core.frame.DataFrame'>
Shape: (49, 1)
  Neuropeptides
0        NLP-40
1        NLP-12
2        FLP-10
3        FLP-11
4        FLP-12

Object: ReceptorsNPP
Type: <class 'pandas.core.frame.DataFrame'>
Shape: (92, 2)
  GeneGPCR GeneLigand
0    AEX-2     NLP-40
1    CKR-1     NLP-12
2    CKR-2  

In [3]:
# Write every loaded object to its own CSV under ../data/, named after the object.
for obj_name in result:
    obj_data = result[obj_name]
    output_file = f"../data/{obj_name}.csv"
    # index=True stores the frame's row index as the first CSV column
    obj_data.to_csv(output_file, index=True)
    print(f"Saved {obj_name} to {output_file}")

print("\nConversion complete!")

Saved NeuronID to NeuronID.csv
Saved NPPpairs to NPPpairs.csv
Saved Neuropeptides to Neuropeptides.csv
Saved ReceptorsNPP to ReceptorsNPP.csv
Saved NPPpairsbyneuron_sr to NPPpairsbyneuron_sr.csv
Saved NPPpairsbyneuron_mr to NPPpairsbyneuron_mr.csv
Saved NPPpairsbyneuron_lr to NPPpairsbyneuron_lr.csv

Conversion complete!


## XLSX to CSV

In [None]:
import pandas as pd

def convert_xlsx_to_csv(xlsx_file, csv_file, sheet_name=0):
    """Convert one worksheet of an Excel workbook to a CSV file.

    Parameters
    ----------
    xlsx_file : str or path-like
        Path of the workbook to read.
    csv_file : str or path-like
        Path of the CSV file to write.
    sheet_name : int or str, default 0
        Worksheet to convert, given either as a zero-based position or as a
        sheet name. The default (0) selects the first sheet, which is what this
        function always did before the parameter existed. It must identify a
        single sheet, because the result is written with ``DataFrame.to_csv``.

    Returns
    -------
    None
        The function only writes ``csv_file`` and prints a confirmation line.
    """
    # Load the requested worksheet into a DataFrame
    df = pd.read_excel(xlsx_file, sheet_name=sheet_name)

    # index=False prevents pandas from writing row numbers into the CSV
    df.to_csv(csv_file, index=False)

    print(f"Successfully converted {xlsx_file} to {csv_file}")

# Usage
# convert_xlsx_to_csv('../data/GPCR_per_neuron.xlsx', '../data/GPCR_per_neuron.csv')
# convert_xlsx_to_csv('../data/Neuropeptides_per_neuron.xlsx', '../data/Neuropeptides_per_neuron.csv')
# convert_xlsx_to_csv('../data/Degree_table_networks_comparison.xlsx', '../data/Degree_table_networks_comparison.csv')
# convert_xlsx_to_csv('../data/Neuronal_bundle_information.xlsx', '../data/Neuronal_bundle_information.csv')
# convert_xlsx_to_csv('../data/Neuron_information.xlsx', '../data/Neuron_information.csv')
# convert_xlsx_to_csv('../data/NPP-GPCR_pairs_information.xlsx', '../data/NPP-GPCR_pairs_information.csv')
# convert_xlsx_to_csv('../data/Figure_S2A.xlsx', '../data/Figure_S2A.csv')
# convert_xlsx_to_csv('../data/Figure_S2B.xlsx', '../data/Figure_S2B.csv')
# convert_xlsx_to_csv('../data/Figure_S11.xlsx', '../data/Figure_S11.csv')

Successfully converted ../data/Figure_S11.xlsx to ../data/Figure_S11.csv


## One-hot encoding for sensory modalities

In [None]:
import pandas as pd

# The table is read from and written back to the same CSV file.
output_filename = '../data/RipollSanchez2023/neuroanatomy.csv'
df = pd.read_csv('../data/RipollSanchez2023/neuroanatomy.csv')

# New indicator column -> keyword searched for inside the 'sensoryType' text.
# NOTE(review): 'strech sensory' and 'nocioceptor' are matched literally;
# confirm these spellings are the ones actually used in the sensoryType column.
mapping = {
    'sensoryType:chemical': 'chemosensory',
    'sensoryType:odor': 'odorsensory',
    'sensoryType:pheromone': 'pheromonesensory',
    'sensoryType:osmolarity': 'osmoceptor',
    'sensoryType:oxygen': 'oxygen sensory',
    'sensoryType:carbon_dioxide': 'carbondioxide sensory',
    'sensoryType:photon': 'photosensory',
    'sensoryType:thermal': 'thermosensory',
    'sensoryType:electrical': 'electrosensory',
    'sensoryType:mechanical': 'mechanosensory',
    'sensoryType:stretch': 'strech sensory',
    'sensoryType:nociceptive': 'nocioceptor'
}

# Convert the source column to text once, then flag every row whose text
# contains the keyword (case-sensitive substring test): 1 if present, else 0.
sensory_text = df['sensoryType'].astype(str)
for col, substring in mapping.items():
    df[col] = [1 if substring in text else 0 for text in sensory_text]

# Persist the augmented table (no row-index column)
df.to_csv(output_filename, index=False)

print(f"File {output_filename} saved successfully with {len(df)} rows.")

File ../data/RipollSanchez2023/neuroanatomy.csv saved successfully with 302 rows.


## Merge CSV files into single CSV file

In [None]:
import pandas as pd

# Paths
# p_info is both the file loaded below and the file a later cell writes df_info back to.
p_info    = "../data/Neurons_merged_onehot.csv"
# p_network = "../data/Degree_table_networks_comparison.csv"  (network table: currently disabled)

# --- Load csv ---
# Read the neuron table into df_info; the following cells inspect and modify this frame.
df_info = pd.read_csv(p_info)
# df_network = pd.read_csv(p_network)

In [None]:
# Preview the first rows of the loaded table to check its columns and values.
df_info.head()

Unnamed: 0,neuronIndex,neuronID,neuronClass,cellType,cellType:pharynx,cellType:sensory,cellType:interneuron,cellType:motor,cellType:unknown,segment,...,segment:tail,npp:outdegree,npp:indegree,npp:degree,npp:outdegree-midrange,npp:indegree-midrange,npp:degree-midrange,npp:outdegree-shortrange,npp:indegree-shortrange,npp:degree-shortrange
0,1,I1L,1,Pharynx,1,0,0,0,0,Head,...,0,168,262,430,124,192,316,3,16,19
1,2,I1R,1,Pharynx,1,0,0,0,0,Head,...,0,168,262,430,124,192,316,3,16,19
2,3,I2L,2,Pharynx,1,0,0,0,0,Head,...,0,145,2,147,104,2,106,1,0,1
3,4,I2R,2,Pharynx,1,0,0,0,0,Head,...,0,145,2,147,104,2,106,1,0,1
4,5,I3,3,Pharynx,1,0,0,0,0,Head,...,0,173,249,422,123,181,304,6,15,21


In [None]:
import pandas as pd
import os

# Operates on df_info (loaded above).
# For every value of 'neuronClass', collect the neuronIDs belonging to it and use
# their longest shared leading substring as the class label (e.g. I1L, I1R -> I1).
# A class with a single member is labelled with that member's full ID.
class_mapping = {
    class_key: os.path.commonprefix(list(member_ids))
    for class_key, member_ids in df_info.groupby('neuronClass')['neuronID']
}

# Replace the original class values with the derived string labels
df_info['neuronClass'] = df_info['neuronClass'].map(class_mapping)

# Show the first few rows to verify the relabelling
print(df_info[['neuronID', 'neuronClass']].head())

  neuronID neuronClass
0      I1L          I1
1      I1R          I1
2      I2L          I2
3      I2R          I2
4       I3          I3


In [None]:
# Write df_info back to p_info (no row-index column), replacing the file it was loaded from.
# NOTE(review): a later cell in this notebook rebinds both p_info (to a different CSV path)
# and df_info, so running this cell after that one would overwrite a different file —
# confirm the intended execution order.
df_info.to_csv(p_info, index=False)

In [None]:
import pandas as pd

# Input files
p_ids  = "../data/NeuronID.csv"
p_info = "../data/Neuron_information.csv"

# --- NeuronID.csv ---
df_ids = pd.read_csv(p_ids)
# A leading column whose name is empty or starts with "Unnamed" is treated as a
# leftover row index and removed.
first_col = df_ids.columns[0]
if first_col.startswith("Unnamed") or first_col == "":
    df_ids = df_ids.drop(columns=first_col)

# Normalise the join key: text, with surrounding whitespace removed
df_ids = df_ids.assign(nodeLabel=df_ids["nodeLabel"].astype(str).str.strip())

# --- Neuron_information.csv ---
# The line right after the header is a second, header-like row
# (",,,,Mid-Range,Short-Range,..."); skiprows=[1] leaves it out.
# on_bad_lines="skip" drops ragged/comment lines instead of raising.
read_options = dict(
    header=0,
    skiprows=[1],
    engine="python",
    on_bad_lines="skip",
)
df_info = pd.read_csv(p_info, **read_options)

# Normalise the key the same way and give it the name used in df_ids
df_info = (
    df_info
    .assign(Neuron=df_info["Neuron"].astype(str).str.strip())
    .rename(columns={"Neuron": "nodeLabel"})
)

# When present, store the adjacency-matrix ID as a nullable integer
# (values that cannot be parsed become missing).
adj_col = "ID (Adjacency Matrix)"
if adj_col in df_info.columns:
    adj_numeric = pd.to_numeric(df_info[adj_col], errors="coerce")
    df_info[adj_col] = adj_numeric.astype("Int64")

# --- Sanity checks: the join key must be unique on both sides ---
assert df_ids["nodeLabel"].is_unique,  "Duplicate nodeLabel(s) in NeuronID.csv"
assert df_info["nodeLabel"].is_unique, "Duplicate nodeLabel(s) in Neuron_information.csv"

# --- Merge: inner join keeps only labels found in both tables
# (how='left' would keep every row of df_ids instead) ---
merged = df_ids.merge(
    df_info,
    on="nodeLabel",
    how="inner",
    validate="one_to_one",
)

# Save the merged table without a row-index column
merged.to_csv("../data/NeuronID_merged.csv", index=False)

print("Merged rows:", len(merged))
print(merged.head())


Merged rows: 302
   nodeID nodeLabel nodetype  Cell.Class  ID (Adjacency Matrix)     Type  \
0       1       I1L  Pharynx           1                      1  Pharynx   
1       2       I1R  Pharynx           1                      2  Pharynx   
2       3       I2L  Pharynx           2                      3  Pharynx   
3       4       I2R  Pharynx           2                      4  Pharynx   
4       5        I3  Pharynx           3                      5  Pharynx   

  Segment                                                                                  (Fig. 2,3)  \
0                                               Head                                                    
1                                               Head                                                    
2                                               Head                                                    
3                                               Head                                                    
4

In [None]:
import pandas as pd

in_path  = "../data/NeuronID_merged.csv"
out_path = "../data/NeuronID_merged_onehot.csv"

df = pd.read_csv(in_path)

# 1) Remove the 'nodetype' and adjacency-ID columns (silently skipped when absent)
df = df.drop(columns=["nodetype", "ID (Adjacency Matrix)"], errors="ignore")

# Normalise the name of the Segment column: the long header variant is renamed
# to "Segment", or discarded when a plain "Segment" column already exists.
seg_long = "Segment                                                                                  (Fig. 2,3)"
if seg_long in df.columns:
    if "Segment" in df.columns:
        df = df.drop(columns=[seg_long])
    else:
        df = df.rename(columns={seg_long: "Segment"})


# 2) One-hot encode "Type" -> cell type: ...
if "Type" not in df.columns:
    raise KeyError("Column 'Type' not found in NeuronID_merged.csv")

# Compare on a trimmed, lower-cased copy of the column
type_norm = df["Type"].astype(str).str.strip().str.lower()

# Normalised Type value -> name of its indicator column
type_map = {
    "pharynx": "cell type: pharynx",
    "sensory neuron": "cell type: sensory",
    "motor neuron": "cell type: motor",
    "interneuron": "cell type: interneuron",
    "unknown": "cell type: unknown",
}

# Indicator columns in their output order (the order of type_map)
type_cols = list(type_map.values())

# Any value not listed in type_map is counted as "cell type: unknown"
type_labels = type_norm.map(type_map).fillna("cell type: unknown")

# reindex adds an all-zero column for every category absent from the data
# and puts the columns in the fixed type_cols order.
type_onehot = (
    pd.get_dummies(type_labels)
    .reindex(columns=type_cols, fill_value=0)
    .astype(int)
)

# 3) One-hot encode "Segment" -> segment: head/midbody/tail
# A cell may list several segments ("Head, Midbody"), so each segment is
# detected independently with a case-insensitive whole-word search.
if "Segment" in df.columns:
    seg_str = df["Segment"].astype(str)
else:
    # No Segment column: search empty strings, giving all-zero indicators
    seg_str = pd.Series([""] * len(df), index=df.index).astype(str)

segment_patterns = {
    "segment: head": r"\bhead\b",
    "segment: midbody": r"\bmidbody\b",
    "segment: tail": r"\btail\b",
}
seg_onehot = pd.DataFrame(
    {
        column: seg_str.str.contains(pattern, case=False, na=False).astype(int)
        for column, pattern in segment_patterns.items()
    },
    index=df.index,
)

# 4) Append the indicator columns; the original Type/Segment columns are kept
df = pd.concat([df, type_onehot, seg_onehot], axis=1)

# Optional: drop the original categorical columns now that they're expanded
# df = df.drop(columns=["Type", "Segment"], errors="ignore")

df.to_csv(out_path, index=False)
print(f"Wrote: {out_path}")
print(df.head())


Wrote: ../data/NeuronID_merged_onehot.csv
   nodeID nodeLabel  Cell.Class     Type Segment  Degree (EC50 500nM)  \
0       1       I1L           1  Pharynx    Head                  329   
1       2       I1R           1  Pharynx    Head                  329   
2       3       I2L           2  Pharynx    Head                  143   
3       4       I2R           2  Pharynx    Head                  143   
4       5        I3           3  Pharynx    Head                  332   

   Unnamed: 5  In-degree (EC50 500nM)  Unnamed: 7  Out-degree (EC50 500nM)  \
0          19                     199          16                      130   
1          19                     199          16                      130   
2           6                      27           5                      116   
3           6                      27           5                      116   
4          27                     183          15                      149   

   ...  Unnamed: 14                               