# RSA Computation
- Parse DSSP structural files
- Compute normalized solvent accessibility (RSA)
- Clean and export residue-level RSA dataset

> **Note:**  
> Pick one of the dataset templates below and set the matching paths in the "User Inputs" cell before running the code.

In [36]:
# INPUT_FILE = "raw_data/PSEN1_AF3.txt"
# OUTPUT_FILE = "processed_data/PSEN1_AF3_RSA.csv"

In [37]:
# INPUT_FILE = "raw_data/PSEN2_AF3.txt"
# OUTPUT_FILE = "processed_data/PSEN2_AF3_RSA.csv"

In [38]:
# INPUT_FILE = "raw_data/APP_AF3.txt"
# OUTPUT_FILE = "processed_data/APP_AF3_RSA.csv"

In [39]:
import csv
import os

In [40]:
# --- 1️⃣ Max ASA table (Tien et al. 2013) ---
# Maximum solvent-accessible surface area per residue type, keyed by
# one-letter amino-acid code. Used as the denominator when normalising
# DSSP accessibility into RSA.
# NOTE(review): these numbers match the *empirical* scale of Tien et al. 2013
# (in Å^2); because that scale is not a hard upper bound, RSA values slightly
# above 1.0 are possible for very exposed residues.
max_asa = {
    'A': 121.0,  # Ala
    'R': 265.0,  # Arg
    'N': 187.0,  # Asn
    'D': 187.0,  # Asp
    'C': 148.0,  # Cys
    'Q': 214.0,  # Gln
    'E': 214.0,  # Glu
    'G': 97.0,   # Gly
    'H': 216.0,  # His
    'I': 195.0,  # Ile
    'L': 191.0,  # Leu
    'K': 230.0,  # Lys
    'M': 203.0,  # Met
    'F': 228.0,  # Phe
    'P': 154.0,  # Pro
    'S': 143.0,  # Ser
    'T': 163.0,  # Thr
    'W': 264.0,  # Trp
    'Y': 255.0,  # Tyr
    'V': 165.0,  # Val
}

### Change the cell below to read the dataset you want

In [None]:
# --- 2️⃣ User Inputs (EDIT THESE ONLY) ---
# Name of the dataset to process. Both paths below are derived from it so the
# input file and the output file can never refer to different proteins.
# Datasets listed in this notebook: "PSEN1_AF3", "PSEN2_AF3", "APP_AF3".
DATASET = "APP_AF3"

# DSSP text file to read, and the CSV file the RSA table is written to.
INPUT_FILE = f"raw_data/{DATASET}.txt"
OUTPUT_FILE = f"processed_data/{DATASET}_RSA.csv"


In [42]:
# --- 3️⃣ Function to parse DSSP file and compute RSA ---
def compute_rsa(dssp_file, max_asa_table=None):
    """Parse a fixed-width DSSP file and compute relative solvent accessibility.

    Residue records are read only after the column-header line (the line whose
    first non-blank character is ``#``). For each record the residue number is
    taken from columns 5-9, the one-letter amino-acid code from column 13 and
    the absolute accessibility (ACC) from columns 34-37 (0-based offsets).

    Args:
        dssp_file: Path to the DSSP output file.
        max_asa_table: Optional mapping of one-letter residue code to maximum
            ASA. Defaults to the notebook-level ``max_asa`` table.

    Returns:
        A list of ``(res_num, aa, acc, rsa)`` tuples in file order, where
        ``rsa`` is ``acc / max_asa_table[aa]`` rounded to 3 decimals. The value
        is not clamped, so it can exceed 1.0.

    Notes:
        * Chain ID and insertion code are not captured; residue numbers from
          different chains are therefore not distinguished.
        * Records that cannot be parsed (chain breaks, truncated lines,
          residue codes missing from the table) are skipped.
    """
    if max_asa_table is None:
        max_asa_table = max_asa

    rsa_data = []
    parsing = False

    with open(dssp_file, "r") as f:
        for line in f:
            # Start reading after DSSP header "#"
            if line.strip().startswith("#"):
                parsing = True
                continue

            # Require the complete ACC field (columns 34-37); a shorter line
            # would otherwise yield a silently truncated accessibility value.
            if not parsing or len(line.rstrip("\r\n")) < 38:
                continue

            try:
                res_num = int(line[5:10].strip())
                acc = int(line[34:38].strip())
            except ValueError:
                # Chain-break records ("!") have no residue number; any other
                # non-numeric field also marks a line we cannot use.
                continue

            aa = line[13]

            # DSSP writes disulfide-bridged cysteines as lowercase letters
            # (a, b, c, ...) that pair up the bridge partners; they are all
            # cysteines and must not be dropped from the dataset.
            if aa.islower():
                aa = "C"

            # Skip break markers, blanks and non-standard residues (e.g. "X").
            if aa not in max_asa_table:
                continue

            rsa = acc / max_asa_table[aa]
            rsa_data.append((res_num, aa, acc, round(rsa, 3)))

    return rsa_data

In [43]:
# --- 4️⃣ Run and save ---
# Parse the DSSP file selected in the "User Inputs" cell.
rsa_values = compute_rsa(INPUT_FILE)

print(f"Parsed {len(rsa_values)} residues from {INPUT_FILE}\n")
print(f"{'ResNum':>6} {'AA':>3} {'ACC':>5} {'RSA':>6}")
for r in rsa_values[:20]:  # preview first 20 rows
    print(f"{r[0]:6} {r[1]:>3} {r[2]:5} {r[3]:6.3f}")

# Make sure the destination folder exists; on a fresh checkout it may not,
# and open(..., "w") would then fail with FileNotFoundError.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Save to CSV (newline="" lets the csv module control line endings)
with open(OUTPUT_FILE, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["ResNum", "AA", "ACC", "RSA"])
    writer.writerows(rsa_values)

print(f"\n✅ Saved RSA data to {OUTPUT_FILE}")

Parsed 770 residues from raw_data/APP_AF3.txt

ResNum  AA   ACC    RSA
     1   M   229  1.128
     2   L   151  0.791
     3   P   101  0.656
     4   G    45  0.464
     5   L   115  0.602
     6   A    56  0.463
     7   L   126  0.660
     8   L   126  0.660
     9   L   118  0.618
    10   L   113  0.592
    11   A    53  0.438
    12   A    59  0.488
    13   W   218  0.826
    14   T    90  0.552
    15   A    77  0.636
    16   R   221  0.834
    17   A    83  0.686
    18   L   145  0.759
    19   E   173  0.808
    20   V   108  0.655

✅ Saved RSA data to processed_data/APP_AF3_RSA.csv
