In [5]:
import ast
import csv
from pathlib import Path

import pandas as pd
import requests

In [2]:

url = "https://rostlab.org/~erckert/localization_PP2_WS23/annotations.txt"

# Bound the wait so a stalled server cannot hang the kernel indefinitely.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx responses; otherwise an HTML error page would be
# saved and parsed as if it were annotation data.
response.raise_for_status()
data = response.text

In [8]:
# Keep a local copy of the downloaded annotations text.
local_file_path = 'annotations.txt'
with open(local_file_path, mode='w', encoding='utf-8') as out_file:
    out_file.write(data)

print(f"Data has been saved to {local_file_path}")

Data has been saved to annotations.txt


In [9]:

def parse_annotations(data):
    """Split the raw annotations text into darkness-bin and localization records.

    Blank lines and lines whose first non-space character is ``#`` are
    ignored. Every other line is read as ``<uniprot_id>: <dict literal>`` and
    is only processed when the literal text contains a ``'localization':``
    key. Malformed entries are reported with ``print`` and skipped instead of
    aborting the whole parse.

    Parameters
    ----------
    data : str
        Full contents of the annotations file.

    Returns
    -------
    tuple of (list of dict, list of dict)
        ``darkness_bins_table`` has one record per entry with a valid
        ``darkness_bins`` value (keys ``UniprotID``, ``Lower_Darkness_Bin``,
        ``Upper_Darkness_Bin``). ``localization_table`` has one record per
        (entry, localization) pair (keys ``UniprotID``, ``Localization``,
        ``Localization_Evidence_Code``).
    """
    darkness_bins_table = []
    localization_table = []

    for raw_line in data.split('\n'):
        # Strip first so indented comment lines are skipped as well.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        # Split on the first colon only: the dict literal itself contains colons.
        uniprot_id, _, rest = line.partition(':')
        uniprot_id = uniprot_id.strip()
        rest = rest.strip()

        # NOTE(review): entries without a 'localization' key are dropped
        # entirely, including their darkness_bins -- confirm this is intended.
        if "'localization':" not in rest:
            continue

        # literal_eval raises on malformed literals; one bad line must not
        # abort the parse of every other entry.
        try:
            entry_dict = ast.literal_eval(rest)
        except (ValueError, TypeError, SyntaxError):
            entry_dict = None
        if not isinstance(entry_dict, dict):
            print(f"Skipping entry for {uniprot_id} due to invalid annotation format.")
            continue

        if 'darkness_bins' in entry_dict:
            darkness_bins = entry_dict['darkness_bins']
            try:
                # Text form such as "(0, 5]" is unwrapped and split; a value
                # that is already a 2-item sequence is converted directly.
                if isinstance(darkness_bins, str):
                    darkness_bins = darkness_bins.strip('()[]').split(',')
                lower_bin, upper_bin = map(float, darkness_bins)
            except (TypeError, ValueError):
                print(f"Skipping entry for {uniprot_id} due to invalid darkness_bins format.")
            else:
                darkness_bins_table.append({
                    'UniprotID': uniprot_id,
                    'Lower_Darkness_Bin': lower_bin,
                    'Upper_Darkness_Bin': upper_bin
                })

        localization = entry_dict.get('localization')
        if isinstance(localization, dict):
            for loc, evidence_code in localization.items():
                localization_table.append({
                    'UniprotID': uniprot_id,
                    'Localization': loc,
                    'Localization_Evidence_Code': evidence_code
                })
        elif localization:
            print(f"Skipping localization for {uniprot_id} due to invalid localization format.")

    return darkness_bins_table, localization_table

darkness_bins_table, localization_table = parse_annotations(data)

# Name the columns explicitly so each CSV still gets its header row when the
# corresponding table is empty.
darkness_bins_df = pd.DataFrame(
    darkness_bins_table,
    columns=['UniprotID', 'Lower_Darkness_Bin', 'Upper_Darkness_Bin'],
)
localization_df = pd.DataFrame(
    localization_table,
    columns=['UniprotID', 'Localization', 'Localization_Evidence_Code'],
)

darkness_bins_csv_file_path = 'parsed_data/darkness_bins.csv'
localization_csv_file_path = 'parsed_data/localization.csv'

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing (required for a fresh Run All).
for csv_path in (darkness_bins_csv_file_path, localization_csv_file_path):
    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)

darkness_bins_df.to_csv(darkness_bins_csv_file_path, index=False)
localization_df.to_csv(localization_csv_file_path, index=False)

print(f"Darkness bins data has been saved to {darkness_bins_csv_file_path}")
print(f"Localization data has been saved to {localization_csv_file_path}")


Darkness bins data has been saved to parsed_data/darkness_bins.csv
Localization data has been saved to parsed_data/localization.csv
