# Part 2: Search UniProt for GO terms

Say you have a list of genes and need to retrieve their Gene Ontology (GO) annotations from UniProt and save them to a .csv file. You can use the following code to extract GO terms from UniProt; it can look up GO terms for many genes at a time.

In [None]:
# Query UniProt for one accession number and retrieve the GO terms of that entry.
# The function below handles a single ID per call.
def get_go_terms(uniprot_id):
    """Fetch the GO cross-references of a single UniProt entry.

    Downloads the flat-text record for ``uniprot_id`` and collects every
    line that starts with ``DR   GO;``.

    Args:
        uniprot_id: One UniProt accession, e.g. ``"P60010"``.

    Returns:
        A list of ``{"Aspect": ..., "Term": ...}`` dicts, one per GO line
        (empty when the entry has none), or ``None`` when the request fails
        or the server answers with an error status.

        "Aspect" is the first ``;``-separated field after the ``GO`` tag and
        "Term" is the second one.
        NOTE(review): in UniProt's flat-file layout these look like the GO
        accession and the ``<aspect letter>:<term name>`` pair respectively;
        the key names are kept as-is for compatibility.
    """
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.txt"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Network-level failures are reported like any other failed lookup.
        return None
    if not response.ok:
        return None

    go_terms = []
    for line in response.text.strip().split("\n"):
        if not line.startswith("DR   GO;"):
            continue
        go_info = line.split(";")[1:]
        if len(go_info) < 2:
            # Truncated cross-reference line: nothing usable to record.
            continue
        go_terms.append({"Aspect": go_info[0].strip(), "Term": go_info[1].strip()})
    return go_terms

# A bare comma-separated run of strings is a tuple. Passing the whole tuple to
# get_go_terms would put its repr into the URL and the lookup would always
# fail, so each accession is looked up on its own.
uniprot_ids = ('P60010', 'P00330', 'P00924', 'P00924', 'P00358', 'P02829', 'P00549', 'P00692', 'P00950', 'P00359')
for uniprot_id in uniprot_ids:
    go_terms = get_go_terms(uniprot_id)
    if go_terms:
        print(f"UniProt ID: {uniprot_id}")
        for term in go_terms:
            print(f"Aspect: {term['Aspect']}, Term: {term['Term']}")
    else:
        # Covers both a failed request (None) and an entry without GO lines ([]).
        print(f"No GO terms found for UniProt ID {uniprot_id}")

In [None]:
#Search uniprot for each accession number and retrieve GO terms for multiple IDs at once, using the above format
def get_go_terms(uniprot_ids):
    """Fetch the GO cross-references of several UniProt entries.

    For every accession the flat-text record is downloaded and each line
    starting with ``DR   GO;`` becomes one result row.

    Args:
        uniprot_ids: Iterable of UniProt accession strings. Surrounding
            single quotes and spaces are stripped from each one.

    Returns:
        A flat list of ``{"UniProt ID", "Aspect", "Term"}`` dicts. An
        accession without any GO line contributes one placeholder row with
        Term ``"No GO terms found"``; an accession whose request fails
        contributes one placeholder row with Term ``"Request failed"``.
        An empty input yields an empty list.

        "Aspect" is the first ``;``-separated field after the ``GO`` tag and
        "Term" is the second one.
        NOTE(review): in UniProt's flat-file layout these look like the GO
        accession and the ``<aspect letter>:<term name>`` pair respectively;
        the key names are kept as-is for compatibility.
    """
    go_terms = []
    for uniprot_id in uniprot_ids:
        uniprot_id = uniprot_id.strip("' ")  # Remove single quotes and spaces from each UniProt ID
        url = f"https://www.uniprot.org/uniprot/{uniprot_id}.txt"
        try:
            # Without a timeout a single stalled connection would block the whole batch.
            response = requests.get(url, timeout=30)
            request_ok = response.ok
        except requests.RequestException:
            # One unreachable entry must not abort the remaining lookups.
            request_ok = False

        if not request_ok:
            go_terms.append({"UniProt ID": uniprot_id, "Aspect": "N/A", "Term": "Request failed"})
            continue

        has_go_terms = False  # Set once a usable GO line is seen for this ID
        for line in response.text.strip().split("\n"):
            if not line.startswith("DR   GO;"):
                continue
            go_info = line.split(";")[1:]
            if len(go_info) < 2:
                # Truncated cross-reference line: nothing usable to record.
                continue
            has_go_terms = True
            go_terms.append({
                "UniProt ID": uniprot_id,
                "Aspect": go_info[0].strip(),
                "Term": go_info[1].strip(),
            })
        if not has_go_terms:
            go_terms.append({"UniProt ID": uniprot_id, "Aspect": "N/A", "Term": "No GO terms found"})

    return go_terms

# Look up the example accession and show the collected rows as a table.
uniprot_ids = ['A0A0W0CZH5']
go_terms = get_go_terms(uniprot_ids)

if not go_terms:
    # An empty result means there was nothing to look up.
    print("No UniProt IDs provided.")
else:
    df = pd.DataFrame(go_terms)
    print(df)


In [None]:
# Write the GO-term table built in the previous cell to a new .csv file
# (the DataFrame index is left out of the file).
output_file_path = '/terms_uniprot.csv'
df.to_csv(path_or_buf=output_file_path, index=False)
print("CSV file saved successfully.")

In [None]:
#combine CC, BP, MF, into one cell
#example
# Paste one value per line between the triple quotes.
input_string = '''
'''

# Strip every line and drop the blank ones. The triple-quoted literal always
# begins and ends with a newline, which would otherwise show up as empty ''
# entries in the result.
stripped_lines = (line.strip() for line in input_string.split('\n'))
split_values = [value for value in stripped_lines if value]

# Enclose each value in single quotes and join them with ', '
result_string = ', '.join(f"'{value}'" for value in split_values)

print(result_string)

In [None]:
#use the above example, but applied to .csv files
def convert_format(input_file, output_file):
    """Pivot a per-term GO table into one row per UniProt ID.

    Each input row is read positionally: column 0 is the UniProt ID,
    column 2 the category letter and column 3 the term (column 1 is not
    used). Terms are grouped per ID into the categories ``C``, ``F`` and
    ``P``; rows with any other category still register the ID but add no
    term. Every row is treated as data, so a header row, if the file has
    one, is not skipped. Rows with fewer than four columns (for example
    blank lines) are ignored.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write. It gets a header
            ``'', C, F, P`` followed by one row per UniProt ID in order of
            first appearance; each category cell holds the terms joined by
            ``', '`` and wrapped in single quotes.
    """
    categories = ['C', 'F', 'P']
    converted_data = {}

    # The csv module expects files opened with newline='' for reading as well
    # as writing, so that newlines inside quoted fields are parsed correctly.
    with open(input_file, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) < 4:
                # Blank or truncated line: no ID/category/term to extract.
                continue
            uniprot_id, category, term = row[0], row[2], row[3]
            buckets = converted_data.setdefault(
                uniprot_id, {key: [] for key in categories}
            )
            if category in buckets:
                buckets[category].append(term)

    with open(output_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([''] + categories)

        for uniprot_id, terms in converted_data.items():
            cells = ["'" + ', '.join(terms[key]) + "'" for key in categories]
            writer.writerow([uniprot_id] + cells)

# Example usage: regroup the per-term GO table into one row per UniProt ID.
input_file, output_file = '/GO terms_uniprot.csv', "/GO_Terms_Output.csv"
convert_format(input_file=input_file, output_file=output_file)

print("CSV file saved successfully.")