In [3]:
import csv
import pandas as pd

def csv_to_triples(csv_file, relationship_mapping, output_csv, combined_column_mappings):
    """Convert a wide CSV file into (subject, relationship, object) triples.

    The first column of every data row is the subject. Each remaining column
    whose (whitespace-stripped) header appears in ``relationship_mapping``
    produces one triple per non-empty cell. Each entry of
    ``combined_column_mappings`` produces at most one extra triple per row,
    whose object is the ``', '``-joined non-empty values of the listed columns.

    The triples are written to ``output_csv`` beneath a
    ``Subject,Relationship,Object`` header row and are also returned.

    Args:
        csv_file: Path of the UTF-8 input CSV; its first row is the header.
        relationship_mapping: Dict mapping a column header to the relationship
            name used for that column. Columns not in the dict are skipped.
        output_csv: Path of the CSV file to write (overwritten if it exists).
        combined_column_mappings: Iterable of ``(column_names, relationship)``
            pairs. Column names that are not present in the header are
            ignored rather than raising.

    Returns:
        List of ``(subject, relationship, object)`` tuples of stripped strings,
        in row order; within a row the single-column triples come first.
    """
    triples = []

    with open(csv_file, newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # An empty file has no header row; treat it as a file with no columns
        # instead of letting next() raise StopIteration.
        headers = [name.strip() for name in next(reader, [])]

        # Resolve header name -> first column position once, rather than
        # calling headers.index() for every column of every row.
        column_index = {}
        for position, name in enumerate(headers):
            column_index.setdefault(name, position)

        # Pre-resolve the combined columns with the same stripped names used
        # for relationship_mapping; names missing from this file are dropped.
        combined_positions = [
            (
                [column_index[col.strip()] for col in columns if col.strip() in column_index],
                relationship,
            )
            for columns, relationship in combined_column_mappings
        ]

        for row in reader:
            if not row:  # csv.reader yields [] for blank lines
                continue

            subject = row[0].strip()  # First column as subject

            # zip() stops at the shorter sequence, so rows that are shorter or
            # longer than the header cannot cause an IndexError.
            for column_name, cell in zip(headers[1:], row[1:]):
                if column_name in relationship_mapping:  # Skip columns not in mapping
                    obj = cell.strip()
                    if obj:
                        triples.append((subject, relationship_mapping[column_name], obj))

            for positions, relationship in combined_positions:
                # Short rows simply contribute no value for the missing cells.
                cells = (row[position].strip() for position in positions if position < len(row))
                combined_values = [value for value in cells if value]
                if combined_values:
                    triples.append((subject, relationship, ', '.join(combined_values)))

    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Subject", "Relationship", "Object"])
        writer.writerows(triples)

    return triples

# Column header -> relationship name used when that column becomes a triple.
# csv_to_triples skips any CSV column whose header is not a key here.
relationship_mapping = {
    "Main_Services": "main_services_are",
    "Other_Services": "other_services_are",
    "Phone_Number": "phone_number_is",
    "Website": "website_is",
    "Location_Address": "is_located_at",
    "Location_URL_Map": "url_is_located_at",
    "Availability": "has_an_availability_status_of",
    "Description": "description_is",
    "Languages": "speaks",
    "Cost": "costs",
    "Google_Review": "has_Google_Reviews",
    "Coverage": "covers",
    "Zipcode": "zipcode_is",
    "24hour": "24hours_status_is",
    # One identically shaped relationship per weekday column.
    **{
        day: f"{day}'s_time_open"
        for day in (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )
    },
    "Google_Rating": "has_a_Google_Rating_of",
    "Service_Type": "offers",
}

# Groups of columns whose non-empty values are joined into a single object,
# paired with the relationship name used for the joined value.
combined_column_mappings = [
    (
        ["Serving", "Eligibility"],
        "serves",
    ),
    (
        ["Facebook_URL", "Twitter_URL"],
        "socials_are",
    ),
    (
        ["Latitude", "Longitude"],
        "coordinates_are",
    ),
]

csv_file_path = 'Leo/Riverside/FindHelp_extracted_data_riv_mental_health.csv' #Change to your path
output_csv = "Riv_mental_health_triples.csv"  #Change to yours
triples = csv_to_triples(csv_file_path, relationship_mapping, output_csv, combined_column_mappings)

# Reload exactly what csv_to_triples wrote, reusing output_csv so the two paths
# cannot drift apart. Every cell is read as text with NA detection disabled:
# with pandas' defaults, values such as "N/A", "NA" or "null" are parsed as NaN.
# The file is not re-saved, because csv_to_triples has already written it and
# a pandas round trip with default parsing would blank those values on disk.
df = pd.read_csv(output_csv, dtype=str, keep_default_na=False)

df

Unnamed: 0,Subject,Relationship,Object
0,National Perinatal Hotline (PPD MOMS),main_services_are,"['mental health care', 'help hotlines']"
1,National Perinatal Hotline (PPD MOMS),phone_number_is,833-852-6262
2,National Perinatal Hotline (PPD MOMS),has_an_availability_status_of,available
3,National Perinatal Hotline (PPD MOMS),description_is,This program provides a crisis postpartum depr...
4,National Perinatal Hotline (PPD MOMS),speaks,['English']
...,...,...,...
3012,"Substance Use Community Access, Referral, Eval...",24hours_status_is,True
3013,"Substance Use Community Access, Referral, Eval...",has_a_Google_Rating_of,0
3014,"Substance Use Community Access, Referral, Eval...",offers,Hotline
3015,"Substance Use Community Access, Referral, Eval...",serves,"['adults', 'young adults', 'teens', 'seniors',..."
