In [1]:
import csv
import pandas as pd

def csv_to_triples(csv_file, relationship_mapping, output_csv, combined_column_mappings):
    """Convert a wide CSV into (subject, relationship, object) triples and save them.

    The first cell of every data row is the subject. Each remaining cell whose
    stripped header is a key of ``relationship_mapping`` yields one triple,
    provided the cell is non-empty after stripping. Each entry of
    ``combined_column_mappings`` yields at most one extra triple per row, whose
    object is the non-empty values of the listed columns joined with ``', '``.

    Args:
        csv_file: Path of the UTF-8 input CSV; its first row is the header.
        relationship_mapping: Dict mapping a header name to a relationship
            label. Columns whose header is not a key are ignored.
        output_csv: Path of the CSV to write. It receives a
            ``Subject,Relationship,Object`` header row followed by the triples.
        combined_column_mappings: Iterable of ``(column_names, relationship)``
            pairs. Column names that do not appear in the header are skipped,
            the same way unmapped single columns are.

    Returns:
        The list of ``(subject, relationship, object)`` tuples, in the order
        they were written.
    """
    triples = []

    with open(csv_file, newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # An empty file has no header row; treat it as "no columns" rather
        # than letting StopIteration escape from next().
        raw_headers = next(reader, None) or []
        headers = [name.strip() for name in raw_headers]

        # Header name -> column position (first occurrence wins, like
        # list.index). Exact header text takes priority; the stripped form is
        # a fallback so combined columns tolerate the same stray whitespace
        # that single-column lookups already tolerate.
        column_index = {}
        for position, name in enumerate(raw_headers):
            column_index.setdefault(name, position)
        for position, name in enumerate(headers):
            column_index.setdefault(name, position)

        # Resolve combined columns to positions once instead of once per row.
        combined_lookups = [
            (
                [column_index[col] for col in combined_columns if col in column_index],
                combined_relationship,
            )
            for combined_columns, combined_relationship in combined_column_mappings
        ]

        value_headers = headers[1:]

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is no subject.
                continue

            subject = row[0].strip()  # First column as subject

            # zip() stops at the shorter side, so ragged rows (fewer or more
            # cells than the header) cannot index out of range.
            for column_name, cell in zip(value_headers, row[1:]):
                if column_name in relationship_mapping:  # Skip columns not in mapping
                    obj = cell.strip()
                    if obj:
                        triples.append((subject, relationship_mapping[column_name], obj))

            for positions, combined_relationship in combined_lookups:
                combined_values = []
                for position in positions:
                    if position < len(row):  # short row: treat the cell as empty
                        value = row[position].strip()
                        if value:
                            combined_values.append(value)

                if combined_values:
                    triples.append((subject, combined_relationship, ', '.join(combined_values)))

    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Subject", "Relationship", "Object"])
        writer.writerows(triples)

    return triples

# Custom relationships: CSV column header -> relationship label used for the
# triples built from that column. Headers not listed here produce no triple.
relationship_mapping = dict([
    ("Main_Services", "main_services_are"),
    ("Other_Services", "other_services_are"),
    ("Phone_Number", "phone_number_is"),
    ("Website", "website_is"),
    ("Location_Address", "is_located_at"),
    ("Location_URL_Map", "url_is_located_at"),
    ("Availability", "has_an_availability_status_of"),
    ("Description", "description_is"),
    ("Languages", "speaks"),
    ("Cost", "costs"),
    ("Google_Review", "has_Google_Reviews"),
    ("Coverage", "covers"),
    ("Zipcode", "zipcode_is"),
    ("24hour", "24hours_status_is"),
])

# Column groups that collapse into a single triple per row. Each entry is
# (list of column headers, relationship label); the group's non-empty values
# are joined with ", " in the order the headers are listed here.
combined_column_mappings = [
    (
        ["Serving", "Eligibility"],
        "serves",
    ),
    (
        ["Facebook_URL", "Twitter_URL"],
        "socials_are",
    ),
    (
        ["Latitude", "Longitude"],
        "coordinates_are",
    ),
    (
        [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ],
        "is_open_from",
    ),
]

# Input and output locations for this run.
csv_file_path = 'Leo/Riverside/FindHelp_extracted_data_riv_shelter.csv' #Change to your path
output_csv = "Riv_shelter_triples.csv"  #Change to yours
triples = csv_to_triples(csv_file_path, relationship_mapping, output_csv, combined_column_mappings)

# Read back the file that was just written, reusing output_csv so the path
# cannot drift from the one passed to csv_to_triples.
# dtype=str and keep_default_na=False keep every cell as the literal text in
# the file; pandas' default NA parsing would turn objects such as "N/A", "NA"
# or "null" into NaN. The file is deliberately not re-saved through pandas:
# csv_to_triples already wrote it, and a round trip can only alter it.
df = pd.read_csv(output_csv, dtype=str, keep_default_na=False, encoding='utf-8')

df

Unnamed: 0,Subject,Relationship,Object
0,(For Disasters) Disaster Relief And Recovery,main_services_are,"['disaster response', 'emergency food', 'tempo..."
1,(For Disasters) Disaster Relief And Recovery,other_services_are,"['psychiatric emergency services', 'mental hea..."
2,(For Disasters) Disaster Relief And Recovery,phone_number_is,888-831-0031
3,(For Disasters) Disaster Relief And Recovery,website_is,https://www.redcross.org/local/california/sout...
4,(For Disasters) Disaster Relief And Recovery,is_located_at,"6235 South Riverside Avenue, Colton, CA 92507"
...,...,...,...
200,Riverside University Health System HHOPE,zipcode_is,92507.0
201,Riverside University Health System HHOPE,24hours_status_is,False
202,Riverside University Health System HHOPE,serves,"['adults 18+', 'homeless', 'near homeless', 'a..."
203,Riverside University Health System HHOPE,coordinates_are,"33.9874923, -117.3537087"
