In [1]:
from bs4 import BeautifulSoup
import csv
import re
import os 
import pandas as pd
import glob

In [2]:
data = []

# reading html files from folder

#'Riv_Shelter_HTML_Files'is the name for the shelter HTML Files Riverside
#'Riv_Food_Pantry_HTML_Files'is the name for the food pantry HTML Files for Riverside 
#'Riv_Mental_Health_HTML_Files'is the name for the mental health HTML Files for Riverside
#'Downtown_LA_Food_Pantry_HTML_Files' is the name for the food pantry HTML Files for downtown LA
#'Irvine_Mental_Health_HTML_Files' is the name for the mental health HTML Files for Irvine
folder_path = 'Downtown_LA_Shelter_HTML_Files'

# Loop through each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.html'):
        file_path = os.path.join(folder_path, filename)

        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()

        soup = BeautifulSoup(html_content, 'html.parser')


        # Find the <ul> element with class "best-programs"
        ul_element = soup.find('ul', {'class': 'best-programs'})

        # Find all <li> elements within the <ul>
        li_elements = ul_element.find_all('li', {'class': 'search-result card card-v3 program-info'})

        # Loop through the found <li> elements and extract labels and values
        for li_element in li_elements:
            # Use BeautifulSoup to parse the HTML of each li_element
            li_soup = BeautifulSoup(str(li_element), 'html.parser')

            # Find the program heading 
            program_heading_element = li_soup.find('div', {'class': 'card-heading'})
            Service_name = program_heading_element.find('a', {'class': 'activity-log ph-flyout-click cwdc-flyout-click click-cookie'}).text
            print('Service name: ', Service_name)
    
            # Extract program URL
            program_url_element = program_heading_element.find('a', {'class': 'activity-log ph-flyout-click cwdc-flyout-click click-cookie'})
            program_url = program_url_element['href'] if program_url_element else None
            Service_url = "https://www.auntbertha.com/"+program_url
            print('URL: ', Service_url)
            
            is_reviewed = False

            # Reviewed on
            Reviewd_on_element = program_heading_element.find('div', {'class': 'last-reviewed'})
            if Reviewd_on_element:  # Check if the element exists
                Reviewd_on_text = Reviewd_on_element.get_text(strip=True)  # Clean up the text
                match = re.search(r'\d{2}/\d{2}/\d{4}', Reviewd_on_text)
                Reviewd_on = match.group() if match else None
                if Reviewd_on:  # If we found a review date
                    is_reviewed = True  # Set the flag to True
                print("Reviewed on:", Reviewd_on)
            else:
                Reviewd_on = None  # Set to None if not found
                print("No review date found.")
    
            # Access class="program-tags"
            program_tags = li_soup.find('div', {'class': 'program-tags'})

            # Main Services
            main_s = []
            main_service_list = program_tags.find('ul', {'class': 'list-inline'})
            main_service_items = main_service_list.find_all('li')

            for item in main_service_items:
                main_service = item.find('a', {'class': 'loading-on-click activity-log'}).text.strip()
                main_s.append(main_service)

            print("Main Services:", main_s)

            # Try to access the "Other Services" section
            other_service_list = program_tags.find('div', {'class': 'secondary-tags'})
            if other_service_list:
                other_s = []
                other_service_items = other_service_list.find('ul', {'class': 'list-inline'}).find_all('li')

                for item in other_service_items:
                    other_service = item.find('a', {'class': 'loading-on-click activity-log'}).text.strip()
                    other_s.append(other_service)
            else:
                other_s = None

            print("Other Services:", other_s)

            # Access the "Serving" section
            serving_section = program_tags.find('div', {'class': 'attribute-tags'})

            # Initialize a list to store the serving information
            serving_ = []
    
            # Find all the <li> elements within the serving section
            serving_items = serving_section.find_all('li')

            # Iterate through the serving items and extract the text from the <a> elements
            for item in serving_items:
                link = item.find('a', {'class': 'loading-on-click activity-log'})
    
                # Check if the link was found
                if link:  # Only proceed if the link exists
                    serving_text = link.text.strip()
                    serving_.append(serving_text)

            print("Serving:", serving_)

    
            # accessing next-steps-module, extract phone number, location, hours 
    
            next_steps_module = li_soup.find('div', {'class': 'next-steps-module'})

            # Extract phone number
            phone_number_elements = next_steps_module.find_all('span', {'class': 'result-next-step-item'})

            # Also find all 'a' elements with href attributes containing 'tel:'
            tel_link_elements = next_steps_module.find_all('a', href=True)

            phone_number = None  # Initialize phone number as None

            # Extract phone numbers from text-based spans
            for element in phone_number_elements:
                phone_number_text = element.text.strip() if element else None
    
                # Regex to extract digits (handling separators like spaces or hyphens)
                phone_number_matches = re.findall(r'[\d-]+', phone_number_text)
    
                # Join the digits into a single phone number string if matches are found
                if phone_number_matches:
                    phone_number = ''.join(phone_number_matches)
                    break  # Stop once we've found a phone number

            # If no phone number found from text, check the 'tel:' href links
            if not phone_number:
                for element in tel_link_elements:
                    href_value = element['href']
        
                    # Check if the href contains 'tel:' and extract digits
                    if 'tel:' in href_value:
                        phone_number_matches = re.findall(r'[\d-]+', href_value)
                        if phone_number_matches:
                            phone_number = ''.join(phone_number_matches)
                            break  # Stop once we've found a phone number

            # Print the extracted phone number
            if phone_number:
                print("Phone Number:", phone_number)
            else:
                print("No phone number found.")

            # Extract location address
            location_address_element = next_steps_module.find('a', {'class': 'activity-log ph-flyout-click cwdc-flyout-click map-link with-address'})
            location_address = location_address_element.text.strip() if location_address_element else None
            location_address = re.sub(r'\s+', ' ', location_address) if location_address_element else None
            print("Location Address:", location_address)

            # Extract URL
            location_url_map = location_address_element['href'] if location_address_element else None
            print("Location url map:", location_url_map)
            
            hours_info = {
                '24_hour': False,
                'Monday': None,
                'Tuesday': None,
                'Wednesday': None,
                'Thursday': None,
                'Friday': None,
                'Saturday': None,
                'Sunday': None
            }
            
            # Extract hours based on structure
            hours_element = next_steps_module.find('div', {'class': 'office-hours-schedule see-hours-dropdown'})
            if hours_element:
                # Find all the <span> elements within the hours_element
                day_spans = hours_element.find_all('span')

                # Ensure that we have even pairs of day and corresponding hours
                if len(day_spans) % 2 == 0:
                    # Iterate through the spans in pairs (day and its corresponding hours)
                    for i in range(0, len(day_spans), 2):
                        day_span = day_spans[i]          # This is the day (e.g., "Monday:")
                        hours_span = day_spans[i + 1]    # This is the corresponding hours (e.g., "Closed" or time)

                        # Extract the day name and remove the colon
                        day = day_span.text.strip()[:-1]  # Remove the ':' at the end
                        full_day = day.strip()            # Ensure there's no extra space

                        # Check if the day is valid and exists in hours_info
                        if full_day in hours_info:
                            # Check if the corresponding hours span indicates "Closed"
                            if 'Closed' in hours_span.text:
                                hours_info[full_day] = 'Closed'
                            else:
                                # Extract hours for non-closed days
                                hours_info[full_day] = hours_span.text.strip()
                else:
                    print("Unexpected hours format. Ensure that each day has corresponding hours.")

            # Handle 24-hour information if it exists
            else:
                hours_element = next_steps_module.find('span', {'class': 'result-geo-hours'})
                if hours_element:
                    # Get 24-hour text strip
                    hours_text = hours_element.get_text(strip=True)

                    # Assuming the presence of this text means 24-hour operation
                    if '24' in hours_text or '24-hour' in hours_text.lower():
                        hours_info['24_hour'] = True
                    else:
                        print("Unable to determine if the office is 24 hours.")
                else:
                    print("Hours information not available.")

            # Output the result
            print(hours_info)
    
    
            # Loop through the found <li> elements and extract labels and values
            Extra_element = li_element.find('div', {'class': 'panel-wrapper more-info-panel'})
            elig = []
            eligibility_rules_element = Extra_element.find('div', {'class': 'eligibility-rules'})
    
            # Check if eligibility_rules_element is found
            if eligibility_rules_element:
                # Check if eligibility_rules_element contains a list (ul)
                ul_element = eligibility_rules_element.find('ul')
        
                if ul_element:
                    # If it contains a list, extract list items and store them in a flat list
                    eligibility_list = [li.text.strip() for li in ul_element.find_all('li')]
                    elig.extend(eligibility_list)  # Use extend to add elements to the list directly
                    #print("Eligibility:", eligibility_list)
                else:
                    # If it doesn't contain a list, store the text as is
                    eligibility_text = eligibility_rules_element.text.strip()
                    #print("Eligibility:", [eligibility_text])  # Wrap in a list to maintain consistency
                    elig.append(eligibility_text)

            else:
                # Skip if 'eligibility-rules' class is not found
                pass
    
        
            print("Eligibility:", elig)
    
            # Extract Availability
            availability_element = Extra_element.find('strong', {'data-translate': 'Availability'})
            
            if availability_element:
                availability = availability_element.find_next('div', {'class': 'col-md-10'}).text.strip()
            else:
                availability = "Not specified"

            # Extract Description
            description_element = Extra_element.find('strong', {'data-translate': 'Description'})
            description = description_element.find_next('div', {'class': 'col-md-10'}).text.strip()

            # Extract Languages
            languages_element = Extra_element.find('strong', {'data-translate': 'Languages'})
            languages = languages_element.find_next('div', {'class': 'col-md-10'}).text.strip()
            languages_data = [lang.strip() for lang in languages.split(',')]
            
            # Extract Cost
            cost_element = Extra_element.find('strong', string='Cost:')
            
            if cost_element:  # Check if cost_element exists
                cost = cost_element.find_next('div', {'class': 'col-md-10'}).text.strip()
            else:
                cost = "Not specified"  # Default value if cost_element is not found
            
            # Extract Website URLs if they exist, or set them to None
            website_element = Extra_element.find('div', {'data-translation': 'Website'})
            website_url = website_element.find_next('a', {'class': 'activity-log descriptionProgramWebsite'})['href'] if website_element else None

            # Extract Facebook and Twitter URLs if they exist, or set them to None
            facebook_element = Extra_element.find('strong', {'data-translate': 'Facebook'})
            facebook_url = facebook_element.find_next('a', {'class': 'activity-log descriptionProgramFacebook'})['href'] if facebook_element else None

            twitter_element = Extra_element.find('strong', {'data-translate': 'Twitter'})
            twitter_url = twitter_element.find_next('a', {'class': 'activity-log descriptionProgramTwitter'})['href'] if twitter_element else None

            # Extract Coverage Area
            coverage_element = Extra_element.find('strong', {'data-translate': 'Coverage Area'})
            coverage = coverage_element.find_next('div', {'class': 'col-md-10'}).text.strip()
            
            # Initialize latitude and longitude as None by default
            latitude = None
            longitude = None
            zipcode = None

            # Find the element with class "office-hour-address"
            location_element = Extra_element.find('div', {'class': 'office-hours-address _js_address address notranslate'})

            # Check if the element exists and has the required attributes
            if location_element:
                latitude = location_element['data-latitude'] if location_element.has_attr('data-latitude') else None
                longitude = location_element['data-longitude'] if location_element.has_attr('data-longitude') else None
                print("Latitude:", latitude)
                print("Longitude:", longitude)
                
                # Extract all text within location_element
                address_text = location_element.get_text(separator=" ").strip()  # Get all text as a single string

                # Use regular expression to search for the ZIP code pattern anywhere in the text
                zip_matches = re.findall(r'(?<!\d)(\b\d{5}\b)(?!\d)', address_text)
                zipcode = int(zip_matches[-1]) if zip_matches else None
                print("ZIP Code:", zipcode)

            # Print or use the extracted values as needed
            print("Availability:", availability)
            print("Description:", description)
            print("Languages:", languages_data)
            print("Cost:", cost)
            print("Facebook URL:", facebook_url)
            print("Twitter URL:", twitter_url)
            print("Coverage Area:", coverage)

            data.append([
            Service_name,
            Service_url,
            main_s,
            other_s,
            serving_,
            phone_number,
            website_url,
            location_address,
            location_url_map,
            elig,
            availability,
            description,
            languages_data,
            cost,
            is_reviewed,
            facebook_url,
            twitter_url,
            coverage,
            latitude,
            longitude,
            zipcode,
            hours_info['24_hour'],
            hours_info['Monday'],      
            hours_info['Tuesday'],     
            hours_info['Wednesday'],   
            hours_info['Thursday'],   
            hours_info['Friday'],     
            hours_info['Saturday'],  
            hours_info['Sunday']])    
            print("***************************************************************************")

Service name:  Financial Assistance for Military Families
URL:  https://www.auntbertha.com//american-red-cross--washington-dc--financial-assistance-for-military-families/5931501920976896?postal=90013
Reviewed on: 12/18/2024
Main Services: ['emergency payments', 'emergency food', 'temporary shelter', 'burial & funeral help', 'financial assistance']
Other Services: None
Serving: ['adults 18+', 'active duty', 'national guard', 'veterans', 'families', 'spouses', 'emergency']
Phone Number: 877-272-7337
Location Address: None
Location url map: None
{'24_hour': True, 'Monday': None, 'Tuesday': None, 'Wednesday': None, 'Thursday': None, 'Friday': None, 'Saturday': None, 'Sunday': None}
Eligibility: ['Eligible for financial assistance if you are: An active duty service member OR', 'A member of an activated National Guard or Reserve unit OR', 'An immediate family member of a service member in the above two categories OR', 'A military retiree or spouse/widow(er) of a retiree.']
Availability: avai

Service name:  Angel's Flight for Runaway and Homeless Youth
URL:  https://www.auntbertha.com//catholic-charities-of-los-angeles%252C-inc.--los-angeles-ca--angel%2527s-flight-for-runaway-and-homeless-youth/5802764892897280?postal=90013
Reviewed on: 12/17/2024
Main Services: ['temporary shelter', 'meals', 'clothing', 'substance abuse counseling', 'medical care', 'counseling', 'mental health care', 'community support services', 'recreation', 'case management', 'more education', 'tutoring', 'youth development', 'help find work']
Other Services: None
Serving: ['teens', 'homeless', 'runaways']
Phone Number: 213-251-3400
Location Address: 1531 James M Wood Boulevard, Los Angeles, CA 90015
Location url map: https://www.google.com/maps/?q=1531+James+M+Wood+Boulevard,+Los+Angeles,+CA+90015/
{'24_hour': False, 'Monday': '8:00 AM - 5:00 PM  PST', 'Tuesday': '8:00 AM - 5:00 PM  PST', 'Wednesday': '8:00 AM - 5:00 PM  PST', 'Thursday': '8:00 AM - 5:00 PM  PST', 'Friday': '8:00 AM - 5:00 PM  PST', 'S

Service name:  Recuperative Care
URL:  https://www.auntbertha.com//serenity-recuperative-care--los-angeles-ca--recuperative-care/5661138241060864?postal=90013
Reviewed on: 12/16/2024
Main Services: ['temporary shelter', 'medical care', 'one-on-one support']
Other Services: None
Serving: ['adults 18+', 'post-treatment', 'individuals', 'homeless']
Phone Number: 323-987-0048
Location Address: 1124 North Chicago Street, Los Angeles, CA 90033
Location url map: https://www.google.com/maps/?q=1124+North+Chicago+Street,+Los+Angeles,+CA+90033/
{'24_hour': False, 'Monday': '8:00 AM - 5:00 PM  PST', 'Tuesday': '8:00 AM - 5:00 PM  PST', 'Wednesday': '8:00 AM - 5:00 PM  PST', 'Thursday': '8:00 AM - 5:00 PM  PST', 'Friday': '8:00 AM - 5:00 PM  PST', 'Saturday': 'Closed', 'Sunday': 'Closed'}
Eligibility: ['This program serves homeless individuals.']
Latitude: 34.0566312
Longitude: -118.2054676
ZIP Code: 90033
Availability: available
Description: The Recuperative Care program offers those that are hom

Service name:  Recovery from Homelessness Program (RFHP)
URL:  https://www.auntbertha.com//whittier%2527s-first-day--whittier-ca--recovery-from-homelessness-program-%2528rfhp%2529/5745506855157760?postal=90013
Reviewed on: 12/17/2024
Main Services: ['temporary shelter', 'short-term housing']
Other Services: ['meals', 'navigating the system', 'case management', 'financial education', 'help find work']
Serving: ['adults 18+', 'homeless']
Phone Number: 562-693-4097
Location Address: None
Location url map: None
{'24_hour': False, 'Monday': '8:00 AM - 5:00 PM  PST', 'Tuesday': '8:00 AM - 5:00 PM  PST', 'Wednesday': '8:00 AM - 5:00 PM  PST', 'Thursday': '8:00 AM - 5:00 PM  PST', 'Friday': '8:00 AM - 5:00 PM  PST', 'Saturday': 'Closed', 'Sunday': 'Closed'}
Eligibility: ['This program helps people who are older than 17 years old.', 'Must be homeless.']
Availability: available
Description: The Recovery from Homelessness Program provides temporary and transitional housing for up to 45 homeless a

Service name:  The Midnight Mssion Courtyard Outreach
URL:  https://www.auntbertha.com//the-midnight-mission--los-angeles-ca--the-midnight-mssion-courtyard-outreach/5189375556059136?postal=90013
Reviewed on: 12/15/2024
Main Services: ['temporary shelter', 'personal hygiene', 'community support services']
Other Services: None
Serving: ['anyone in need', 'all ages', 'individuals', 'families', 'homeless', 'emergency']
No phone number found.
Location Address: 601 San Pedro Street, Los Angeles, CA 90014
Location url map: https://www.google.com/maps/?q=601+San+Pedro+Street,+Los+Angeles,+CA+90014/
{'24_hour': True, 'Monday': None, 'Tuesday': None, 'Wednesday': None, 'Thursday': None, 'Friday': None, 'Saturday': None, 'Sunday': None}
Eligibility: ['This program serves people experiencing homelessness.']
Latitude: 34.0421564
Longitude: -118.2459925
ZIP Code: 90014
Availability: available
Description: The Midnight Mission provides the Courtyard Outreach Program for individuals who need a safe pl

Service name:  Hacienda of Hope Program
URL:  https://www.auntbertha.com//project-return-peer-support-network-%2528prpsn%2529--huntington-park-ca--hacienda-of-hope-program/5100596400488448?postal=90013
No review date found.
Main Services: ['temporary shelter', 'recreation', 'safe housing', 'peer support', 'support groups', 'understand mental health']
Other Services: ['one-on-one support', 'peer support']
Serving: ['adults 18+', 'limited english', 'all mental health']
Phone Number: 323-346-0960
Location Address: None
Location url map: None
{'24_hour': False, 'Monday': '8:00 AM - 5:00 PM  PST', 'Tuesday': '8:00 AM - 5:00 PM  PST', 'Wednesday': '8:00 AM - 5:00 PM  PST', 'Thursday': '8:00 AM - 5:00 PM  PST', 'Friday': '8:00 AM - 5:00 PM  PST', 'Saturday': 'Closed', 'Sunday': 'Closed'}
Eligibility: ['This program helps people who are older than 17 years old.', 'This program serves individuals living with mental health challenges who are experiencing a stressful iife event or crisis.']
Avail

Service name:  Emergency Shelter
URL:  https://www.auntbertha.com//child-family-center--santa-clarita-ca--emergency-shelter/5745445426954240?postal=90013
Reviewed on: 12/17/2024
Main Services: ['temporary shelter']
Other Services: ['emergency food', 'clothing', 'transportation', 'parenting education', 'counseling', 'family counseling', 'individual counseling', 'navigating the system', 'one-on-one support', 'advocacy & legal aid']
Serving: ['children', 'individuals', 'families', 'abuse or neglect survivors', 'domestic violence survivors', 'emergency']
Phone Number: 661-259-8175
Location Address: 21545 Centre Pointe Parkway, Santa Clarita, CA 91350
Location url map: https://www.google.com/maps/?q=21545+Centre+Pointe+Parkway,+Santa+Clarita,+CA+91350/
{'24_hour': False, 'Monday': '9:00 AM - 6:00 PM  PST', 'Tuesday': '9:00 AM - 6:00 PM  PST', 'Wednesday': '9:00 AM - 6:00 PM  PST', 'Thursday': '9:00 AM - 6:00 PM  PST', 'Friday': '9:00 AM - 6:00 PM  PST', 'Saturday': 'Closed', 'Sunday': 'Clos

Service name:  Transitional Living Program
URL:  https://www.auntbertha.com//youth-emerging-stronger-%2528yes%2529--los-angeles-ca--transitional-living-program/6525503538200576?postal=90013
Reviewed on: 11/13/2024
Main Services: ['temporary shelter', 'short-term housing', 'case management']
Other Services: ['daily life skills', 'mental health care', 'navigating the system', 'one-on-one support', 'more education', 'tutoring', 'skills & training']
Serving: ['young adults', 'teens', 'homeless']
Phone Number: 323-454-2886
Location Address: 1853 Taft Avenue, Los Angeles, CA 90028
Location url map: https://www.google.com/maps/?q=1853+Taft+Avenue,+Los+Angeles,+CA+90028/
{'24_hour': False, 'Monday': '8:00 AM - 5:00 PM  PST', 'Tuesday': '8:00 AM - 5:00 PM  PST', 'Wednesday': '8:00 AM - 5:00 PM  PST', 'Thursday': '8:00 AM - 5:00 PM  PST', 'Friday': '8:00 AM - 5:00 PM  PST', 'Saturday': 'Closed', 'Sunday': 'Closed'}
Eligibility: ['This program helps people who are 18 to 24 years old.']
Latitude: 

Service name:  Emergency Youth Shelter
URL:  https://www.auntbertha.com//1736-family-crisis-center--los-angeles-ca--emergency-youth-shelter/4816089525518336?postal=90013
Reviewed on: 12/13/2024
Main Services: ['temporary shelter', 'immediate safety']
Other Services: ['meals', 'home goods', 'personal care items', 'individual counseling']
Serving: ['teens', 'children', 'homeless', 'runaways', 'emergency']
Phone Number: 310-379-3620
Location Address: None
Location url map: None
{'24_hour': True, 'Monday': None, 'Tuesday': None, 'Wednesday': None, 'Thursday': None, 'Friday': None, 'Saturday': None, 'Sunday': None}
Eligibility: ['This program helps people who are 10 to 17 years old.']
Availability: available
Description: Emergency Youth Shelter Program provides runaway and homeless boys and girls ages 10 through 17 for a shelter stay of up to two weeks. This program provides:- Emergency shelterOther assistance includes:- Food & Clothing- Individual and group counseling- Medical services- Le

In [3]:
# Save to CSV file 
#"FindHelp_extracted_data_riv_shelter.csv" is the name for the shelter csv for riv
#"FindHelp_extracted_data_riv_food_pantry.csv" is the name for the food pantry csv for riv
#"FindHelp_extracted_data_riv_mental_health.csv" is the name for the food pantry csv for riv
#"FindHelp_extracted_data_downtown_la_food_pantry.csv" is the name for the food pantry csv for downtown la
csv_filename = "FindHelp_extracted_data_downtown_la_shelter.csv"

with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    
    # The header 
    header = [
        "Service_name",
        "Service_url",
        "Main_Services",
        "Other_Services",
        "Serving",
        "Phone_Number",
        "Website",
        "Location_Address",
        "Location_URL_Map",
        "Eligibility",
        "Availability",
        "Description",
        "Languages",
        "Cost",
        "Google_Review",
        "Facebook_URL",
        "Twitter_URL",
        "Coverage",
        "Latitude",
        "Longitude",
        "Zipcode",
        "24hour",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday"
    ]
    csv_writer.writerow(header)
    
    csv_writer.writerows(data)

In [4]:
#"FindHelp_extracted_data_riv_shelter.csv" is the name for the shelter csv
#"FindHelp_extracted_data_riv_food_pantry.csv" is the name for the food pantry csv
#"FindHelp_extracted_data_riv_mental_health.csv" is the name for the food pantry csv
#"FindHelp_extracted_data_downtown_la_food_pantry.csv"
#"FindHelp_extracted_data_irvine_food_pantry.csv"
#"FindHelp_extracted_data_irvine_shelter.csv"
#"FindHelp_extracted_data_irvine_mental_health.csv"
df = pd.read_csv("FindHelp_extracted_data_downtown_la_shelter.csv")

#riv_zipcodes = [92501, 92502, 92503, 92504, 92505, 92506, 92507, 92508, 92513, 92514, 92516, 92517, 92521, 92522]

downtown_la_zipcodes = [90012, 90013, 90014, 90015, 90017, 90021]

#irvine_zipcodes = [92602, 92603, 92604, 92606, 92612, 92614, 92617, 92618, 92620]

filtered_df = df[df['Zipcode'].isin(downtown_la_zipcodes) | (df['Location_Address'].isna() & df['Longitude'].isna() & df['Latitude'].isna())]

filtered_df = filtered_df.drop_duplicates(subset = ['Website', 'Service_name'])

filtered_df.to_csv("FindHelp_extracted_data_downtown_la_shelter.csv", index=False, encoding='utf-8')

filtered_df

Unnamed: 0,Service_name,Service_url,Main_Services,Other_Services,Serving,Phone_Number,Website,Location_Address,Location_URL_Map,Eligibility,...,Longitude,Zipcode,24hour,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
0,Financial Assistance for Military Families,https://www.auntbertha.com//american-red-cross...,"['emergency payments', 'emergency food', 'temp...",,"['adults 18+', 'active duty', 'national guard'...",877-272-7337,https://www.redcross.org/get-help/military-fam...,,,['Eligible for financial assistance if you are...,...,,,True,,,,,,,
4,Long-Term Shelter,https://www.auntbertha.com//pacific-lifeline--...,['temporary shelter'],"['food pantry', 'baby supplies', 'clothes for ...","['adults', 'young adults', 'female', 'with chi...",909-931-2624,http://www.pacific-lifeline.org,,,['This program helps women and their children ...,...,,,False,9:00 AM - 5:00 PM PST,9:00 AM - 5:00 PM PST,9:00 AM - 5:00 PM PST,9:00 AM - 5:00 PM PST,9:00 AM - 5:00 PM PST,Closed,Closed
5,Domestic Violence Emergency Shelter,https://www.auntbertha.com//angel-step-inn--pi...,"['temporary shelter', 'help escape violence', ...","['parenting education', 'safety education', 'i...","['all ages', 'individuals', 'with children', '...",323-780-4357,https://charityadvantage.com/inActive.html,,,['This program helps survivors of domestic and...,...,,,True,,,,,,,
6,Congregational Network Shelter Program,https://www.auntbertha.com//family-promise-of-...,['temporary shelter'],"['help pay for gas', 'help find housing', 'per...","['families', 'homeless', 'all ages']",626-569-0991,https://www.familypromisesgv.org/,,,[],...,,,False,8:00 AM - 4:00 PM PST,8:00 AM - 4:00 PM PST,8:00 AM - 4:00 PM PST,8:00 AM - 4:00 PM PST,8:00 AM - 4:00 PM PST,Closed,Closed
8,Transitional Housing - The CARE Cottages at Pu...,https://www.auntbertha.com//family-service-age...,"['temporary shelter', 'immediate safety', 'phy...",['case management'],"['all ages', 'individuals', 'families', 'domes...",818-845-7671,https://familyserviceagencyofburbank.org/progr...,,,['This program serves families that are fleein...,...,,,False,9:00 AM - 9:00 PM PST,9:00 AM - 9:00 PM PST,9:00 AM - 9:00 PM PST,9:00 AM - 9:00 PM PST,9:00 AM - 9:00 PM PST,8:00 AM - 6:00 PM PST,Closed
10,Angel's Flight for Runaway and Homeless Youth,https://www.auntbertha.com//catholic-charities...,"['temporary shelter', 'meals', 'clothing', 'su...",,"['teens', 'homeless', 'runaways']",213-251-3400,https://catholiccharitiesla.org/angels-flight-...,"1531 James M Wood Boulevard, Los Angeles, CA 9...",https://www.google.com/maps/?q=1531+James+M+Wo...,['This program helps people who are 10 to 17 y...,...,-118.272225,90015.0,False,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,Closed,Closed
11,Transform Homelessness Program - McGill Street...,https://www.auntbertha.com//catholic-charities...,"['temporary shelter', 'short-term housing', 'c...","['nutrition education', 'help find housing', '...","['teens', 'children', 'adults 18+', 'female', ...",213-251-3400,https://catholiccharitiesla.org/mcgill-street-...,"1531 James M Wood Boulevard, Los Angeles, CA 9...",https://www.google.com/maps/?q=1531+James+M+Wo...,['This program serves women and children who a...,...,-118.272225,90015.0,False,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,Closed,Closed
12,Emergency Shelter Services,https://www.auntbertha.com//los-angeles-house-...,"['temporary shelter', 'help escape violence', ...","['nutrition education', 'mental health care', ...","['children', 'families', 'homeless', 'domestic...",323-266-4139,http://www.losangeleshouseofruth.com,,,['Must be a family with children to access thi...,...,,,False,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,Closed,Closed
13,Good Shepherd Center,https://www.auntbertha.com//catholic-charities...,"['temporary shelter', 'short-term housing', 'n...","['meals', 'more education', 'help find work']","['all ages', 'all disabilities', 'female', 'wi...",213-251-3400,https://catholiccharitiesla.org/good-shepherd-...,"1531 James M Wood Boulevard, Los Angeles, CA 9...",https://www.google.com/maps/?q=1531+James+M+Wo...,['This program serves mothers with children ex...,...,-118.272259,90015.0,False,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,Closed,Closed
21,JFS Hope - Assistance for Domestic Violence Su...,https://www.auntbertha.com//jewish-family-serv...,"['temporary shelter', 'immediate safety', 'sho...",,"['all ages', 'individuals', 'families', 'domes...",818-505-0900,https://www.jfsla.org/our-services/domestic-vi...,,,['This program serves domestic violence surviv...,...,,,False,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,8:00 AM - 5:00 PM PST,Closed,Closed
