# Scraping Bank1

## 1. Module Imports

In [None]:
!pip install selenium

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import os
import time
import random
import logging
import csv  
from config import load_config
from log_utils import setup_anonymized_logging
from scraper_utils import add_random_delay
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime, timedelta

In [3]:
# This was taking too long (5 hours for 100 profiles)

# class HumanlikeBehavior:
#     """Helper class for human-like browsing behavior"""
#     @staticmethod
#     def add_human_delay(action_type='page_view'):
#         """Simulate human-like delays based on action type"""
#         delays = {
#             'page_view': (20, 45),  # Time to read a profile
#             'scroll': (2, 5),       # Scrolling delay
#             'click': (1, 3),        # Clicking delay
#             'session_break': (1800, 3600)  # 30-60 minute break between sessions
#         }
#         delay = random.uniform(*delays[action_type])
#         time.sleep(delay)
    
#     @staticmethod
#     def simulate_scrolling(driver):
#         """Simulate human-like scrolling behavior"""
#         total_height = driver.execute_script("return document.body.scrollHeight")
#         viewport_height = driver.execute_script("return window.innerHeight")
#         current_position = 0
        
#         while current_position < total_height:
#             scroll_amount = random.randint(100, 300)
#             current_position = min(current_position + scroll_amount, total_height)
            
#             driver.execute_script(f"window.scrollTo({{top: {current_position}, behavior: 'smooth'}})")
#             HumanlikeBehavior.add_human_delay('scroll')
            
#             if random.random() < 0.1:
#                 scroll_up = random.randint(50, 150)
#                 current_position = max(0, current_position - scroll_up)
#                 driver.execute_script(f"window.scrollTo({{top: {current_position}, behavior: 'smooth'}})")
#                 HumanlikeBehavior.add_human_delay('scroll')



In [2]:
"""
Each profile should take about 10-15 seconds instead of 45-60 seconds
Breaks will be 5-10 minutes instead of 30-60 minutes
It will take breaks every 50 profiles instead of 30
"""

class HumanlikeBehavior:
    """Helper class for human-like browsing behavior"""
    @staticmethod
    def add_human_delay(action_type='page_view'):
        """Simulate human-like delays based on action type"""
        delays = {
            'page_view': (5, 10),    # Reduced from 20-45 to 5-10 seconds
            'scroll': (1, 2),        # Reduced from 2-5 to 1-2 seconds
            'click': (0.5, 1),       # Reduced from 1-3 to 0.5-1 seconds
            'session_break': (300, 600)  # Reduced from 30-60 mins to 5-10 mins
        }
        delay = random.uniform(*delays[action_type])
        time.sleep(delay)
    
    @staticmethod
    def simulate_scrolling(driver):
        """Simulate human-like scrolling behavior"""
        total_height = driver.execute_script("return document.body.scrollHeight")
        viewport_height = driver.execute_script("return window.innerHeight")
        current_position = 0
        
        while current_position < total_height:
            scroll_amount = random.randint(100, 300)
            current_position = min(current_position + scroll_amount, total_height)
            
            driver.execute_script(f"window.scrollTo({{top: {current_position}, behavior: 'smooth'}})")
            HumanlikeBehavior.add_human_delay('scroll')
            
            if random.random() < 0.1:  # Reduced random scrolling up
                scroll_up = random.randint(50, 150)
                current_position = max(0, current_position - scroll_up)
                driver.execute_script(f"window.scrollTo({{top: {current_position}, behavior: 'smooth'}})")
                HumanlikeBehavior.add_human_delay('scroll')

class ScrapingSession:
    def __init__(self):
        self.start_time = datetime.now()
        self.profiles_scraped = 0
        
    def should_take_break(self):
        """Check if we should take a break based on time and profiles scraped"""
        session_duration = datetime.now() - self.start_time
        
        if (session_duration > timedelta(minutes=45) or  # Reduced from 2 hours to 45 minutes
            self.profiles_scraped >= 50):                # Increased from 30 to 50 profiles
            return True
        return False
    
    def take_break(self):
        """Take a break and reset session counters"""
        logger.info("Taking a session break...")
        HumanlikeBehavior.add_human_delay('session_break')
        self.start_time = datetime.now()
        self.profiles_scraped = 0

## 2. Configure logging

In [3]:
# Configure logging
logger = setup_anonymized_logging()

## 3. Loading Configurations and Initialize Session

In [4]:
# Load configuration 
config = load_config()
bank_id = 'bank1'
bank_config = config['banks'][bank_id]

# Initialize session
session = requests.Session()
session.headers.update({
    'User-Agent': random.choice(config['user_agents'])
})

## 4. Utility functions

In [5]:
# Utility functions
def get_total_pages(known_total_donors=478, donors_per_page=15):
    """Calculate total pages needed"""
    return (known_total_donors + donors_per_page - 1) // donors_per_page

def get_scraped_donor_ids():
    """Get list of donor IDs that have already been scraped"""
    try:
        # Try loading from the in-progress file first
        with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_in_progress.json', 'r') as f:
            profiles = json.load(f)
            return {profile['donor_id'] for profile in profiles}
    except FileNotFoundError:
        try:
            # If no in-progress file, try the completed profiles file
            with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles.json', 'r') as f:
                profiles = json.load(f)
                return {profile['donor_id'] for profile in profiles}
        except FileNotFoundError:
            return set()

## 5. Donor ID Collection

In [None]:
## 5. Donor ID Collection

def get_all_donor_ids(session, driver):
    """Get all unique donor IDs from search pages"""
    donor_ids = set()  # Using set to avoid duplicates
    total_pages = 32
    
    try:
        print(f"Starting to collect donor IDs...")
        
        for page in range(1, total_pages + 1):
            url = f"{bank_config['base_url']}/search/?donor_sort=default_Sort&page={page}"
            print(f"Scanning page {page} of {total_pages}")
            
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "donor-id"))
            )
            
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            donor_id_spans = soup.find_all('span', class_='donor-id')
            
            for span in donor_id_spans:
                donor_id = span.text.strip()
                donor_ids.add(donor_id)
            
            print(f"Found {len(donor_id_spans)} donors on page {page}. Total unique IDs so far: {len(donor_ids)}")
            add_random_delay(5, 10)  # Delay between pages
            
    except Exception as e:
        print(f"Error collecting donor IDs: {str(e)}")
        
    finally:
        print(f"Finished collecting donor IDs. Total unique donors found: {len(donor_ids)}")
        # Print first few IDs as a sample
        sample_ids = list(donor_ids)[:5]
        print(f"Sample of first 5 donor IDs: {sample_ids}")
        return list(donor_ids)

# Initialize Selenium and run collection
print("Initializing Selenium driver...")
options = webdriver.ChromeOptions()
options.add_argument(f'user-agent={random.choice(config["user_agents"])}')
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

try:
    collected_donor_ids = get_all_donor_ids(session, driver)
finally:
    driver.quit()

# Print final results
if collected_donor_ids:
    print(f"\nSuccessfully collected {len(collected_donor_ids)} unique donor IDs")
    # Save IDs to file in bank1 directory
    output_directory = '/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1'
    os.makedirs(output_directory, exist_ok=True)
    id_file_path = os.path.join(output_directory, 'donor_ids_bank1.txt') # changed file to donor_ids_bank1.txt for clarity
    with open(id_file_path, 'w') as f:
        for donor_id in collected_donor_ids:
            f.write(f"{donor_id}\n")
    print(f"Saved donor IDs to {id_file_path}")

## 6. Profile Scraping Function

In [6]:

def scrape_profile_bank1(relative_url, driver=None):
    """
    Scrape a single profile page for Bank1 using either Selenium or requests.
    """
    try:
        full_url = f"{bank_config['base_url']}{relative_url}"
        
        if driver:
            # Selenium approach
            driver.get(full_url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "profile-details"))
            )
            
            # Simulate human reading behavior
            HumanlikeBehavior.simulate_scrolling(driver)
            HumanlikeBehavior.add_human_delay('page_view')
            
            response_text = driver.page_source
            soup = BeautifulSoup(response_text, 'html.parser')
        else:
            # Fallback to requests approach
            response = session.get(full_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
        
        # Initialize profile data dictionary
        profile_data = {
            'url': full_url,
            'donor_id': None,
            'headline': None,
            'donor_description': None,
            'donor_lookalikes': None,
            'height': None,
            'weight': None,
            'eye_color': None,
            'hair_color': None,
            'hair_texture': None,
            'complexion': None,
            'ethnic_origin': None,
            'ancestry': None,
            'religion': None,
            'jewish_ancestry': None,
            'education_level': None,
            'areas_of_study': None
        }
            
        # Extract donor ID
        donor_id_elem = soup.find('span', class_='donor-id')
        if donor_id_elem:
            id_span = donor_id_elem.find('span', class_='id')
        if id_span:
            profile_data['donor_id'] = id_span.text.strip()
        
        # Extract headline
        description_div = soup.find('div', id='description')
        if description_div:
            headline_elem = description_div.find('h2')
        if headline_elem:
            profile_data['headline'] = headline_elem.text.strip()
        
        # Extract donor description
        description_elem = soup.find('div', id='description')
        if description_elem and description_elem.find('p'):
            profile_data['donor_description'] = description_elem.find('p').text.strip()
        
        # Extract look-alikes
        lookalikes_div = soup.find('div', class_='look-a-likes')
        if lookalikes_div:
            profile_data['donor_lookalikes'] = lookalikes_div.text.strip()
            # If the text is empty, check for the "Not available" message
            if not profile_data['donor_lookalikes']:
                not_available = lookalikes_div.find('div', class_='not-available')
                if not_available:
                    profile_data['donor_lookalikes'] = not_available.text.strip()
        
        # Physical characteristics 
        physical_chars = soup.find('section', id='appearance')
        if physical_chars:
            characteristics_div = physical_chars.find('div', id='collapse-panel-1')
            if characteristics_div:
                label_to_key = {
                    'Height:': 'height',
                    'Weight:': 'weight',
                    'Eye Color:': 'eye_color',
                    'Hair Color:': 'hair_color',
                    'Hair Texture:': 'hair_texture',
                    'Complexion:': 'complexion'
                }
                
                for li in characteristics_div.find_all('li'):
                    label_span = li.find('span', class_='tab')
                    if label_span:
                        label = label_span.text.strip()
                        if label in label_to_key:
                            value = li.get_text(strip=True)
                            value = value.replace(label, '').strip()
                            profile_data[label_to_key[label]] = value

        # Heritage information
        heritage_section = soup.find('section', id='heritage')
        if heritage_section:
            heritage_div = heritage_section.find('div', id='collapse-panel-2')
            if heritage_div:
                label_to_key = {
                    'Ethnic Origin:': 'ethnic_origin',
                    'Ancestry (Self-Reported):': 'ancestry',
                    'Religion:': 'religion',
                    'Jewish Ancestry (Self-Reported):': 'jewish_ancestry'
                }
                
                for li in heritage_div.find_all('li'):
                    label_span = li.find('span', class_='tab')
                    if label_span:
                        label = label_span.text.strip()
                        print(f"Found label: '{label}'")  # Debug print
                        if label in label_to_key:
                            # Special IDs for some fields
                            if 'Ethnic Origin:' in label:
                                value_span = li.find('span', id='donor-profile-ethnicity')
                            elif 'Religion:' in label:
                                value_span = li.find('span', id='donor-profile-religiion')
                            elif 'Jewish Ancestry' in label:
                                value_span = li.find('span', id='donor-profile-jewish')
                            else:
                                value_span = li.find('span')
                            
                            if value_span:
                                profile_data[label_to_key[label]] = value_span.text.strip()

        # Education information
        education_section = soup.find('section', id='education')
        if education_section:
            education_div = education_section.find('div', id='collapse-panel-3')
            if education_div:
                label_to_key = {
                    'Education Level:': 'education_level',
                    'Areas of Study:': 'areas_of_study'
                }
                
                for li in education_div.find_all('li'):
                    label_span = li.find('span', class_='tab')
                    if label_span:
                        label = label_span.text.strip()
                        if label in label_to_key:
                            # Get the value directly from the <span> next to the label
                            value_span = li.find_all('span')[1]  # This gets the second <span>, which is the value
                            
                            if value_span:
                                profile_data[label_to_key[label]] = value_span.text.strip()

                    
        return profile_data
            
    except Exception as e:
        logger.error(f"Error scraping profile {full_url}: {str(e)}")
        return None
            

In [7]:
def scrape_donor_profiles(session, page_number, seen_donors, driver=None):
    """Scrape donors from a single page with human-like behavior"""
    should_quit_driver = False
    try:
        # Create driver only if not provided
        if not driver:
            options = webdriver.ChromeOptions()
            options.add_argument(f'user-agent={random.choice(config["user_agents"])}')
            options.add_argument('--headless')  # Optional: run in headless mode
            driver = webdriver.Chrome(options=options)
            should_quit_driver = True
        
        url = f"{bank_config['base_url']}/search/?donor_sort=default_Sort&page={page_number}"
        driver.get(url)
        
        # Wait for content to load and simulate human behavior
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "profile-details"))
        )
        HumanlikeBehavior.simulate_scrolling(driver)
        HumanlikeBehavior.add_human_delay('page_view')
        
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        donors_on_page = []
        
        profile_sections = soup.find_all('div', class_='profile-details')
        logger.info(f"Found {len(profile_sections)} profiles on page {page_number}")
        
        for profile in profile_sections:
            try:
                donor_id_elem = profile.find('span', class_='donor-id')
                if donor_id_elem:
                    donor_id = donor_id_elem.text.strip()
                    relative_url = f"/donor/{donor_id}"
                    
                    if donor_id not in seen_donors:
                        profile_data = scrape_profile_bank1(relative_url, driver)
                        if profile_data:
                            seen_donors.add(donor_id)
                            donors_on_page.append(profile_data)
                    else:
                        logger.info(f"Found duplicate donor {donor_id} on page {page_number}")
            
            except Exception as e:
                logger.error(f"Error processing profile with ID {donor_id if donor_id else 'unknown'}: {str(e)}")
                continue
        
        if should_quit_driver:
            driver.quit()
        return donors_on_page
        
    except Exception as e:
        logger.error(f"Error scraping page {page_number}: {str(e)}")
        if should_quit_driver and driver:
            driver.quit()
        return []

## 7. Session Management

In [8]:
class ScrapingSession:
    def __init__(self):
        self.start_time = datetime.now()
        self.profiles_scraped = 0
        
    def should_take_break(self):
        """Check if we should take a break based on time and profiles scraped"""
        session_duration = datetime.now() - self.start_time
        
        if (session_duration > timedelta(hours=2) or 
            self.profiles_scraped >= 30):
            return True
        return False
    
    def take_break(self):
        """Take a break and reset session counters"""
        logger.info("Taking a session break...")
        HumanlikeBehavior.add_human_delay('session_break')
        self.start_time = datetime.now()
        self.profiles_scraped = 0

## 8. Test Scraping 

In [None]:
def test_specific_profiles():
    try:
        # List of known donor IDs to test
        test_donor_ids = ['20035', '19807', '20400']
        profiles = []
        
        # Initialize Selenium for testing
        options = webdriver.ChromeOptions()
        options.add_argument(f'user-agent={random.choice(config["user_agents"])}')
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)
        
        print("Starting test scrape of specific profiles...")
        
        for donor_id in test_donor_ids:
            relative_url = f"/donor/{donor_id}"
            logger.info(f"Scraping profile for donor {donor_id}")
            profile_data = scrape_profile_bank1(relative_url, driver)
            if profile_data:
                profiles.append(profile_data)
                print(f"Successfully scraped donor {donor_id}")
            else:
                print(f"Failed to scrape donor {donor_id}")
                
        print(f"\nProfiles scraped: {len(profiles)}")
        
        if profiles:
            # Save test results
            test_output_dir = '/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw'
            os.makedirs(test_output_dir, exist_ok=True)
            
            with open(os.path.join(test_output_dir, 'test_specific_profiles.json'), 'w', encoding='utf-8') as f:
                json.dump(profiles, f, indent=2, ensure_ascii=False)
            print("Saved test results to raw data folder")
            
        return profiles
            
    except Exception as e:
        logger.error(f"Error in test scraping: {str(e)}")
        print(f"\nError details: {str(e)}")
        return None
    finally:
        if driver:
            driver.quit()

# Run test
test_profiles = test_specific_profiles()

## 9. Scrape and Save

In [9]:
## 9. Scrape and Save

def save_progress(profiles, bank_id):
    """Save intermediate results"""
    output_directory = '/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1'
    temp_file = os.path.join(output_directory, f'profiles_{bank_id}_in_progress.json')
    
    # Save as JSON with timestamp in the filename
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_file = os.path.join(output_directory, f'profiles_{bank_id}_backup_{timestamp}.json')
    
    # Save current progress
    with open(temp_file, 'w', encoding='utf-8') as f:
        json.dump(profiles, f, indent=2, ensure_ascii=False)
    
    # Create backup
    with open(backup_file, 'w', encoding='utf-8') as f:
        json.dump(profiles, f, indent=2, ensure_ascii=False)
        
    print(f"Saved progress: {len(profiles)} profiles")
    print(f"Backup created: {backup_file}")

def save_final_results(profiles, bank_id):
    """Save final results in both JSON and CSV formats with versioning"""
    output_directory = '/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1'
    os.makedirs(output_directory, exist_ok=True)
    
    if profiles:
        try:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            
            # Save as JSON with timestamp
            json_filename = os.path.join(output_directory, f'profiles_{timestamp}.json')
            with open(json_filename, 'w', encoding='utf-8') as f:
                json.dump(profiles, f, indent=2, ensure_ascii=False)
            
            # Save as CSV with timestamp
            csv_filename = os.path.join(output_directory, f'profiles_{timestamp}.csv')
            with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=profiles[0].keys())
                writer.writeheader()
                writer.writerows(profiles)
            
            # Also save as the main files (overwriting previous versions)
            main_json = os.path.join(output_directory, 'profiles.json')
            main_csv = os.path.join(output_directory, 'profiles.csv')
            
            with open(main_json, 'w', encoding='utf-8') as f:
                json.dump(profiles, f, indent=2, ensure_ascii=False)
            
            with open(main_csv, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=profiles[0].keys())
                writer.writeheader()
                writer.writerows(profiles)
            
            print(f"Data saved to:")
            print(f"- {json_filename}")
            print(f"- {csv_filename}")
            print(f"- {main_json}")
            print(f"- {main_csv}")
            
        except Exception as e:
            logger.error(f"Error saving final results: {str(e)}")
    else:
        logger.warning("No profiles were collected to save")

def main():
    try:
        # Load configuration
        global config, bank_config
        config = load_config()
        bank_id = 'bank1'
        bank_config = config['banks'][bank_id]

        # Load existing profiles
        try:
            with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_in_progress.json', 'r') as f:
                all_profiles = json.load(f)
            print(f"Loaded {len(all_profiles)} existing profiles from in-progress file")
        except FileNotFoundError:
            all_profiles = []
            print("Starting fresh scrape")

        # Get already scraped donor IDs
        scraped_ids = get_scraped_donor_ids()
        print(f"Found {len(scraped_ids)} already scraped donor IDs")

        # Load all donor IDs
        donor_ids_path = '/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/donor_ids.txt'
        with open(donor_ids_path, 'r') as f:
            all_donor_ids = [line.strip() for line in f]
        
        # Filter out already scraped IDs
        remaining_donor_ids = [id for id in all_donor_ids if id not in scraped_ids]
        print(f"Remaining donors to scrape: {len(remaining_donor_ids)} out of {len(all_donor_ids)}")

        # Initialize session manager
        session_manager = ScrapingSession()

        # Initialize driver
        options = webdriver.ChromeOptions()
        options.add_argument(f'user-agent={random.choice(config["user_agents"])}')
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)

        # Continue scraping remaining profiles
        for i, donor_id in enumerate(remaining_donor_ids, 1):
            try:
                logger.info(f"Scraping profile {i} of {len(remaining_donor_ids)} remaining")
                
                if session_manager.should_take_break():
                    session_manager.take_break()
                
                relative_url = f"/donor/{donor_id}"
                profile_data = scrape_profile_bank1(relative_url, driver)
                
                if profile_data:
                    all_profiles.append(profile_data)
                    session_manager.profiles_scraped += 1
                    
                    # Save progress every 10 profiles
                    if len(all_profiles) % 10 == 0:
                        save_progress(all_profiles, bank_id)
                        print(f"Saved progress: {len(all_profiles)} profiles collected")
                
            except Exception as e:
                logger.error(f"Error scraping profile {donor_id}: {str(e)}")
                # Save progress on error
                save_progress(all_profiles, bank_id)
                # Reinitialize driver on error
                try:
                    driver.quit()
                except:
                    pass
                driver = webdriver.Chrome(options=options)
                continue
            
    except Exception as e:
        logger.error(f"Error during scraping: {str(e)}")
        
    finally:
        try:
            driver.quit()
        except:
            pass
        
        # Save final results
        save_final_results(all_profiles, bank_id)

if __name__ == "__main__":
    main()

Loaded 100 existing profiles from in-progress file
Found 100 already scraped donor IDs
Remaining donors to scrape: 336 out of 436


2024-10-27 12:54:47 - INFO - Scraping profile 1 of 336 remaining
2024-10-27 12:55:37 - INFO - Scraping profile 2 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 12:56:26 - INFO - Scraping profile 3 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 12:57:18 - INFO - Scraping profile 4 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 12:58:02 - INFO - Scraping profile 5 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 12:58:51 - INFO - Scraping profile 6 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 12:59:46 - INFO - Scraping profile 7 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:00:38 - INFO - Scraping profile 8 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:01:31 - INFO - Scraping profile 9 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:02:12 - INFO - Scraping profile 10 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:02:56 - INFO - Scraping profile 11 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 110 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_130256.json
Saved progress: 110 profiles collected


2024-10-27 13:03:37 - INFO - Scraping profile 12 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:04:26 - INFO - Scraping profile 13 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:05:16 - INFO - Scraping profile 14 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:06:18 - INFO - Scraping profile 15 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:07:04 - INFO - Scraping profile 16 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:07:57 - INFO - Scraping profile 17 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:08:43 - INFO - Scraping profile 18 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:09:27 - INFO - Scraping profile 19 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:10:13 - INFO - Scraping profile 20 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:11:03 - INFO - Scraping profile 21 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 120 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_131103.json
Saved progress: 120 profiles collected


2024-10-27 13:11:51 - INFO - Scraping profile 22 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:12:39 - INFO - Scraping profile 23 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:13:28 - INFO - Scraping profile 24 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:14:19 - INFO - Scraping profile 25 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:15:18 - INFO - Scraping profile 26 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:16:11 - INFO - Scraping profile 27 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:17:01 - INFO - Scraping profile 28 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:17:50 - INFO - Scraping profile 29 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:18:44 - INFO - Scraping profile 30 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:19:26 - INFO - Scraping profile 31 of 336 remaining
2024-10-27 13:19:26 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 130 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_131926.json
Saved progress: 130 profiles collected


2024-10-27 13:26:18 - INFO - Scraping profile 32 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:27:14 - INFO - Scraping profile 33 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:28:10 - INFO - Scraping profile 34 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:28:58 - INFO - Scraping profile 35 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:29:52 - INFO - Scraping profile 36 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:30:44 - INFO - Scraping profile 37 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:31:34 - INFO - Scraping profile 38 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:32:25 - INFO - Scraping profile 39 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:33:17 - INFO - Scraping profile 40 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:34:13 - INFO - Scraping profile 41 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 140 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_133413.json
Saved progress: 140 profiles collected


2024-10-27 13:35:07 - INFO - Scraping profile 42 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:35:55 - INFO - Scraping profile 43 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:36:46 - INFO - Scraping profile 44 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:37:42 - INFO - Scraping profile 45 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:38:33 - INFO - Scraping profile 46 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:39:21 - INFO - Scraping profile 47 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:40:21 - INFO - Scraping profile 48 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:41:02 - INFO - Scraping profile 49 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:41:53 - INFO - Scraping profile 50 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:42:43 - INFO - Scraping profile 51 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 150 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_134243.json
Saved progress: 150 profiles collected


2024-10-27 13:43:25 - INFO - Scraping profile 52 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:44:04 - INFO - Scraping profile 53 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:45:04 - INFO - Scraping profile 54 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:45:55 - INFO - Scraping profile 55 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:46:54 - INFO - Scraping profile 56 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:47:41 - INFO - Scraping profile 57 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:48:28 - INFO - Scraping profile 58 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:49:12 - INFO - Scraping profile 59 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:49:59 - INFO - Scraping profile 60 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:50:46 - INFO - Scraping profile 61 of 336 remaining
2024-10-27 13:50:46 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 160 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_135046.json
Saved progress: 160 profiles collected


2024-10-27 13:58:36 - INFO - Scraping profile 62 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 13:59:23 - INFO - Scraping profile 63 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:00:18 - INFO - Scraping profile 64 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:01:06 - INFO - Scraping profile 65 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:01:53 - INFO - Scraping profile 66 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:02:42 - INFO - Scraping profile 67 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:03:28 - INFO - Scraping profile 68 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:04:20 - INFO - Scraping profile 69 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:05:07 - INFO - Scraping profile 70 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:06:01 - INFO - Scraping profile 71 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 170 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_140601.json
Saved progress: 170 profiles collected


2024-10-27 14:06:47 - INFO - Scraping profile 72 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:07:32 - INFO - Scraping profile 73 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:08:18 - INFO - Scraping profile 74 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:09:06 - INFO - Scraping profile 75 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:09:56 - INFO - Scraping profile 76 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:10:44 - INFO - Scraping profile 77 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:11:32 - INFO - Scraping profile 78 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:12:16 - INFO - Scraping profile 79 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:13:10 - INFO - Scraping profile 80 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:13:59 - INFO - Scraping profile 81 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 180 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_141359.json
Saved progress: 180 profiles collected


2024-10-27 14:14:47 - INFO - Scraping profile 82 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:15:33 - INFO - Scraping profile 83 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:16:20 - INFO - Scraping profile 84 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:17:14 - INFO - Scraping profile 85 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:18:00 - INFO - Scraping profile 86 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:18:51 - INFO - Scraping profile 87 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:19:45 - INFO - Scraping profile 88 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:20:26 - INFO - Scraping profile 89 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:21:13 - INFO - Scraping profile 90 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:22:00 - INFO - Scraping profile 91 of 336 remaining
2024-10-27 14:22:00 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 190 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_142200.json
Saved progress: 190 profiles collected


2024-10-27 14:28:22 - INFO - Scraping profile 92 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:29:10 - INFO - Scraping profile 93 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:30:06 - INFO - Scraping profile 94 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:30:53 - INFO - Scraping profile 95 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:31:47 - INFO - Scraping profile 96 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:32:42 - INFO - Scraping profile 97 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:33:30 - INFO - Scraping profile 98 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:34:23 - INFO - Scraping profile 99 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:35:13 - INFO - Scraping profile 100 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:35:59 - INFO - Scraping profile 101 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 200 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_143559.json
Saved progress: 200 profiles collected


2024-10-27 14:37:00 - INFO - Scraping profile 102 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:37:52 - INFO - Scraping profile 103 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:38:45 - INFO - Scraping profile 104 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:39:39 - INFO - Scraping profile 105 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:40:34 - INFO - Scraping profile 106 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:41:18 - INFO - Scraping profile 107 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:42:11 - INFO - Scraping profile 108 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:43:10 - INFO - Scraping profile 109 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:44:00 - INFO - Scraping profile 110 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:44:47 - INFO - Scraping profile 111 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 210 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_144447.json
Saved progress: 210 profiles collected


2024-10-27 14:45:36 - INFO - Scraping profile 112 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:46:17 - INFO - Scraping profile 113 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:47:07 - INFO - Scraping profile 114 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:47:59 - INFO - Scraping profile 115 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:48:46 - INFO - Scraping profile 116 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:49:40 - INFO - Scraping profile 117 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:50:34 - INFO - Scraping profile 118 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:51:21 - INFO - Scraping profile 119 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:52:18 - INFO - Scraping profile 120 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:53:05 - INFO - Scraping profile 121 of 336 remaining
2024-10-27 14:53:05 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 220 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_145305.json
Saved progress: 220 profiles collected


2024-10-27 14:58:56 - INFO - Scraping profile 122 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 14:59:45 - INFO - Scraping profile 123 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:00:35 - INFO - Scraping profile 124 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:01:29 - INFO - Scraping profile 125 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:02:14 - INFO - Scraping profile 126 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:02:58 - INFO - Scraping profile 127 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:03:43 - INFO - Scraping profile 128 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:04:34 - INFO - Scraping profile 129 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:05:31 - INFO - Scraping profile 130 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:06:25 - INFO - Scraping profile 131 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 230 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_150625.json
Saved progress: 230 profiles collected


2024-10-27 15:07:15 - INFO - Scraping profile 132 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:08:10 - INFO - Scraping profile 133 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:08:59 - INFO - Scraping profile 134 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:09:49 - INFO - Scraping profile 135 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:10:30 - INFO - Scraping profile 136 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:11:21 - INFO - Scraping profile 137 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:12:14 - INFO - Scraping profile 138 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:13:10 - INFO - Scraping profile 139 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:13:55 - INFO - Scraping profile 140 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:14:47 - INFO - Scraping profile 141 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 240 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_151447.json
Saved progress: 240 profiles collected


2024-10-27 15:15:43 - INFO - Scraping profile 142 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:16:30 - INFO - Scraping profile 143 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:17:20 - INFO - Scraping profile 144 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:18:03 - INFO - Scraping profile 145 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:19:03 - INFO - Scraping profile 146 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:20:00 - INFO - Scraping profile 147 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:20:48 - INFO - Scraping profile 148 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:21:44 - INFO - Scraping profile 149 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:22:30 - INFO - Scraping profile 150 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:23:23 - INFO - Scraping profile 151 of 336 remaining
2024-10-27 15:23:23 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 250 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_152323.json
Saved progress: 250 profiles collected


2024-10-27 15:31:37 - INFO - Scraping profile 152 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:32:25 - INFO - Scraping profile 153 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:33:12 - INFO - Scraping profile 154 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:33:57 - INFO - Scraping profile 155 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:34:36 - INFO - Scraping profile 156 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:35:18 - INFO - Scraping profile 157 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:36:10 - INFO - Scraping profile 158 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:36:59 - INFO - Scraping profile 159 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:37:47 - INFO - Scraping profile 160 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:38:32 - INFO - Scraping profile 161 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 260 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_153832.json
Saved progress: 260 profiles collected


2024-10-27 15:39:22 - INFO - Scraping profile 162 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:40:13 - INFO - Scraping profile 163 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:41:10 - INFO - Scraping profile 164 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:42:00 - INFO - Scraping profile 165 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:42:51 - INFO - Scraping profile 166 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:43:45 - INFO - Scraping profile 167 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:44:39 - INFO - Scraping profile 168 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:45:39 - INFO - Scraping profile 169 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:46:26 - INFO - Scraping profile 170 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:47:13 - INFO - Scraping profile 171 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 270 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_154713.json
Saved progress: 270 profiles collected


2024-10-27 15:48:00 - INFO - Scraping profile 172 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:48:46 - INFO - Scraping profile 173 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:49:34 - INFO - Scraping profile 174 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:50:26 - INFO - Scraping profile 175 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:51:10 - INFO - Scraping profile 176 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:52:02 - INFO - Scraping profile 177 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:52:49 - INFO - Scraping profile 178 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:53:47 - INFO - Scraping profile 179 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:54:36 - INFO - Scraping profile 180 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 15:55:31 - INFO - Scraping profile 181 of 336 remaining
2024-10-27 15:55:31 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 280 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_155531.json
Saved progress: 280 profiles collected


2024-10-27 16:04:27 - INFO - Scraping profile 182 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:05:16 - INFO - Scraping profile 183 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:06:04 - INFO - Scraping profile 184 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:06:49 - INFO - Scraping profile 185 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:07:38 - INFO - Scraping profile 186 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:08:28 - INFO - Scraping profile 187 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:09:15 - INFO - Scraping profile 188 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:10:17 - INFO - Scraping profile 189 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:11:05 - INFO - Scraping profile 190 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:11:47 - INFO - Scraping profile 191 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 290 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_161147.json
Saved progress: 290 profiles collected


2024-10-27 16:12:45 - INFO - Scraping profile 192 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:13:42 - INFO - Scraping profile 193 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:14:25 - INFO - Scraping profile 194 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:15:17 - INFO - Scraping profile 195 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:16:09 - INFO - Scraping profile 196 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:16:54 - INFO - Scraping profile 197 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:17:37 - INFO - Scraping profile 198 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:18:31 - INFO - Scraping profile 199 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:19:22 - INFO - Scraping profile 200 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:20:12 - INFO - Scraping profile 201 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 300 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_162012.json
Saved progress: 300 profiles collected


2024-10-27 16:20:58 - INFO - Scraping profile 202 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:21:47 - INFO - Scraping profile 203 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:22:35 - INFO - Scraping profile 204 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:23:25 - INFO - Scraping profile 205 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:24:17 - INFO - Scraping profile 206 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:25:03 - INFO - Scraping profile 207 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:25:16 - ERROR - Error scraping profile [ANONYMIZED_URL] Message: 
Stacktrace:
0   chromedriver                        0x0000000102783634 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x000000010277be94 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x00000001021e8104 cxxbridge1$string$len + 88416
3   chromedriver                        0x000000010222a364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000102263bd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x000000010221ef54 cxxbridge1$string$len + 313264
6   chromedriver                        0x000000010221fba4 cxxbridge1$string$len + 316416
7   chromedriver                        0x000000010274e1d4 cxxbridge1$str$ptr + 3427196
8   chromedriver                        0x0000000102751518 cxxbridge1$str$ptr + 3440320
9   chromedriver                        0x00000001027355f8 cxxbridge1$str$ptr + 3325856
10  chromedriver   

Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:26:56 - INFO - Scraping profile 210 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:27:49 - INFO - Scraping profile 211 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:28:36 - INFO - Scraping profile 212 of 336 remaining
2024-10-27 16:28:36 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 310 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_162836.json
Saved progress: 310 profiles collected


2024-10-27 16:36:52 - INFO - Scraping profile 213 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:37:52 - INFO - Scraping profile 214 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:38:43 - INFO - Scraping profile 215 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:39:42 - INFO - Scraping profile 216 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:40:35 - INFO - Scraping profile 217 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:41:21 - INFO - Scraping profile 218 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:42:13 - INFO - Scraping profile 219 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:43:01 - INFO - Scraping profile 220 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:43:49 - INFO - Scraping profile 221 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:44:43 - INFO - Scraping profile 222 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 320 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_164443.json
Saved progress: 320 profiles collected


2024-10-27 16:45:33 - INFO - Scraping profile 223 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:46:19 - INFO - Scraping profile 224 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:47:13 - INFO - Scraping profile 225 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:48:04 - INFO - Scraping profile 226 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:49:03 - INFO - Scraping profile 227 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:49:50 - INFO - Scraping profile 228 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:50:36 - INFO - Scraping profile 229 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:51:27 - INFO - Scraping profile 230 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:52:17 - INFO - Scraping profile 231 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:53:05 - INFO - Scraping profile 232 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 330 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_165305.json
Saved progress: 330 profiles collected


2024-10-27 16:53:56 - INFO - Scraping profile 233 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:54:54 - INFO - Scraping profile 234 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:55:38 - INFO - Scraping profile 235 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:56:28 - INFO - Scraping profile 236 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:57:17 - INFO - Scraping profile 237 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:58:03 - INFO - Scraping profile 238 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:58:59 - INFO - Scraping profile 239 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 16:59:58 - INFO - Scraping profile 240 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:00:47 - INFO - Scraping profile 241 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:01:40 - INFO - Scraping profile 242 of 336 remaining
2024-10-27 17:01:40 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 340 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_170140.json
Saved progress: 340 profiles collected


2024-10-27 17:11:22 - INFO - Scraping profile 243 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:12:20 - INFO - Scraping profile 244 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:13:27 - INFO - Scraping profile 245 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:14:30 - INFO - Scraping profile 246 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:15:20 - INFO - Scraping profile 247 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:16:07 - INFO - Scraping profile 248 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:17:02 - INFO - Scraping profile 249 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:17:43 - INFO - Scraping profile 250 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:18:40 - INFO - Scraping profile 251 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:19:30 - INFO - Scraping profile 252 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 350 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_171930.json
Saved progress: 350 profiles collected


2024-10-27 17:20:11 - INFO - Scraping profile 253 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:21:05 - INFO - Scraping profile 254 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:21:53 - INFO - Scraping profile 255 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:22:43 - INFO - Scraping profile 256 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:23:29 - INFO - Scraping profile 257 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:24:24 - INFO - Scraping profile 258 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:25:14 - INFO - Scraping profile 259 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:25:59 - INFO - Scraping profile 260 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:26:48 - INFO - Scraping profile 261 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:27:43 - INFO - Scraping profile 262 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 360 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_172743.json
Saved progress: 360 profiles collected


2024-10-27 17:28:35 - INFO - Scraping profile 263 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:29:18 - INFO - Scraping profile 264 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:30:08 - INFO - Scraping profile 265 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:30:56 - INFO - Scraping profile 266 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:31:49 - INFO - Scraping profile 267 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:32:31 - INFO - Scraping profile 268 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:33:25 - INFO - Scraping profile 269 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:34:12 - INFO - Scraping profile 270 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:35:03 - INFO - Scraping profile 271 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:35:53 - INFO - Scraping profile 272 of 336 remaining
2024-10-27 17:35:53 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 370 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_173553.json
Saved progress: 370 profiles collected


2024-10-27 17:46:22 - INFO - Scraping profile 273 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:47:11 - INFO - Scraping profile 274 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:48:06 - INFO - Scraping profile 275 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:48:52 - INFO - Scraping profile 276 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:49:58 - INFO - Scraping profile 277 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:50:48 - INFO - Scraping profile 278 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:51:48 - INFO - Scraping profile 279 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:52:32 - INFO - Scraping profile 280 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:53:18 - INFO - Scraping profile 281 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:54:07 - INFO - Scraping profile 282 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 380 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_175407.json
Saved progress: 380 profiles collected


2024-10-27 17:55:02 - INFO - Scraping profile 283 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:56:00 - INFO - Scraping profile 284 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:56:52 - INFO - Scraping profile 285 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:57:32 - INFO - Scraping profile 286 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:58:22 - INFO - Scraping profile 287 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 17:59:10 - INFO - Scraping profile 288 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:00:07 - INFO - Scraping profile 289 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:00:52 - INFO - Scraping profile 290 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:01:40 - INFO - Scraping profile 291 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:02:27 - INFO - Scraping profile 292 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 390 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_180227.json
Saved progress: 390 profiles collected


2024-10-27 18:03:19 - INFO - Scraping profile 293 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:04:15 - INFO - Scraping profile 294 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:05:08 - INFO - Scraping profile 295 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:05:54 - INFO - Scraping profile 296 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:06:45 - INFO - Scraping profile 297 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:07:41 - INFO - Scraping profile 298 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:08:32 - INFO - Scraping profile 299 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:09:24 - INFO - Scraping profile 300 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:10:10 - INFO - Scraping profile 301 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:11:01 - INFO - Scraping profile 302 of 336 remaining
2024-10-27 18:11:01 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 400 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_181101.json
Saved progress: 400 profiles collected


2024-10-27 18:20:53 - INFO - Scraping profile 303 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:21:42 - INFO - Scraping profile 304 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:22:31 - INFO - Scraping profile 305 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:23:20 - INFO - Scraping profile 306 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:24:06 - INFO - Scraping profile 307 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:25:01 - INFO - Scraping profile 308 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:25:55 - INFO - Scraping profile 309 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:26:42 - INFO - Scraping profile 310 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:27:30 - INFO - Scraping profile 311 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:28:30 - INFO - Scraping profile 312 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 410 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_182830.json
Saved progress: 410 profiles collected


2024-10-27 18:29:19 - INFO - Scraping profile 313 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:30:07 - INFO - Scraping profile 314 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:30:56 - INFO - Scraping profile 315 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:31:51 - INFO - Scraping profile 316 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:32:39 - INFO - Scraping profile 317 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:33:27 - INFO - Scraping profile 318 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:34:24 - INFO - Scraping profile 319 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:35:10 - INFO - Scraping profile 320 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:36:03 - INFO - Scraping profile 321 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:36:54 - INFO - Scraping profile 322 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 420 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_183654.json
Saved progress: 420 profiles collected


2024-10-27 18:37:49 - INFO - Scraping profile 323 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:38:45 - INFO - Scraping profile 324 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:39:30 - INFO - Scraping profile 325 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:40:29 - INFO - Scraping profile 326 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:41:26 - INFO - Scraping profile 327 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:42:13 - INFO - Scraping profile 328 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:43:00 - INFO - Scraping profile 329 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:43:54 - INFO - Scraping profile 330 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:44:34 - INFO - Scraping profile 331 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:45:25 - INFO - Scraping profile 332 of 336 remaining
2024-10-27 18:45:25 - INFO - Taking a session break...


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Saved progress: 430 profiles
Backup created: /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_bank1_backup_20241027_184525.json
Saved progress: 430 profiles collected


2024-10-27 18:54:32 - INFO - Scraping profile 333 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:55:29 - INFO - Scraping profile 334 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:56:20 - INFO - Scraping profile 335 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'


2024-10-27 18:57:12 - INFO - Scraping profile 336 of 336 remaining


Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Found label: 'Ethnic Origin:'
Found label: 'Ancestry (Self-Reported):'
Found label: 'Religion:'
Found label: 'Jewish Ancestry (Self-Reported):'
Data saved to:
- /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_185803.json
- /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_185803.csv
- /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles.json
- /Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles.csv


### Cross-checking if backup files match final outputs since scraping was interrupted several times.

In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl (11.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl (5.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [4]:
import json
import pandas as pd

# Read the JSON files
with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_125054.json', 'r') as f:
    data_1250 = json.load(f)

with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_185803.json', 'r') as f:
    data_1858 = json.load(f)

with open('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles.json', 'r') as f:
    data_main = json.load(f)

# Read the CSV files
df_1250 = pd.read_csv('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_125054.csv')
df_1858 = pd.read_csv('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles_20241027_185803.csv')
df_main = pd.read_csv('/Users/cindylinsf/Documents/CCI/THESIS/Msc_Thesis_Project_Files/data/raw/bank1/profiles.csv')

# Print the number of profiles in each file
print("Number of profiles in each file:")
print(f"12:50 JSON: {len(data_1250)}")
print(f"12:50 CSV: {len(df_1250)}")
print(f"18:58 JSON: {len(data_1858)}")
print(f"18:58 CSV: {len(df_1858)}")
print(f"Main JSON: {len(data_main)}")
print(f"Main CSV: {len(df_main)}")

# Check if the final files (18:58) match the main files
print("\nDo final files match main files?")
print(f"JSON matches: {len(data_1858) == len(data_main)}")
print(f"CSV matches: {len(df_1858) == len(df_main)}")

# Get unique donor IDs in each version
donor_ids_1250 = set(df_1250['donor_id'])
donor_ids_1858 = set(df_1858['donor_id'])
donor_ids_main = set(df_main['donor_id'])

print("\nNumber of unique donor IDs:")
print(f"12:50 version: {len(donor_ids_1250)}")
print(f"18:58 version: {len(donor_ids_1858)}")
print(f"Main version: {len(donor_ids_main)}")

Number of profiles in each file:
12:50 JSON: 100
12:50 CSV: 100
18:58 JSON: 435
18:58 CSV: 435
Main JSON: 435
Main CSV: 435

Do final files match main files?
JSON matches: True
CSV matches: True

Number of unique donor IDs:
12:50 version: 100
18:58 version: 435
Main version: 435
