In [None]:
########## PART 1 ##########

# Set up FamilySearch authentication



In [None]:
# 1. Create a Developer Account

# Go to FamilySearch Developer Portal and sign up for a developer account.

# After signing up, you’ll be able to create applications for using their API.

# 2. Create a New App
# Once you're logged in:

# Go to the "Manage My Applications" page.

# Click "Create New Application".

# Fill out the form:

# Name: Your app's name (e.g., "Immigrant Linker").

# OAuth Redirect URL: This is the URL to which FamilySearch will redirect users after they authorize the application. For testing purposes, you can set it to http://localhost or use a tool like ngrok for local testing.
# Description: A short description of what your app does (e.g., "Bulk links immigrants with census records").
# Website: (Optional) If you have a website, you can link it here.
# Click Save.

# After creating your app, you’ll be given:

# Client ID

# Client Secret

# Redirect URI (the OAuth Redirect URL you entered when creating the app)

# Note these down — you’ll need them in the code for authentication.

# 3. Install Required Libraries (Python)
# You’ll need a couple of libraries to handle OAuth authentication in Python:

# requests: For making HTTP requests.

# requests_oauthlib: For handling the OAuth flow.

In [None]:
pip install requests requests_oauthlib


In [None]:
# 4. Set Up OAuth Flow in Python
# FamilySearch uses OAuth 2.0 for authentication. This flow involves getting a token that allows your app to act on behalf of a user.

# 5. Basic Python Code to Authenticate with FamilySearch API
# Here’s a basic Python script that implements the OAuth 2.0 flow to authenticate with FamilySearch. The script will:

# Redirect the user to log in.

# Capture the Authorization Code.

# Use that to get an Access Token that can be used for API requests.

In [None]:
import os

import requests
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import WebApplicationClient

# Credentials from the FamilySearch Developer Portal. They are read from the
# environment so real secrets are never saved inside the notebook; the
# placeholder strings are only a fallback and will not authenticate.
CLIENT_ID = os.environ.get('FAMILYSEARCH_CLIENT_ID', 'your_client_id_here')
CLIENT_SECRET = os.environ.get('FAMILYSEARCH_CLIENT_SECRET', 'your_client_secret_here')
REDIRECT_URI = 'http://localhost'  # Redirect URL you set in your app

# oauthlib refuses to parse a plain-http redirect URL unless this flag is set.
# Only enable it for a local http:// redirect used during testing.
if REDIRECT_URI.lower().startswith('http://'):
    os.environ.setdefault('OAUTHLIB_INSECURE_TRANSPORT', '1')

# The FamilySearch OAuth endpoints.
# NOTE(review): the production identity host is documented as
# ident.familysearch.org; confirm against the developer portal for your
# environment (integration and beta use different hosts).
AUTHORIZATION_BASE_URL = 'https://ident.familysearch.org/cis-web/oauth2/v3/authorization'
TOKEN_URL = 'https://ident.familysearch.org/cis-web/oauth2/v3/token'

# Create the OAuth session on top of an explicit authorization-code client.
client = WebApplicationClient(CLIENT_ID)
oauth = OAuth2Session(CLIENT_ID, client=client, redirect_uri=REDIRECT_URI)

# Step 1: Send the user to FamilySearch to log in and authorize the app.
authorization_url, state = oauth.authorization_url(AUTHORIZATION_BASE_URL)
print(f'Please go to {authorization_url} and authorize access.')

# Step 2: After granting permission the user is redirected to REDIRECT_URI.
# The full redirected URL contains the authorization code, so ask for it.
redirect_response = input('Paste the full redirect URL here: ').strip()

# Step 3: Exchange the authorization code in the redirect URL for a token.
# The token is stored on the session and attached to later requests.
oauth.fetch_token(TOKEN_URL, authorization_response=redirect_response, client_secret=CLIENT_SECRET)

# Step 4: Call the API with the authenticated session.
response = oauth.get(
    'https://api.familysearch.org/platform/tree/persons/search',
    params={'givenName': 'Amanda', 'surname': 'Anderson'},
)
# Fail loudly on an HTTP error instead of trying to decode an error page.
response.raise_for_status()
print(response.json())



In [None]:
# Explanation of the Script:
# OAuth Flow: The user is redirected to FamilySearch to log in, then FamilySearch will send an authorization code to your redirect URL.

# Exchange Code for Token: The authorization code is exchanged for an access token and refresh token.

# API Request: Once authenticated, you can use the access token to make API requests (such as searching for census records).

# 6. What Happens After Authentication?
# Once you have the access token, you can use it to make requests to the FamilySearch API.

# The access token will expire after a while, but you can use the refresh token to get a new access token.

# 7. Handling Token Expiry (Refreshing Token)
# If your access token expires, you can refresh it by sending the following request:

In [None]:
# Refresh the access token using the refresh token stored on the session.
# A refresh is only possible if the earlier token response included a
# refresh token; otherwise the authorization flow has to be run again.
if oauth.token.get('refresh_token'):
    new_token = oauth.refresh_token(TOKEN_URL, client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    # Do not print the token itself: notebook outputs are saved and shared.
    print(f"Token refreshed; expires in {new_token.get('expires_in', 'unknown')} seconds.")
else:
    new_token = None
    print('No refresh token available on this session; re-run the authorization cell.')


In [None]:
# 8. Put It All Together
# With this setup, you can now:

# Authenticate with FamilySearch and retrieve records.

# Automate the process for hundreds or thousands of immigrants (searching their census records).

# Store the access token and refresh token securely to avoid re-authenticating each time.

In [None]:
######### PART 2 ##############
# Link Ellis Island immigration data with Census data


# Step 1. Read immigrants.csv with 100 immigrants           
# Step 2. For each immigrant, send a FamilySearch API search using first name, last name, birth country, and year of birth (the year of immigration is only used in Step 4)
# Step 3. Pull census data and family structure              
# Step 4. Check whether any child was born within one year of the immigration year
# Step 5. Save results into a new CSV                    


In [None]:

import os
import re
import time

import pandas as pd
import requests

# Settings
# The access token is read from the environment so it is never saved in the
# notebook; the placeholder is only a fallback and will not authenticate.
API_KEY = os.environ.get('FAMILYSEARCH_ACCESS_TOKEN', 'your_familysearch_api_key')
BASE_URL = 'https://api.familysearch.org/platform/tree/persons/search'
REQUEST_TIMEOUT_SECONDS = 30   # never hang forever on one request
REQUEST_DELAY_SECONDS = 1      # be nice to the API: pause between calls
CHILD_BIRTH_WINDOW_YEARS = 1   # child counts if born within +/- this many years

# A standalone run of exactly four digits, e.g. the year in "3 Apr 1901".
_YEAR_PATTERN = re.compile(r'(?<!\d)(\d{4})(?!\d)')


def _extract_year(value):
    """Return the year contained in ``value`` as an int, or None.

    Strings are searched for the first standalone 4-digit number, so both
    "1901-04-03" and "3 Apr 1901" yield 1901. Numeric values (including the
    floats pandas produces for columns with gaps) are truncated to int.
    Missing or unparseable values yield None.
    """
    if isinstance(value, str):
        found = _YEAR_PATTERN.search(value)
        return int(found.group(1)) if found else None
    try:
        if pd.isna(value):
            return None
        return int(value)
    except (TypeError, ValueError):
        return None


def _find_child_near_year(children, target_year, window):
    """Return (name, birth_date) of the first child born near ``target_year``.

    A child qualifies when the year in its ``display.birthDate`` is within
    ``window`` years of ``target_year``. Returns None when no child qualifies
    or when ``target_year`` is unknown.
    """
    if target_year is None:
        return None
    for child in children:
        display = child.get('display') if isinstance(child, dict) else None
        if not display:
            continue
        birth_date = display.get('birthDate', '')
        birth_year = _extract_year(birth_date)
        if birth_year is not None and abs(birth_year - target_year) <= window:
            return display.get('name', ''), birth_date
    return None


def _result_row(firstname, lastname, match_found, child=None):
    """Build one output record; ``child`` is a (name, birth_date) pair or None."""
    child_name, child_birth = child if child else ('', '')
    return {
        'FirstName': firstname,
        'LastName': lastname,
        'MatchFound': match_found,
        'ChildBornNearImmigration': child is not None,
        'ChildName': child_name,
        'ChildBirthYear': child_birth
    }


# Load the CSV
immigrants = pd.read_csv('immigrants.csv')

# Prepare output
results = []

# Send the token as a header rather than in the URL, where it would end up in
# logs and proxies.
request_headers = {'Authorization': f'Bearer {API_KEY}'}

# Loop over each immigrant
for index, row in immigrants.iterrows():
    firstname = row['FirstName']
    lastname = row['LastName']
    birth_year = _extract_year(row['BirthYear'])
    birth_country = row['BirthCountry']
    immigration_year = _extract_year(row['ImmigrationYear'])

    # Create search query, leaving out any field that is missing in the CSV
    # so that "nan" is never sent as a search term.
    candidate_params = {
        'givenName': firstname,
        'surname': lastname,
        'birthDate': birth_year,
        'birthPlace': birth_country,
    }
    params = {key: value for key, value in candidate_params.items() if pd.notna(value)}

    try:
        response = requests.get(
            BASE_URL,
            params=params,
            headers=request_headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # One failed request must not discard the results gathered so far.
        print(f"Request failed for {firstname} {lastname}: {exc}")
        time.sleep(REQUEST_DELAY_SECONDS)
        continue

    if response.status_code == 204:
        # Success with no body: the search returned nothing.
        results.append(_result_row(firstname, lastname, False))
    elif response.status_code == 200:
        try:
            data = response.json()
        except ValueError:
            print(f"Unreadable JSON response for {firstname} {lastname}")
            data = {}

        persons = data.get('persons') if isinstance(data, dict) else None
        if persons:
            # Very simple match checking: only the first matched person is used.
            person = persons[0]
            # NOTE(review): children are read from a nested 'persons' list on
            # the matched person; confirm this against the real response schema.
            children = person.get('persons', [])
            child = _find_child_near_year(children, immigration_year, CHILD_BIRTH_WINDOW_YEARS)
            results.append(_result_row(firstname, lastname, True, child))
        else:
            # Missing or empty 'persons' both mean no match; still record a row.
            results.append(_result_row(firstname, lastname, False))
    else:
        print(f"API error for {firstname} {lastname}: {response.status_code}")

    time.sleep(REQUEST_DELAY_SECONDS)

# Save to a new CSV
results_df = pd.DataFrame(results)
results_df.to_csv('immigrant_census_links.csv', index=False)

print("Finished matching!")
