In [58]:
from datetime import datetime
import pandas as pd

## Load data

In [59]:
cleaned_deduped_authors_filename = "MCK-CX_2022-10-09magazine_authors_newly_merged.csv"
newly_parsed_authors_filename = "magazine_authors_parsed.csv"
newly_merged_authors_filename = "magazine_authors-2023-02-10-merged.csv"

In [60]:
cleaned_deduped_authors = pd.read_csv(cleaned_deduped_authors_filename)

In [61]:
cleaned_deduped_authors.head()

Unnamed: 0,drupal_full_name,drupal_author_id,duplicate of ID,given_name,family_name,organization_name,meeting_name,Notes
0,Carl Abbott,1352,,Carl,Abbott,,,
1,Marge Abbott,2931,343.0,Margery Post,Abbott,,,
2,Margery Post Abbott,3430,343.0,Margery Post,Abbott,,,
3,Margery Abbott,3267,343.0,Margery Post,Abbott,,,
4,Margery Post Abbott,343,,Margery Post,Abbott,,,


In [62]:
cleaned_deduped_authors.shape

(2774, 8)

In [63]:
newly_parsed_authors = pd.read_csv(newly_parsed_authors_filename)

In [64]:
newly_parsed_authors.head()

Unnamed: 0,given_name,family_name,drupal_full_name,drupal_author_id
0,A. N.,Whitehead,A. N. Whitehead,1832
1,A.,S.,A. S.,1597
2,A. Sidney,Wright,A. Sidney Wright,1789
3,A. Stanley,Thompson,A. Stanley Thompson,2801
4,A.F.,Anderson,A.F. Anderson,2615


In [65]:
newly_parsed_authors.shape

(2933, 4)

## Merge new authors into existing spreadsheet

We will use the Drupal ID to skip existing rows and add only new authors to the merged spreadsheet.

In [83]:
# select newly parsed authors that are NOT in the cleaned deduped authors
new_authors = newly_parsed_authors[~newly_parsed_authors["drupal_author_id"].isin(cleaned_deduped_authors["drupal_author_id"])]

In [85]:
# Add a column to show the date new authors were added to the CSV
date_today = datetime.today().strftime('%Y-%m-%d')
new_authors.assign(date_added_to_list=date_today)

Unnamed: 0,given_name,family_name,drupal_full_name,drupal_author_id,date_added_to_list
5,Abiel,Locke,Abiel Locke,5455,2023-02-10
10,African Great Lakes Initiative of Friends Peace,Teams,African Great Lakes Initiative of Friends Peac...,5367,2023-02-10
11,AFSC Leadership,Team,AFSC Leadership Team,5373,2023-02-10
75,Anastacia,Ebi,Anastacia Ebi,5437,2023-02-10
77,Andrew,Pangelina,Andrew Pangelina,5456,2023-02-10
...,...,...,...,...,...
2872,What Canst Thou,Say?,What Canst Thou Say?,5452,2023-02-10
2907,Windy,Cooler,Windy Cooler,5337,2023-02-10
2915,Working Group on Right Relationship with,Animals,Working Group on Right Relationship with Animals,5473,2023-02-10
2916,World Beyond,War,World Beyond War,5451,2023-02-10


In [86]:
# Clear out any previous added date
cleaned_deduped_authors.assign(date_added_to_list=None)

Unnamed: 0,drupal_full_name,drupal_author_id,duplicate of ID,given_name,family_name,organization_name,meeting_name,Notes,date_added_to_list
0,Carl Abbott,1352,,Carl,Abbott,,,,
1,Marge Abbott,2931,343.0,Margery Post,Abbott,,,,
2,Margery Post Abbott,3430,343.0,Margery Post,Abbott,,,,
3,Margery Abbott,3267,343.0,Margery Post,Abbott,,,,
4,Margery Post Abbott,343,,Margery Post,Abbott,,,,
...,...,...,...,...,...,...,...,...,...
2769,Sixth World Conference of Friends,189,,,,Sixth World Conference of Friends,,,
2770,South Mountain Friends Meeting,1146,,,,,South Mountain Friends Meeting,,
2771,V B H,1830,,,,Friends Bulletin Committee,,,
2772,World War II Conscientious Objectors,298,,,,World War II Conscientious Objectors,,,


In [107]:
# TODO: determine why date_added_to_list column isn't included in the newly merged authors for export
newly_merged_authors_for_export = pd.concat([cleaned_deduped_authors, new_authors])

In [96]:
newly_merged_authors_for_export

Unnamed: 0,drupal_full_name,drupal_author_id,duplicate of ID,given_name,family_name,organization_name,meeting_name,Notes
0,Carl Abbott,1352,,Carl,Abbott,,,
1,Marge Abbott,2931,343.0,Margery Post,Abbott,,,
2,Margery Post Abbott,3430,343.0,Margery Post,Abbott,,,
3,Margery Abbott,3267,343.0,Margery Post,Abbott,,,
4,Margery Post Abbott,343,,Margery Post,Abbott,,,
...,...,...,...,...,...,...,...,...
2872,What Canst Thou Say?,5452,,What Canst Thou,Say?,,,
2907,Windy Cooler,5337,,Windy,Cooler,,,
2915,Working Group on Right Relationship with Animals,5473,,Working Group on Right Relationship with,Animals,,,
2916,World Beyond War,5451,,World Beyond,War,,,


In [99]:
newly_merged_authors_for_export.sort_values(by="drupal_full_name", inplace=True)

In [104]:
# make sure the row counts match
assert new_authors.shape[0] + cleaned_deduped_authors.shape[0] == newly_merged_authors_for_export.shape[0]

## Export

In [106]:
newly_merged_authors_for_export.to_csv(newly_merged_authors_filename, index=False)