# Participant and Room Schedules

In [2]:
import pandas as pd
import numpy as np

First, we need to bring in all our datasets.

In [3]:
# Make sure to load only the relevant files from the specific date we want
run_date = '2025-02-13'

final_room_pairings_Friday = pd.read_excel(f"Outputs/Finalized datasets/Editor-agent pairings for Friday_{run_date}.xlsx")
final_saturday_rooms = pd.read_excel(f"Outputs/Finalized datasets/Final saturday rooms_{run_date}.xlsx")
registered = pd.read_excel(f"Outputs/Finalized Datasets/Registered_cleaned_{run_date}.xlsx", dtype={'phone': str})

# Schedules
final_friday_assignments2 = pd.read_excel(f"Outputs/Finalized datasets/Friday query letter critique assignments_{run_date}.xlsx", dtype={'phone': str})
final_sataft_assignments2 = pd.read_excel(f"Outputs/Finalized datasets/Saturday pitch assignments_{run_date}.xlsx", dtype={'phone': str})
final_satmorn_assignments2 = pd.read_excel(f"Outputs/Finalized datasets/Saturday manuscript critique assignments_{run_date}.xlsx", dtype={'phone': str})
coaching_schedule = pd.read_excel(f"Outputs/Finalized datasets/Finalized coaching schedule_{run_date}.xlsx", dtype={'phone': str})
wait_all = pd.read_excel(f"Outputs/Finalized Datasets/Waitlist participants.xlsx", dtype={'phone': str})

##### Publishers and Rooms on both days

1) Print-outs of all 9 rooms on Friday and their schedules

In [4]:
#pip install python-docx

In [5]:
import docx
from docx.shared import Pt, RGBColor
from docx.oxml import OxmlElement
from docx.shared import Inches

# Create Word document for the publisher pairings and their rooms
pairings_doc = docx.Document()

# Add a header
title = pairings_doc.add_heading('Friday Query Letter Critiques', level=1)
title.alignment = 1  # Center align
for run in title.runs:
    run.font.size = Pt(20)
    run.font.name = 'Arial'

# Add the second part of the header
title2 = pairings_doc.add_heading('Publisher Pairings and Room Locations', level=2)
title2.alignment = 1  # Center align
for run in title2.runs:
    run.font.size = Pt(16)
    run.font.name = 'Arial'

# Add a line break - the title was too close to the table
pairings_doc.add_paragraph("")  # Adds an empty paragraph for spacing
pairings_doc.add_paragraph("")  # Adds an empty paragraph for spacing

# Add a table
table = pairings_doc.add_table(rows=1, cols=2)

# Format the header row
header_cells = table.rows[0].cells
header_cells[0].text = "Publishers"
header_cells[1].text = "Room Location"

# Set header font style
for cell in header_cells:
    for paragraph in cell.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(16)
            run.font.name = 'Arial'
            run.font.bold = True  # Make the header text bold
            run.font.color.rgb = RGBColor(0, 0, 0)  # Black text

# Adjust column widths (the publisher pairings column needs more space)
table.columns[0].width = Inches(4.5)  # Pairing column
table.columns[1].width = Inches(1.5)  # Location column

# Add rows to the table
for _, row in final_room_pairings_Friday.iterrows():
    pairing = f"{row['pubname1']} & {row['pubname2']}"
    location = row['room_name']
    row_cells = table.add_row().cells
    row_cells[0].text = pairing
    row_cells[1].text = location
    for cell in row_cells:
        for paragraph in cell.paragraphs:
            run = paragraph.runs[0]
            run.font.size = Pt(16)
            run.font.name = "Arial"

# Remove table borders
tbl = table._element
tbl_borders = tbl.xpath(".//w:tblBorders")
for border in tbl_borders:
    border.getparent().remove(border)

# Save the Word document
pairings_doc.save("Outputs/Print-outs/Friday-pairings_and_rooms.docx")



Great. Now onto...

2) Saturday Rooms Assignments

In [6]:
del(cell, pairing, location, pairings_doc, row_cells, run, title, title2)

# Create Word document for the publisher pairings and their rooms
pairings_doc = docx.Document()

# Change the margins
sections = pairings_doc.sections
for section in sections:
    section.top_margin = Inches(0.5)

# Add a header
title = pairings_doc.add_heading('Saturday Publishers and Room Locations', level=1)
title.alignment = 1  # Center align
for run in title.runs:
    run.font.size = Pt(20)
    run.font.name = 'Arial'

# Add a line break - the title was too close to the table
pairings_doc.add_paragraph("")  # Adds an empty paragraph for spacing

# Add a table
table = pairings_doc.add_table(rows=1, cols=2)

# Format the header row
header_cells = table.rows[0].cells
header_cells[0].text = "Publishers"
header_cells[1].text = "Room Location"

# Set header font style
for cell in header_cells:
    for paragraph in cell.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(16)
            run.font.name = 'Arial'
            run.font.bold = True  # Make the header text bold
            run.font.color.rgb = RGBColor(0, 0, 0)  # Black text

# Adjust column widths (the publisher pairings column needs more space)
table.columns[0].width = Inches(4)  # Pairing column
table.columns[1].width = Inches(2)  # Location column

# Add rows to the table
for _, row in final_saturday_rooms.iterrows():
    pairing = row['lit_guest_name']
    location = row['room_name']
    row_cells = table.add_row().cells
    row_cells[0].text = pairing
    row_cells[1].text = location
    for cell in row_cells:
        for paragraph in cell.paragraphs:
            run = paragraph.runs[0]
            run.font.size = Pt(16)
            run.font.name = "Arial"

# Remove table borders
#tbl = table._element
#tbl_borders = tbl.xpath(".//w:tblBorders")
#for border in tbl_borders:
#    border.getparent().remove(border)

# Save the Word document
pairings_doc.save("Outputs/Print-outs/Saturday-Publishers_and_rooms.docx")



##### Schedules for each individual room on Friday

Let's now create the word docs (iteratively) with the schedules for each of the rooms. First, we need to add a column that is the start-end time, and we also need to add a row that says 'break' at 3:15-3:30.

In [7]:
# Create the break timeslot at 3:15
break_time = pd.Timestamp('2025-05-02 15:15:00')

# Create a dataset with it (should have 9 rows)
break_data = {
    'timeslot_start': [break_time] * len(final_friday_assignments2['room_name'].unique()),  # Create a break for each room
    'room_name': final_friday_assignments2['room_name'].unique(),  # Each room gets the break
    'First Name': ['BREAK'] * len(final_friday_assignments2['room_name'].unique()),  # 'Break' for all participants at the break slot
}

break_df = pd.DataFrame(break_data)


In [8]:
# Append it to the friday_prints
friday_prints2 = pd.concat([final_friday_assignments2, break_df], ignore_index=True).sort_values(by=['room_name', 'timeslot_start'])


In [9]:
from datetime import timedelta

# Function to format time slots
def format_timeslot(ts):
    start_time = ts.strftime('%I:%M').lstrip('0')  # Remove leading zero for hours
    end_time = (ts + timedelta(minutes=15)).strftime('%I:%M').lstrip('0')  # Add 15 minutes
    return f'{start_time}-{end_time}'

friday_prints2['formatted_timeslot'] = friday_prints2['timeslot_start'].apply(format_timeslot)

In [10]:
# Sort by room, then timeslot
friday_prints2 = friday_prints2.sort_values(by=['room_name', 'timeslot_start'])

In [11]:
def create_word_doc(room_df):
    for room, room_data in room_df.groupby('room_name'):
        doc = docx.Document()

        # Get the first valid row for publishers, excluding the break row
        room_data_no_break = room_data[room_data['First Name'] != 'BREAK']
        pubname1 = room_data_no_break['pubname1'].iloc[0]  # First valid publisher
        pubname2 = room_data_no_break['pubname2'].iloc[0]  # First valid publisher
 
        # Room header
        title = doc.add_heading(f"{room}", level=1)
        title.alignment = 1  # Center align
        for run in title.runs:
            run.font.size = Pt(20)
            run.font.name = 'Arial'

        title.runs[0].font.color.rgb = RGBColor(0, 0, 0)  # RGB for black

        title.paragraph_format.space_after = 0  # Remove space after header
        title.paragraph_format.space_before = 0  # Remove space before header
      

        # Add subheader 
        title2 = doc.add_heading("Query Letter Critique", level=2)
        doc.add_paragraph("")  # Adds an empty paragraph for spacing
        title2.runs[0].font.color.rgb = RGBColor(0, 0, 0)  # RGB for black

        title3 = doc.add_heading(f"{pubname1} and {pubname2}", level=1) # Print out the publishers assigned to that room
        #title2.paragraph_format.space_after = 0  # Remove space after header
        title2.paragraph_format.space_before = 0  # Remove space before header
        title3.paragraph_format.space_after = 0  # Remove space after header
        title3.paragraph_format.space_before = 0  # Remove space before header
      
        title2.alignment = 1  # Center align
        for run in title2.runs:
            run.font.size = Pt(16)
            run.font.name = 'Arial'

        title3.alignment = 1  # Center align
        for run in title3.runs:
            run.font.size = Pt(20)
            run.font.name = 'Arial'

        title3.runs[0].font.color.rgb = RGBColor(0, 0, 0)  # RGB for black


        # Add a line break - the title was too close to the table
        doc.add_paragraph("")  # Adds an empty paragraph for spacing

        # Iterate over the timeslots and participants
        for _, row in room_data.iterrows():
            time_slot = row['formatted_timeslot']

            if pd.isna(row['Last Name']):  
                full_name = "BREAK"
                para = doc.add_paragraph(f"{time_slot}\t\t{full_name}")
                para.runs[0].bold = True  # Make "Break" bold
            else:
                full_name = f"{row['First Name']} {row['Last Name']}"

                # Add (ZOOM) if they're virtual
                if row['virtual'] == 'Virtual':  # Add (ZOOM) if virtual
                    full_name += " (ZOOM)"
                para = doc.add_paragraph(f"{time_slot}\t\t{full_name}")
            
            # Set the font size and font family
            run = para.runs[0]
            run.font.size = Pt(16)
            run.font.name = 'Arial'
        
        # Save the document with the room name as filename
        doc.save(f"Outputs/Print-outs/Friday_{room}_meeting_schedule.docx")

# Generate documents
for room, room_data in friday_prints2.groupby('room_name'):
    create_word_doc(friday_prints2[friday_prints2['room_name'] == room])


In [12]:
# Create the break timeslot at 3:15
break_time = pd.Timestamp('2025-05-03 11:30:00')

# Create a dataset with it (should have 9 rows)
break_data = {
    'timeslot_start': [break_time] * len(final_satmorn_assignments2['room_name'].unique()),  # Create a break for each room
    'room_name': final_satmorn_assignments2['room_name'].unique(),  # Each room gets the break
    'First Name': ['BREAK'] * len(final_satmorn_assignments2['room_name'].unique()),  # 'Break' for all participants at the break slot
}

break_morn = pd.DataFrame(break_data)

# Create the break timeslot at 3:15
break_time = pd.Timestamp('2025-05-03 15:45:00')

# Create a dataset with it (should have 9 rows)
break_data = {
    'timeslot_start': [break_time] * len(final_sataft_assignments2['room_name'].unique()),  # Create a break for each room
    'room_name': final_sataft_assignments2['room_name'].unique(),  # Each room gets the break
    'First Name': ['BREAK'] * len(final_sataft_assignments2['room_name'].unique()),  # 'Break' for all participants at the break slot
}

break_after = pd.DataFrame(break_data)


In [13]:
# Add these break times to their respective datasets and sort by the timeslots in each room
sat_morn2 = pd.concat([final_satmorn_assignments2, break_morn], ignore_index=True).sort_values(by=['room_name', 'timeslot_start'])
sat_after2 = pd.concat([final_sataft_assignments2, break_after], ignore_index=True).sort_values(by=['room_name', 'timeslot_start'])


In [14]:
# Lastly, format the time variables how we want them:
sat_morn2['formatted_timeslot'] = sat_morn2['timeslot_start'].apply(format_timeslot)
sat_after2['formatted_timeslot'] = sat_after2['timeslot_start'].apply(format_timeslot)

Now let's actually run the code to output the print-outs.

In [15]:
def create_word_doc_for_day(morning_df, afternoon_df):
    for room in morning_df['room_name'].unique():
        doc = docx.Document()

        # Room header (title with pubtype1 + room_name)
        room_data_no_break = morning_df[morning_df['First Name'] != 'BREAK']
        publisher = room_data_no_break['publisher'].iloc[0]  # First valid publisher

        room_header = doc.add_heading(f"{publisher} {room}", level=1)
        room_header.alignment = 1  # Center-align the header
        room_header.paragraph_format.space_after = 0  # Remove space after header
        room_header.paragraph_format.space_before = 0  # Remove space before header
        run1 = room_header.runs[0]
        run1.font.size = Pt(20)
        run1.font.name = 'Arial'
        run1.font.color.rgb = RGBColor(0, 0, 0)  # RGB for black

        
        # Add subheader "Critique Schedule"
        title2 = doc.add_heading("Manuscript Critique Schedule", level=2)
        #doc.add_paragraph()  # Add paragraph break
        run = title2.runs[0]
        run.font.size = Pt(16)
        run.font.name = 'Arial'
        run.font.color.rgb = RGBColor(0, 0, 0)  # RGB for black

        
        # Add morning schedule (ms_data) and handle break times
        morning_data = morning_df[morning_df['room_name'] == room]
        for _, row in morning_data.iterrows():
            time_slot = row['formatted_timeslot']
            if time_slot == "11:30-11:45":  # Break time for morning session
                full_name = "BREAK"
                para = doc.add_paragraph(f"{time_slot}\t\t{full_name}")
                para.runs[0].bold = True  # Make "Break" bold
            else:
                full_name = f"{row['First Name']} {row['Last Name']}"
                if pd.isna(row['Last Name']):
                    full_name = row['First Name']
                if row['virtual'] == 'Virtual':  # Add (ZOOM) if virtual
                    full_name += " (ZOOM)"
                para = doc.add_paragraph(f"{time_slot}\t\t{full_name}")
            
            para.runs[0].font.size = Pt(16)
            para.runs[0].font.name = 'Arial'
            para.paragraph_format.space_after = 0  # Remove space after each paragraph
            para.paragraph_format.space_before = 0  # Remove space before each paragraph
       
        
        # Add paragraph break before "Pitch Schedule"
        #doc.add_paragraph()  # Add paragraph break
        
        # Add subheader "Pitch Schedule"
        title3 = doc.add_heading("Pitch Schedule", level=2)
        run2 = title3.runs[0]
        run2.font.size = Pt(16)
        run2.font.name = 'Arial'
        run2.font.color.rgb = RGBColor(0, 0, 0)  # RGB for black


        # Add afternoon schedule (pitch_data) and handle break times
        afternoon_data = afternoon_df[afternoon_df['room_name'] == room]
        for _, row in afternoon_data.iterrows():
            time_slot = row['formatted_timeslot']
            if time_slot == "3:45-4:00":  # Break time for afternoon session
                full_name = "BREAK"
                para = doc.add_paragraph(f"{time_slot}\t\t\t{full_name}")
                para.runs[0].bold = True  # Make "Break" bold
            else:
                full_name = f"{row['First Name']} {row['Last Name']}"
                if pd.isna(row['Last Name']):
                    full_name = row['First Name']
                if row['virtual'] == 'Virtual':  # Add (ZOOM) if virtual
                    full_name += " (ZOOM)"
                para = doc.add_paragraph(f"{time_slot}\t\t\t{full_name}")
            
            para.runs[0].font.size = Pt(16)
            para.runs[0].font.name = 'Arial'
            para.paragraph_format.space_after = 0  # Remove space after each paragraph
            para.paragraph_format.space_before = 0  # Remove space before each paragraph

        # Add footer with event info in blue and bold
        section = doc.sections[-1]
        footer = section.footer
        footer_paragraph = footer.paragraphs[0]
        footer_paragraph.text = "Giveaway Drawings & Award Ceremony in College Park Ballroom at 5:50pm"
        footer_paragraph.runs[0].bold = True
        footer_paragraph.runs[0].font.color.rgb = RGBColor(0, 0, 0)  # Set color to black

        # Save the document with the room name as filename
        doc.save(f"Outputs/Print-outs/Saturday_{publisher}_{room}_meeting_schedule.docx")

# Example usage: assuming morning_df and afternoon_df are your datasets
# Generate documents for each room
for room in sat_morn2['room_name'].unique():
    create_word_doc_for_day(sat_morn2, sat_after2)


Next, we'll 4) print out the individual agent and editor schedules for Friday.

In [16]:
#pip install pandas python-docx docxtpl

In [17]:
# We have templates for this particular one. Let's load the required packages
import pandas as pd
from docxtpl import DocxTemplate
import os

In [18]:
# Fix the dataset so that all the pubname1s are editors, and all pubname2s are agents
def correct_row(row):
    if row['pubtype1'] == 'Agent' and row['pubtype2'] == 'Editor':
        # Swap the names and types
        row['pubname1'], row['pubname2'] = row['pubname2'], row['pubname1']
        row['pubtype1'], row['pubtype2'] = row['pubtype2'], row['pubtype1']
    return row

# Apply the correction to all rows
corrected_data = final_room_pairings_Friday.apply(correct_row, axis=1)

Now let's run the actual code.

In [19]:
def generate_schedule(row, template_path, output_dir, pubtype):
    doc = DocxTemplate(template_path)
    context = {
        "pubname1": row["pubname1"],
        "pubname2": row["pubname2"],
        "pubtype1": row["pubtype1"],
        "pubtype2": row["pubtype2"],
        "room_name": row["room_name"]
    }
    if pubtype == 'Agents':
        output_path = os.path.join(output_dir, f"Friday_{row['pubname2']}_schedule.docx")
    if pubtype == "Editors":
        output_path = os.path.join(output_dir, f"Friday_{row['pubname1']}_schedule.docx")

    doc.render(context)
    doc.save(output_path)

# Specify output directory
output_dir = "Outputs/Print-outs"


In [20]:
# Generate schedules for editors
for _, row in corrected_data.iterrows():
    generate_schedule(row, "Templates/Friday_editor_schedule.docx", output_dir, "Editors")

In [21]:
# Generate schedules for agents
for _, row in corrected_data.iterrows():
    generate_schedule(row, "Templates/Friday_agent_schedule.docx", output_dir, "Agents")

Awesome! Now let's do it for Saturday (which is a little easier).

In [22]:
def generate_sat_schedule(row, output_dir):

    if row['lit_guest_type'] == 'Agent':
        template_path = "Templates/Saturday_agent_schedule.docx"
    else:
        template_path = "Templates/Saturday_editor_schedule.docx"

    doc = DocxTemplate(template_path)
    
    context = {
        "lit_guest_name": row["lit_guest_name"],
        "lit_guest_type": row["lit_guest_type"],
        "room_name": row["room_name"]
    }

    output_path = os.path.join(output_dir, f"Saturday_{row['lit_guest_name']}_schedule.docx")

    doc.render(context)
    doc.save(output_path)

# Specify output directory
output_dir = "Outputs/Print-outs"

# Generate schedules
for _, row in final_saturday_rooms.iterrows():
    generate_sat_schedule(row, output_dir)


Lastly for printouts, we need to print out the friday and saturday schedules for the College Park Ballroom (the miniseminars and workshops).

In [23]:
# Load the data

fri_talks = pd.read_excel('List_of_genres_agents_editors.xlsx', sheet_name='minis_fri')
sat_talks = pd.read_excel('List_of_genres_agents_editors.xlsx', sheet_name='minis_sat')

# Set the conference dates
date_str_fri = '2025-05-02'
date_str_sat = '2025-05-03'


In [24]:
# We need to correct the timeslots to the correct dates and times for both
fri_talks['timeslot_start'] = pd.to_datetime(date_str_fri + ' ' + fri_talks['timeslot_start'].astype(str))
fri_talks['timeslot_end'] = pd.to_datetime(date_str_fri + ' ' + fri_talks['timeslot_end'].astype(str))

# Adjust the times to represent the afternoon (add 12 hours if in AM range)
fri_talks['timeslot_start'] = fri_talks['timeslot_start'].apply(
    lambda x: x + pd.Timedelta(hours=12) if x.hour < 9 else x
)
fri_talks['timeslot_end'] = fri_talks['timeslot_end'].apply(
    lambda x: x + pd.Timedelta(hours=12) if x.hour < 9 else x
)

In [25]:
sat_talks['timeslot_start'] = pd.to_datetime(date_str_sat + ' ' + sat_talks['timeslot_start'].astype(str))
sat_talks['timeslot_end'] = pd.to_datetime(date_str_sat + ' ' + sat_talks['timeslot_end'].astype(str))

# Adjust the times to represent the afternoon (add 12 hours if in AM range)
sat_talks['timeslot_start'] = sat_talks['timeslot_start'].apply(
    lambda x: x + pd.Timedelta(hours=12) if x.hour < 9 else x
)
sat_talks['timeslot_end'] = sat_talks['timeslot_end'].apply(
    lambda x: x + pd.Timedelta(hours=12) if x.hour < 9 else x
)

In [26]:
template_path = "Templates/Schedules for Posting - Friday Talks.docx"  # Path to your Word template
doc = DocxTemplate(template_path)

# Create the context
context = {
    "mini1": fri_talks.loc[0, "topic"],
    "speaker": fri_talks.loc[0, "speaker"],
    "designation": fri_talks.loc[0, "designation"],
    "mini2": fri_talks.loc[1, "topic"],
    "workshop": fri_talks.loc[2, "topic"],
}

# Render the template with the context
doc.render(context)

# Save the populated document
output_path = "Outputs/Print-outs/Friday_Schedule_CollegeParkBallrooms.docx"
doc.save(output_path)

In [27]:
template_path = "Templates/Schedules for Posting - Saturday Talks.docx"  # Path to your Word template
doc = DocxTemplate(template_path)

# Create the context
context = {
    "mini1": sat_talks.loc[0, "topic"],
    "mini2": sat_talks.loc[1, "topic"],
    "mini3": sat_talks.loc[2, "topic"],
    "mini4": sat_talks.loc[3, "topic"],
    "speaker1": sat_talks.loc[0, "speaker"],
    "designation1": sat_talks.loc[0, "designation"],
    "speaker2": sat_talks.loc[2, "speaker"],
    "designation2": sat_talks.loc[2, "designation"],
}

# Render the template with the context
doc.render(context)

# Save the populated document
output_path = "Outputs/Print-outs/Saturday_Schedule_CollegeParkBallrooms.docx"
doc.save(output_path)

# Individual Participant Schedules

Before we do anything, since we've already dealt with the manuscript critiques, pitches and query critiques, let's drop those rows from the registered dataset.

In [28]:
fri_workshop = registered[registered['Agenda Item Name']=='Friday Workshop- Writer Beware: How Writers Can Protect Themselves']

import datetime

directory = 'May2025_reports'
most_recent_file = max(
    (f for f in os.listdir(directory) if f.startswith('Allparticipants_') and f.endswith('.csv')),
    key=lambda x: datetime.datetime.strptime(x.split('_')[1].split('.')[0], '%m-%d-%y'),
)

# Load the most recent file
most_recent_path = os.path.join(directory, most_recent_file)
all_participants = pd.read_csv(most_recent_path)
del(most_recent_file, most_recent_path)

all_participants = all_participants.rename(columns={'Email Address':'Email'})
virtual_only = all_participants.loc[all_participants['Hotel vs. Zoom'] == 'Virtually via Zoom (only available for query letter critiques, manuscript sample critiques, and pitches)', :]

In [29]:
registered2 = registered.loc[(~(registered['Agenda Item Name'].str.contains('Query Letter Critique')) & 
                             ~(registered['Agenda Item Name'].str.contains('Manuscript Critique')) &
                             ~(registered['Agenda Item Name'].str.contains('Pitch [A-Z]'))), :]

Let's also drop the pre-conference edits, since those have already happened prior to the event and is something George takes care of.

In [30]:
registered3= registered2.loc[(~(registered2['Agenda Item Name'].str.contains('Pre-conference Edit'))), :]

Let's change the 'Virtual' variable to match the 'virtual' variable found in our other datasets (which is formatted more nicely).

In [31]:
registered3['virtual'] = registered3['Virtual'].replace(['Virtually via Zoom (only available for query letter critiques, manuscript sample critiques, and pitches)', 'In person at the conference hotel'],
                                                              ['Virtual', 'In person'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Lastly, since this is for the schedules they'll be picking up at check-in, we can drop the check-in rows. (Also, now that we added the coaching activity, we need to drop that too)

In [32]:
registered3 = registered3.loc[~registered3['Agenda Item Name'].str.contains('Check-in|Coaching')]

Now let's separate out into Friday vs Saturday stuff.

In [33]:
print(registered3['Agenda Item Name'].value_counts())

Agenda Item Name
Award Ceremony & Prize Giveaway                                       172
Friday Mini-Seminars                                                  172
Friday Night Mixer                                                    172
Saturday Afternoon Mini-Seminars                                      172
Saturday Morning Mini-Seminars                                        172
Friday Publisher Q&A Panel                                             60
Saturday Agent Q&A Panel                                               58
Friday Workshop- Writer Beware: How Writers Can Protect Themselves     43
Book Fair Book Selling                                                 10
Name: count, dtype: int64


In [34]:
fri_activities = registered3.loc[(registered3['Agenda Item Name'].str.contains('Friday|Book')), :]

In [35]:
sat_activities = registered3.loc[registered3['Agenda Item Name'].str.contains('Saturday|Award'), :]

Awesome. Let's delete a few of these intermediate datasets and then move on.

In [36]:
del(registered3, registered2)

## Virtual participants

Thankfully, there's nothing to print for the virtual participants, so we can ignore them for now.

## In person participants

In order to print each individual participant's schedules for Friday and Saturday, we need to merge together all our different datasets, but doing so separately for Friday vs Saturday. We're going to restrict these two datasets to people who are IN PERSON.

**FRIDAY:**

1) Merge the coaching and query letter stuff together, plus information about whether they're doing the workshop, Q&A panel, mini-seminars, or book fair, friday night social, as well as any waitlist information for those activities. Note that check-in is the same for everyone, so we don't need to include that. It'll be in the template. 
2) We'll filter this full dataset to only in person participants. It's okay to have multiple rows per participant. We just need to make sure the core variables are all named the same, such as 'Session', 'publisher', 'room_name', 'timeslot_start'.
3) Once we have that, we'll work to compile it all and print it out to word using a template.

**SATURDAY:**

1) We'll merge the MS and pitch datasets together, along with the Q&A panel, the mini seminars, and the award ceremony. We'll also include any waitlists for those activities.
2) We'll filter this to in person only, and make sure all the variables have the same names in this combined dataset so we can more easily compile it.
3) We'll print it to word using a template.

In [37]:
# We need to change the session names so they better match what we want to print. We'll call this new variable 'Session' to match the other datasets
fri_activities['Session'] = fri_activities['Agenda Item Name'].replace(['Book Fair Book Selling', 'Friday Mini-Seminars', 'Friday Publisher Q&A Panel', 'Friday Workshop- Writer Beware: How Writers Can Protect Themselves'],
                                                              ['Selling your book(s) at the Book Fair', 'Mini-seminar', 'Publisher Q&A panel', 'Workshop, "Writer Beware: How Writers Can Protect Themselves"'])

**Note** for the above: the Book fair people are SELLERS, not buyers. We will apply the book fair to people's schedules as part of the word doc code.

In [38]:
sat_activities['Session']=sat_activities['Agenda Item Name'].replace(['Saturday Afternoon Mini-Seminars', 'Saturday Morning Mini-Seminars', 'Saturday Agent Q&A Panel'],
                                                                    ['Mini-seminar', 'Mini-seminar', 'Agent Q&A panel'])

We need to deal with the miniseminars. There's two, but in the data file it just shows one. For Friday and Saturday, let's identify anyone who's signed up for the mini-seminars and create a dataset with their rows for that

In [39]:
# Friday
reg_fri_minis = fri_activities.loc[fri_activities['Session']=='Mini-seminar'].merge(fri_talks.loc[fri_talks['type']=='miniseminar'], how="cross")

In [40]:
# Saturday - note that everyone who has the morning minisemiars also has the afternoon ones, so we don't need to keep both rows and can just link each person once to the four talks
reg_sat_minis = sat_activities.loc[sat_activities['Agenda Item Name']=='Saturday Afternoon Mini-Seminars'].merge(sat_talks.loc[sat_talks['type']=='miniseminars'], how="cross")

In [41]:
# Okay last thing before change is to combine the 'designation' and 'speaker' information into a 'publisher' column.
reg_fri_minis['publisher'] = reg_fri_minis['designation'] + ", " + reg_fri_minis['speaker'] + ", on '" + reg_fri_minis['topic'] + "'"

In [42]:
reg_sat_minis['publisher'] = reg_sat_minis['designation'] + ", " + reg_sat_minis['speaker'] + ", on '" + reg_sat_minis['topic'] + "'"

In [43]:
# Now let's clean up both those datasets so they'll be ready to merge later
reg_fri_minis = reg_fri_minis[['Email', 'First Name', 'Last Name', 'Session', 'virtual', 'phone', 'room_name', 'timeslot_start', 'timeslot_end', 'publisher']]

In [44]:
reg_sat_minis = reg_sat_minis[['Email', 'First Name', 'Last Name', 'Session', 'virtual', 'phone', 'room_name', 'timeslot_start', 'timeslot_end', 'publisher']]

Okay, let's do teh same thing for the workshop. Specifically, we want to merge the single workshop information with the participants in the workshops.

In [45]:
reg_workshop = fri_activities.loc[fri_activities['Agenda Item Name'].str.contains('Workshop')].merge(fri_talks.loc[fri_talks['type']=='Friday_workshop'], how="cross")
reg_workshop['publisher'] = reg_workshop['designation'] + " " + reg_workshop['speaker']
reg_workshop = reg_workshop[['Email', 'First Name', 'Last Name', 'Session', 'virtual', 'phone', 'room_name', 'timeslot_start', 'timeslot_end', 'publisher']]

Awesome! Now let's remove those sessions from the friday and saturday activities.

In [46]:
fri_activities = fri_activities.loc[~(fri_activities['Session'] =='Mini-seminar') & ~(fri_activities['Session'].str.contains('Workshop'))]
sat_activities = sat_activities.loc[~(sat_activities['Session'] =='Mini-seminar')]

Okay, now we need to clean up the fri_activities and sat_activities datasets so we can merge them with the other stuff for those days. Specifically, we need to add timeslot_start and timeslot_end to everything, as well as room_name. We'll leave 'publisher' blank for these data.

In [47]:
print(fri_activities['Session'].value_counts())

Session
Friday Night Mixer                       172
Publisher Q&A panel                       60
Selling your book(s) at the Book Fair     10
Name: count, dtype: int64


Let's add in the timeslot start and end times for everyone for the Friday and Saturday activities

In [48]:
fri_activities['timeslot_start'] = fri_activities['Session'].replace(['Publisher Q&A panel', 'Friday Night Mixer', 'Selling your book(s) at the Book Fair'],
                                                                    [pd.to_datetime(date_str_fri + ' 12:30'), pd.to_datetime(date_str_fri + ' 20:00'), pd.to_datetime(date_str_fri + ' 11:00')])

fri_activities['timeslot_end'] = fri_activities['Session'].replace(['Publisher Q&A panel', 'Friday Night Mixer', 'Selling your book(s) at the Book Fair'],
                                                                   [pd.to_datetime(date_str_fri + ' 1:30'), pd.to_datetime(date_str_fri + ' 23:00'), pd.to_datetime(date_str_fri + ' 16:00')])



In [49]:
print(sat_activities['Session'].value_counts())

Session
Award Ceremony & Prize Giveaway    172
Agent Q&A panel                     58
Name: count, dtype: int64


In [50]:
sat_activities['timeslot_start'] = sat_activities['Session'].replace(['Agent Q&A panel', 'Award Ceremony & Prize Giveaway'],
                                                                   [pd.to_datetime(date_str_sat + ' 09:00'), pd.to_datetime(date_str_sat + ' 17:45')])
sat_activities['timeslot_end'] = sat_activities['Session'].replace(['Agent Q&A panel', 'Award Ceremony & Prize Giveaway'],
                                                                   [pd.to_datetime(date_str_sat + ' 10:00'), pd.to_datetime(date_str_sat + ' 18:30')])



Lastly, let's add room info for these activities.

In [51]:
fri_activities['room_name'] = fri_activities['Session'].replace(['Publisher Q&A panel', 'Friday Night Mixer', 'Selling your book(s) at the Book Fair'],
                                                                    ['College Park Ballroom', 'Candler Room on the 1st floor near the restaurant', 'Peachtree City'])


In [52]:
sat_activities['room_name'] ='College Park Ballroom'

Lastly, let's add a 'publisher', which is really just going to be a description of these events for the template.

In [53]:
fri_activities['publisher'] = fri_activities['Session'].replace(['Publisher Q&A panel', 'Friday Night Mixer', 'Selling your book(s) at the Book Fair'],
                                                                    ['', 'a cash bar, networking-bingo icebreaker and music', ''])


In [54]:
sat_activities['publisher'] = sat_activities['Session'].replace(['Agent Q&A panel', 'Award Ceremony & Prize Giveaway'],
                                                                   ['', 'prize giveaways, such as free lifetime Atlanta Writers Club memberships, followed by each agent and editor awarding certificates to participants for best manuscript sample submitted for critique and best pitch'])

Okay, now we need to fix up the other datasets a teensy bit. Specifically, we need to add 'timeslot_end' to all of them.

In [55]:
final_friday_assignments2['timeslot_end'] = final_friday_assignments2['timeslot_start'] + pd.Timedelta(minutes=15)
final_sataft_assignments2['timeslot_end'] = final_sataft_assignments2['timeslot_start'] + pd.Timedelta(minutes=15)
final_satmorn_assignments2['timeslot_end'] = final_satmorn_assignments2['timeslot_start'] + pd.Timedelta(minutes=15)
coaching_schedule['timeslot_end'] = coaching_schedule['timeslot_start'] + pd.Timedelta(minutes=15) # Note: for the participants, it's 15 minutes. For the actual schedule, there's a 2 minute break after this so the slots are 17 mins each

Okay, next-to-last step is to combine everything into a single 'friday' and 'saturday' dataset with all their activities on these days.

In [56]:
# Friday needs to combine the coaching sessions, the query letter critiques, the mini seminars, the workshop, and the friday night mixer
all_friday = pd.merge(pd.merge(pd.merge(pd.merge(fri_activities, final_friday_assignments2, how="outer"), 
                        coaching_schedule, how="outer"), reg_workshop, how="outer"), reg_fri_minis, how="outer")

In [57]:
# Saturday needs to combine the Q&A panel, the mini seminars, the award ceremony, and people's pitches and MS critiques
all_saturday = pd.merge(pd.merge(pd.merge(final_sataft_assignments2, final_satmorn_assignments2, how="outer"), reg_sat_minis, how='outer'), sat_activities, how='outer')

And now the final step is to drop any virtual people from these lists, and keep only the relevant variables

In [58]:
all_friday_inperson = all_friday.loc[all_friday['virtual']=='In person']
all_saturday_inperson = all_saturday.loc[all_saturday['virtual']=='In person']

Lastly, let's drop it down to only the variables we need

In [59]:
all_friday_inperson = all_friday_inperson.loc[:, ['Email', 'First Name', 'Last Name', 'phone', 'Session', 'timeslot_start', 'timeslot_end', 'room_name', 'publisher']]
all_saturday_inperson = all_saturday_inperson.loc[:, ['Email', 'First Name', 'Last Name', 'phone', 'Session', 'timeslot_start', 'timeslot_end', 'room_name', 'publisher']]

Okay, very last thing: we need to add in a row for the Book fair for everyone Friday, and add in lunch for everyone on saturday.

In [60]:
bookfair = all_friday_inperson.drop_duplicates(subset='Email', keep='first')
bookfair['Session'] = 'Book fair'
bookfair['room_name'] = 'Peachtree City'
bookfair['timeslot_start'] = pd.to_datetime(date_str_fri + ' 11:00')
bookfair['timeslot_end'] = pd.to_datetime(date_str_fri + ' 16:00')
bookfair['publisher'] = 'published authors from the Atlanta Writers Club. Swing by to chat with them about their writing journey, hear more about their books, and buy signed copies.'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [61]:
lunch = all_saturday_inperson.drop_duplicates(subset='Email', keep='first')
lunch['Session'] = "Lead sponsor presentation/Free time"
lunch['timeslot_start'] = pd.to_datetime(date_str_sat + ' 13:00')
lunch['timeslot_end'] = pd.to_datetime(date_str_sat + ' 14:00')
lunch['room_name'] = 'College Park Ballroom'
lunch['publisher'] = ''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [62]:
# Add these back in
all_friday_inperson2 = pd.merge(all_friday_inperson, bookfair, how="outer")
all_saturday_inperson2 = pd.merge(all_saturday_inperson, lunch, how="outer")

Awesome! And now last step is to sort by person (aka email) and timeslot_start, so that everyone's rows are already ordered.

In [63]:
all_friday_inperson2= all_friday_inperson2.sort_values(['Email', 'timeslot_start'])
all_saturday_inperson2= all_saturday_inperson2.sort_values(['Email', 'timeslot_start'])

Okay, and now last thing: we need to separate out the waitlist information by Friday vs Saturday, and ideally, sort by their waitlist ranking.

In [64]:
wait_all['Waitlist_Number'] = wait_all['Agenda Item Name'].str.extract(r'Waitlist #(\d+)').astype(int)
wait_fri = wait_all.loc[wait_all['Agenda Item Name'].str.contains('Query|Coach')].sort_values(['Email', 'Waitlist_Number'])
wait_sat = wait_all.loc[wait_all['Agenda Item Name'].str.contains('Pitch|Manuscript')].sort_values(['Email', 'Waitlist_Number'])

### Print out the in-person schedules

I initially made it a bulleted list. However, that didn't format very nicely, so I tried a table code instead, which I think looks nicer. Below is for friday.

In [65]:
# Define the phone note:
def phone_note(phone):
    return f"We have your phone number listed as {phone}. If this is not your number, please tell a check-in table volunteer so we can update it, as send automated text reminders in advance of any meetings. **The text will show a 678 area code.**"

##### Friday

In [66]:
from docx import Document
from docx.shared import Cm
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
from docx.shared import Pt, RGBColor, Inches
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

def create_word_doc_with_table(participant_df, waitlist_df):
    # Create a single Word document for all participants
    doc = Document()

    # Set default styles for the document
    styles = doc.styles

    # Modify the Normal style for non-header text
    normal_style = styles['Normal']
    normal_style.font.name = 'Arial'
    normal_style.font.size = Pt(12)

    # Modify the default style for tables
    table_style = styles.add_style('CustomTable', WD_STYLE_TYPE.TABLE)
    table_style.font.name = 'Arial'
    table_style.font.size = Pt(12)

    # Merge the waitlist data with the participant session data
    merged_df = pd.merge(participant_df, waitlist_df[['Email', 'Agenda Item Name']], 
                         on='Email', how='left', indicator=True)

    first_participant = True  # To track the first participant and skip initial page break
    initial_page_breaks = len(doc.element.xpath('//w:br[@w:type="page"]')) # create the initial page break amount

    flag_friday = [] # We'll do this to identify anyone whose schedule is more than 1 page
    friday_processed = []  # Creating this list so we can easiliy identify all participants who were output in the word doc

    for _, participant in participant_df.drop_duplicates(subset=['Email']).iterrows():

        # Add participant's email to the processed list
        friday_processed.append(participant['Email'])

        if not first_participant:
            doc.add_page_break()  # Start new page for subsequent participants
        first_participant = False

        # Add header with the participant's name
        header = doc.add_heading(f"{participant['First Name']} {participant['Last Name']}", level=1)
        header.alignment = 1  # Center alignment
        for run in header.runs:
            run.font.size = Pt(36)
            run.font.name = 'Arial'
            run.font.color.rgb = RGBColor(0, 0, 0)  # Set header to black

        doc.add_paragraph("") # Add paragraph break to get more space

        # Add session section header
        session_header = doc.add_heading("Your schedule for Friday, May 2nd:", level=2)
        for run in session_header.runs:
            run.font.color.rgb = RGBColor(0, 0, 0)  # Set session header to black
            run.font.name = 'Arial'
            run.font.size = Pt(14)
        session_header.alignment = 1 # center alignment

        # Add paragraph break after session_header
        small_break = doc.add_paragraph("")

        # Create a table for the schedule
        table = doc.add_table(rows=0, cols=2)
        table.autofit = False

        # Set column widths manually
        times_column_width = Inches(1.75) 
        details_column_width = Inches(5) 

        # Add rows to the table
        include_phone_note = False

        # Note that in the below, the merged_df has duplicated rows for any participants with waitlist spots (because we cross-merged). Let's delete those duplicate rows for this part
        for _, session in merged_df[merged_df['Email'] == participant['Email']].drop_duplicates(subset=['Session', 'room_name', 'Email', 'publisher', 'timeslot_start', 'timeslot_end']).iterrows():

            timeslot = f"{session['timeslot_start'].strftime('%I:%M %p')} - {session['timeslot_end'].strftime('%I:%M %p')}"
            if session['publisher']:  # Check if the publisher exists and is not empty
                details = f"{session['Session']} in {session['room_name']} with {session['publisher']}"
            else:
                details = f"{session['Session']} in {session['room_name']}"

            row = table.add_row()
            row.cells[0].text = timeslot
            row.cells[1].text = details

            # Apply widths to each added row as well
            row.cells[0].width = times_column_width
            row.cells[1].width = details_column_width 

            # Check if the session includes "Query" or "Coaching"
            if 'Query' in session['Session'] or 'Coaching' in session['Session']:
                include_phone_note = True

        # Add waitlist information if applicable
        waitlist_sessions = merged_df[(merged_df['Email'] == participant['Email']) & (merged_df['_merge'] == 'both')].drop_duplicates(subset=['Agenda Item Name', 'Email'])
        if not waitlist_sessions.empty:
            waitlist_header = doc.add_paragraph("You're currently waitlisted for:", style='Heading 3')
            for run in waitlist_header.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)  # Set waitlist header to black
                run.font.name = 'Arial'
                run.font.size = Pt(14)

            for _, waitlist in waitlist_sessions.iterrows():
                waitlist_info = f"- {waitlist['Agenda Item Name']}"
                paragraph = doc.add_paragraph(waitlist_info, style='List Bullet')
                paragraph.paragraph_format.left_indent = Cm(2)  # Indent this a little bit

        # Add phone note and footer text if applicable
        if include_phone_note:
            note = doc.add_heading("Important note about your meeting(s):", level=2)
            for run in note.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)  # Set phone note header to black
                run.font.name = 'Arial'
                run.font.size = Pt(14)

            doc.add_paragraph(phone_note(participant['phone']), style='Normal')

            # Add italicized footer text
            footer_text = doc.add_paragraph(
                "Please arrive 15 minutes early for any query letter critiques or coaching sessions."
            )
            footer_text.runs[0].italic = True  # Make the text italicized

        # Track the final page break count
        final_page_breaks = len(doc.element.xpath('//w:br[@w:type="page"]'))

        # If two or more page breaks were added during this participant's content, flag them
        if (final_page_breaks - initial_page_breaks) >= 2:
            flag_friday.append(f"{participant['First Name']} {participant['Last Name']}")

    # Adjust document margins to fit all content on one page
    section = doc.sections[0]
    section.top_margin = Inches(0.5)
    section.bottom_margin = Inches(0.5)
    section.left_margin = Inches(0.75)
    section.right_margin = Inches(0.75)

    # Save the document
    doc.save("Outputs/Print-outs/Friday_In-person_participant_schedules.docx")

    friday_processed = pd.DataFrame(friday_processed, columns=["Email"])
    return(friday_processed)

    # Let's also print out participants whose content went over a page
    if flag_friday:
        print("The following participants have schedules exceeding one page:")
        for name in flag_friday:
            print(name)

# Create the Word doc for each participant in the dataset
processed_friday = create_word_doc_with_table(all_friday_inperson2, wait_fri)


In [67]:
# Check that we processed all the peopel we should have
len(all_friday_inperson2.drop_duplicates(subset='Email', keep='first')) == len(processed_friday)

True

Awesome! It processed everybody it should've :)

##### Saturday

And now for **Saturday!**

In [68]:
def create_word_doc_saturday(participant_df, waitlist_df):
    # Create a single Word document for all participants
    doc = Document()

    # Set default styles for the document
    styles = doc.styles

    # Modify the Normal style for non-header text
    normal_style = styles['Normal']
    normal_style.font.name = 'Arial'
    normal_style.font.size = Pt(12)

    # Modify the default style for tables
    table_style = styles.add_style('CustomTable', WD_STYLE_TYPE.TABLE)
    table_style.font.name = 'Arial'
    table_style.font.size = Pt(12)

    # Merge the waitlist data with the participant session data
    merged_df = pd.merge(participant_df, waitlist_df[['Email', 'Agenda Item Name']], 
                         on='Email', how='left', indicator=True)

    first_participant = True  # To track the first participant and skip initial page break

    flag_saturday = [] # We'll do this to identify anyone whose schedule is more than 1 page
    initial_page_breaks = len(doc.element.xpath('//w:br[@w:type="page"]')) # create the initial page break amount
    saturday_processed = []  # Creating this list so we can easiliy identify all participants who were output in the word doc

    # Add a  header for everyone
    for section in doc.sections:
        header = section.header
        paragraph = header.paragraphs[0] if header.paragraphs else header.add_paragraph()
        paragraph.text = "Schedule for Saturday, May 3rd"
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER  # Center-align the header text

        # Set the font and size for the header
        run = paragraph.runs[0]
        run.font.name = 'Arial'
        run.font.size = Pt(10)

    for _, participant in participant_df.drop_duplicates(subset=['Email']).iterrows():

        # Add participant's email to the processed list
        saturday_processed.append(participant['Email'])

        if not first_participant:
            doc.add_page_break()  # Start new page for subsequent participants
        first_participant = False

        # Add header with the participant's name
        header = doc.add_heading(f"{participant['First Name']} {participant['Last Name']}", level=1)
        header.alignment = 1  # Center alignment
        for run in header.runs:
            run.font.size = Pt(28)
            run.font.name = 'Arial'
            run.font.color.rgb = RGBColor(0, 0, 0)  # Set header to black

        doc.add_paragraph("") # Add paragraph break to get more space

        # Add session section header
        #session_header = doc.add_heading("Your schedule for Saturday, May 3rd:", level=2)
        #for run in session_header.runs:
        #    run.font.color.rgb = RGBColor(0, 0, 0)  # Set session header to black
        #    run.font.name = 'Arial'
        #    run.font.size = Pt(14)
        #session_header.alignment = 1 # center alignment

        # Add paragraph break after session_header
        #doc.add_paragraph("") # can't do this for Saturday because there's too much text

        # Create a table for the schedule
        table = doc.add_table(rows=0, cols=2)
        table.autofit = False

        # Set column widths manually
        times_column_width = Inches(1.75) 
        details_column_width = Inches(5) 

        # Add rows to the table
        include_phone_note = False
        
        # Note that in the below, the merged_df has duplicated rows for any participants with waitlist spots (because we cross-merged). Let's delete those duplicate rows for this part
        for _, session in merged_df[merged_df['Email'] == participant['Email']].drop_duplicates(subset=['Session', 'room_name', 'Email', 'publisher', 'timeslot_start', 'timeslot_end']).iterrows():

            timeslot = f"{session['timeslot_start'].strftime('%I:%M %p')} - {session['timeslot_end'].strftime('%I:%M %p')}"
            if session['publisher']:  # Check if the publisher exists and is not empty
                details = f"{session['Session']} in {session['room_name']} with {session['publisher']}"
            else:
                details = f"{session['Session']} in {session['room_name']}"

            row = table.add_row()
            row.cells[0].text = timeslot
            row.cells[1].text = details

            # Apply widths to each added row as well
            row.cells[0].width = times_column_width
            row.cells[1].width = details_column_width 

            # Check if the session includes "Query" or "Coaching"
            if 'Pitch' in session['Session'] or 'Manuscript' in session['Session']:
                include_phone_note = True

        # Add waitlist information if applicable
        waitlist_sessions = merged_df[(merged_df['Email'] == participant['Email']) & (merged_df['_merge'] == 'both')].drop_duplicates(subset=['Agenda Item Name', 'Email'])
        if not waitlist_sessions.empty:
            waitlist_header = doc.add_paragraph("You're currently waitlisted for:", style='Heading 3')
            for run in waitlist_header.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)  # Set waitlist header to black
                run.font.name = 'Arial'
                run.font.size = Pt(14)

            for _, waitlist in waitlist_sessions.iterrows():
                waitlist_info = f"{waitlist['Agenda Item Name']}"
                paragraph = doc.add_paragraph(waitlist_info, style='List Bullet')
                paragraph.paragraph_format.left_indent = Cm(2)  # Indent this a little bit


        # Add phone note and footer text if applicable
        if include_phone_note:
            note = doc.add_heading("Important note about your meeting(s) today:", level=2)
            for run in note.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)  # Set phone note header to black
                run.font.name = 'Arial'
                run.font.size = Pt(14)

            doc.add_paragraph(phone_note(participant['phone']), style='Normal')

            # Add italicized footer text
            footer_text = doc.add_paragraph(
                "Please arrive 15 minutes early for any manuscript critiques or pitches."
            )
            footer_text.runs[0].italic = True  # Make the text italicized

        # Track the final page break count
        final_page_breaks = len(doc.element.xpath('//w:br[@w:type="page"]'))

        # If two or more page breaks were added during this participant's content, flag them
        if (final_page_breaks - initial_page_breaks) >= 2:
            flag_saturday.append(f"{participant['First Name']} {participant['Last Name']}")


    # Adjust document margins to fit all content on one page
    section = doc.sections[0]
    section.top_margin = Inches(0.5)
    section.bottom_margin = Inches(0.5)
    section.left_margin = Inches(0.75)
    section.right_margin = Inches(0.75)

    # Save the document
    doc.save("Outputs/Print-outs/Saturday_In-person_participant_schedules.docx")

    saturday_processed = pd.DataFrame(saturday_processed, columns=["Email"])
    return(saturday_processed)

    # Let's also print out participants whose content went over a page
    if flag_saturday:
        print("The following participants have schedules exceeding one page:")
        for name in flag_saturday:
            print(name)

# Create the Word doc for each participant in the dataset
processed_saturday = create_word_doc_saturday(all_saturday_inperson2, wait_sat)


In [69]:
# Let's again double check that everyone was output who was supposed to be output:
len(all_saturday_inperson2.drop_duplicates(subset='Email', keep='first')) == len(processed_saturday)

True

Also, yay because nobody had more than 1 page!

In [70]:
del(break_data, break_time, cell, context, doc, header_cells, location, output_dir, output_path, pairing, pairings_doc, paragraph, room, row_cells, run, section,
    sections, table, tbl, tbl_borders, template_path, title, correct_row, create_word_doc, create_word_doc_for_day, create_word_doc_saturday, create_word_doc_with_table,
    Document, format_timeslot, generate_sat_schedule, generate_schedule, OxmlElement,  phone_note, qn, Cm, DocxTemplate, Inches, Pt, RGBColor, WD_PARAGRAPH_ALIGNMENT, 
    WD_STYLE_TYPE, break_after, break_df, break_morn, bookfair, corrected_data, lunch, sat_after2, sat_morn2, sat_talks, row,
    room_data, all_friday_inperson, all_saturday_inperson, fri_talks, friday_prints2, final_saturday_rooms, final_room_pairings_Friday)

# Waitlist Schedules - TODO

In this section, we'll create the waitlist printouts that go on the walls. 

# TablesReady Export

We need to create a very specific csv file to import into TablesReady. It needs to have the following:

* Date --> ex 10/1/2024
* Time --> 6:30pm (no spaces!)
* Name --> do first and last name
* Size --> set this to 1 for everyone
* Phone --> 10-digit code
* Notes --> not necessarily needed, but will put the session tag that needs the reminder

NOTE: We only send automated text reminders for the QLC, author coaching, manuscript critiques and pitches. This csv file should be restricted to those participants. We will also send it to both virtual and in-person people.

In [71]:
both_days = pd.merge(all_friday_inperson2, all_saturday_inperson2, how="outer")

# Keep only the rows with the 4 activities, and keep only certain columns
both_days = both_days.loc[both_days['Session'].str.contains('Pitch|Critique|Coach|Manuscript'), ]

In [72]:
both_days['Name'] = both_days['First Name'] + " " + both_days['Last Name']
both_days['Size'] = 1
both_days['Notes'] = both_days['Session'] + " with " + both_days['publisher']
both_days['Time'] = both_days['timeslot_start'].dt.strftime('%I:%M%p').str.lower()
both_days['Date'] = both_days['timeslot_start'].dt.strftime("%m/%d/%Y")
both_days['Phone'] = both_days['phone']

both_days = both_days[['Date', 'Time', 'Name', 'Size', 'Phone', 'Notes']]

In [73]:
# Filter out anyone with missing phones (if applicable)
print(both_days.isnull().any()) #checks column-wise
print(both_days.isnull().values.any()) #checks entire DataFrame

both_days = both_days.dropna() # Drops 5 people


Date     False
Time     False
Name     False
Size     False
Phone     True
Notes    False
dtype: bool
True


In [74]:
# Now print to a csv for upload
today = datetime.date.today().strftime('%Y-%m-%d') # Let's save today's date for when writing excel files
both_days.to_csv(f"Outputs/For Mail Merge/TablesReady_import_csv_{today}.csv", index=False)

# Create Nametags

We need to create nametags for everyone! For this, we need to create the full list of people from the roster (including waitlist only), and simply print people's first and last names onto the page. We also want to put a little black dot at the bottom of everyone's nametags if they signed up for the Friday workshop.

In [75]:
# Create a nametags dataset that includes everyone who might be in person -  both waitlist-only and any registered people.
nametags = pd.merge(wait_all.loc[~wait_all['Email'].isin(virtual_only['Email']), ['First Name', 'Last Name', 'Email']], pd.merge(all_friday_inperson2, all_saturday_inperson2, how="outer"), how="outer").drop_duplicates(subset='Email', keep='first')

In [76]:
# Identify everyone who is in the Friday workshop so we can add a black dot
nametags['workshop'] = nametags['Email'].isin(fri_workshop['Email'])

In [77]:
# Create a single 'Name' field, and only keep that and the workshop thing
nametags['Name'] = nametags['First Name'] + " " + nametags['Last Name']
nametags =  nametags[['First Name', 'Last Name', 'workshop']]

In [104]:
# We're gonna just print this out for George... Couldn't manage to get it to work for whatever reason
nametags.to_excel('Outputs/Rosters/Name tags for mail merge.xlsx', index=False)

In [170]:
# Change all workshop things from True False to * or missing
nametags['flag'] = nametags['workshop'].apply(
    lambda x:  "*" if x else ""
)

Okay,  I spent literally 7 hours on this... I could not figure out how to just print the name tags using a template, or just printing to a word doc, etc. It drove me  insane and I finally had to give up.

# MailerLite Exports

Okay, so this is one is funkier. I tried originally making a single variable that was a list of items, but no matter what I tried, MailerLite will not display this nicely, so the only option is to export individual variables for everything that we need. So in this step, we're going to one giant dataset, for both virtual and in person people, and even including waitlist-only people, with EVERY activity (waitlisted and already paid). We will *exclude* any of the general stuff, like check-in, or the Friday night social, or the free mini-seminars.

These are the following "Fields" (aka variables) we will create for upload into MailerLite:

*General variables to enable email segments*:
- Virtual (True/False) --> We'll use this in MailerLite to create our different email segments
- Friday_Activities (True/False) --> Participants have registered activities on Friday. This field will have values even for the virtual people, but will **NOT** have values for the waitlist people.
- Saturday_Activities (True/False) --> Participants have registered activities on Saturday. This field will have values even for the virtual people, but will **NOT** have values for the waitlist people.
- Waitlisted (True/False) --> Participants with any waitlisted items. We'll use this to create a segment (when combined with 'No' above for Friday and Saturday activities) to identify participants who haven't paid for anything and who are ONLY on the waitlist.

*Variables to capture scheduling stuff*:

The below will appear as "1:00pm - Pitch with Wendy Wong in Board Room I", or "Waitlist #1 - Pitch with Wendy Wong"
- ms1, ms2, ms3 --> Participants can have up to 3 manuscript critiques
- wait_ms1, wait_ms2, wait_ms3 --> Participants can have up to 3 MS waitlist spots
- pitch1, pitch2, pitch3 
- wait_pitch1, wait_pitch2, wait_pitch3
- qlc1, qlc2 --> Participants can only sign up for two query letter critiques
- wait_qlc1, wait_qlc2 --> They can also only have two waitlist spots for this
- coach1, coach2 --> Participants can only sign up for two coaching spots
- wait_coach1, wait_coach2
- workshop
- QApanel_agent, QApanel_editor
- bookfair (sellers)
- Friday_mixer
- Friday_minis (miniseminars) - True/False
- Saturday_minis (miniseminars) - True/False

This results in a total of 27 variables, plus we need to keep email, first and last name, and phone number. (The latter three just for posterity's sake - we likely won't import those fields into MailerLite).

Once we have this massive datastep, the next step will be to import it into MailerLite by clicking 'Add Subscribers' and 'import using csv'. 


In [421]:
# Start with the dataset with ALL participants
all_conf_unique_participants = all_participants.drop_duplicates(subset='Email', keep='first')
all_conf_unique_participants = all_conf_unique_participants[['Email', 'First Name', 'Last Name']]

In [422]:
all_friday['timeslot'] = all_friday['timeslot_start'].dt.strftime('%I:%M %p') + " - " + all_friday['timeslot_end'].dt.strftime('%I:%M %p')
all_friday['details'] = all_friday.apply(lambda session: f"{session['Session']} in {session['room_name']} with {session['publisher']}" if session['publisher'] else f"{session['Session']} in {session['room_name']}", axis=1)
all_friday['output'] = all_friday['timeslot'] + ": " + all_friday['details']

In [423]:
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == 'Workshop, "Writer Beware: How Writers Can Protect Themselves"', ['Email', 'output']], how='left').rename(columns={'output':'workshop'})

In [424]:
# Awesome, now we need to add each of these variables by session type into the dataset
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == "Selling your book(s) at the Book Fair", ['Email', 'Session', 'output']], how='left').rename(columns={'output': 'bookfair'})


In [425]:
# Add in the different flag variables

    # Virtual flag
all_conf_unique_participants['virtual'] = all_conf_unique_participants['Email'].isin(virtual_only['Email'])

    # If have any Friday activities (regardless if virtual or not -  but NOT set if they're waitlist only)
all_conf_unique_participants['friday_activities'] = all_conf_unique_participants['Email'].isin(coaching_schedule['Email']) | all_conf_unique_participants['Email'].isin(final_friday_assignments2['Email']) | all_conf_unique_participants['bookfair'] | all_conf_unique_participants['Email'].isin(fri_workshop['Email']) | all_conf_unique_participants['Email'].isin(reg_fri_minis['Email'])

    # If have any saturday activities (regardless of if virtual or not - but NOT set if they're waitlist only)
all_conf_unique_participants['saturday_activities'] = all_conf_unique_participants['Email'].isin(final_satmorn_assignments2['Email']) | all_conf_unique_participants['Email'].isin(final_sataft_assignments2['Email']) | all_conf_unique_participants['Email'].isin(reg_sat_minis['Email'])

    # Identify anyone with waitlist items
all_conf_unique_participants['waitlisted'] = all_conf_unique_participants['Email'].isin(wait_all['Email'])

    # Identify anyone with registered activities
all_conf_unique_participants['registered'] = all_conf_unique_participants['Email'].isin(registered['Email'])

In [426]:
#Double check the friday and saturday stuff with registered
print(all_conf_unique_participants[['friday_activities', 'registered']].value_counts())
print(all_conf_unique_participants[['saturday_activities', 'registered']].value_counts())
print(all_conf_unique_participants['saturday_activities'].value_counts())
print(all_conf_unique_participants[['saturday_activities', 'friday_activities']].value_counts())


friday_activities  registered
True               True          171
Name: count, dtype: int64
saturday_activities  registered
True                 True          171
Name: count, dtype: int64
saturday_activities
True    171
Name: count, dtype: int64
saturday_activities  friday_activities
True                 True                 171
Name: count, dtype: int64


In [427]:
# Okay let's just double check this more precisely:
any_friday_activities = registered.loc[registered['Agenda Item Name'].str.contains('Query|Coaching|Friday|Fair'), ['Email', 'phone']].drop_duplicates()
any_saturday_activities = registered.loc[registered['Agenda Item Name'].str.contains('Pitch|Manuscript|Saturday'), ['Email', 'phone']].drop_duplicates()

In [428]:
# Okay cool cool - indeed, everyone who registered for Friday is also registered for Saturday.
del(any_friday_activities, any_saturday_activities)

In [429]:
# Repeat for the publisher Q&A Panel
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == 'Publisher Q&A panel', ['Email', 'output']], how='left').rename(columns={'output':'QApanel_editor'})

In [430]:
# Repeat for the Friday night mixer
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == 'Friday Night Mixer', ['Email', 'output']], how='left').rename(columns={'output':'Friday_mixer'})

In [431]:
print(all_friday['Session'].value_counts())

Session
Mini-seminar                                                     344
Friday Night Mixer                                               172
Query Letter Critiques                                            65
Publisher Q&A panel                                               60
Workshop, "Writer Beware: How Writers Can Protect Themselves"     43
Selling your book(s) at the Book Fair                             10
Author Coaching with Jessica Handler                               7
Author Coaching with Mickey Dubrow                                 4
Name: count, dtype: int64


In [432]:
# Okay, we'll ignore the mini seminars (we'll automatically include those in the templates based on certain criteria). But now let's deal with the query critiques. People can have up to 2 of them
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == 'Query Letter Critiques', ['Email', 'output']].drop_duplicates(subset='Email', keep='first'), how='left').rename(columns={'output':'qlc1'})

In [433]:
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'] == 'Query Letter Critiques', ['Email', 'output']].drop_duplicates(subset='Email', keep='last'), how='left').rename(columns={'output':'qlc2'})
# Now delete any qlc2 that equals qlc1
all_conf_unique_participants.loc[all_conf_unique_participants['qlc2'] == all_conf_unique_participants['qlc1'], 'qlc2'] = np.nan

In [434]:
# We're gonna create a single a single flag for people with miniseminars
all_conf_unique_participants['Friday_minis'] = all_conf_unique_participants['Email'].isin(reg_fri_minis['Email'])
all_conf_unique_participants['Saturday_minis'] = all_conf_unique_participants['Email'].isin(reg_sat_minis['Email'])

**<font color='red'> UNCOMMENT THIS CODE</font>**

In [435]:
# Let's do the waitlists for the query letters now:
# **NOTE**: There are none
#all_conf_unique_participants = all_conf_unique_participants.merge(wait_query['Email', 'output'].drop_duplicates(subset='Email', keep='first'), how='left').rename(columns={'output':'wl_qlc1'})

In [436]:
#all_conf_unique_participants = all_conf_unique_participants.merge(wait_query['Email', 'output'].drop_duplicates(subset='Email', keep='last'), how='left').rename(columns={'output':'wl_qlc2'})
#all_conf_unique_participants.loc[all_conf_unique_participants['wl_qlc2'] == all_conf_unique_participants['wl_qlc1'], 'wl_qlc2'] = np.nan

In [437]:
# Do the coaching waitlists now
#all_conf_unique_participants = all_conf_unique_participants.merge(wait_coach['Email', 'output'].drop_duplicates(subset='Email', keep='first'), how='left').rename(columns={'output':'wl_coach1'})
#all_conf_unique_participants = all_conf_unique_participants.merge(wait_coach['Email', 'output'].drop_duplicates(subset='Email', keep='last'), how='left').rename(columns={'output':'wl_coach2'})
#all_conf_unique_participants.loc[all_conf_unique_participants['wl_coach2'] == all_conf_unique_participants['wl_coach1'], 'wl_coach2'] = np.nan

In [438]:
# Create the coaching variables now
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'].str.contains('Coaching'), ['Email', 'output']].drop_duplicates(subset='Email', keep='first'), how='left').rename(columns={'output':'coach1'})
all_conf_unique_participants = all_conf_unique_participants.merge(all_friday.loc[all_friday['Session'].str.contains('Coaching'), ['Email', 'output']].drop_duplicates(subset='Email', keep='last'), how='left').rename(columns={'output':'coach2'})
all_conf_unique_participants.loc[all_conf_unique_participants['coach2'] == all_conf_unique_participants['coach1'], 'coach2'] = np.nan

Now let's move on to adding in the saturday activities and info

In [439]:
all_saturday['timeslot'] = all_saturday['timeslot_start'].dt.strftime('%I:%M %p') + " - " + all_saturday['timeslot_end'].dt.strftime('%I:%M %p')
all_saturday['details'] = all_saturday.apply(lambda session: f"{session['Session']} in {session['room_name']} with {session['publisher']}" if session['publisher'] else f"{session['Session']} in {session['room_name']}", axis=1)
all_saturday['output'] = all_saturday['timeslot'] + ": " + all_saturday['details']

In [440]:
# Code for the Agent Q&A
all_conf_unique_participants = all_conf_unique_participants.merge(all_saturday.loc[all_saturday['Session'] == 'Agent Q&A panel', ['Email', 'output']], how='left').rename(columns={'output':'QApanel_agent'})

In [441]:
ms = all_saturday.loc[all_saturday['Session'] == 'Manuscript critique', ['Email', 'output']]

# we need to pivot this table so we can get a dataset with one row per person and 3 variables
ms['ms_index'] = ms.groupby('Email').cumcount() + 1  # This will create a 1, 2, 3 for each person

# Pivot the data, creating separate columns for ms1, ms2, ms3
pivoted_ms = ms.pivot_table(index='Email', columns='ms_index', values='output', aggfunc='first')

# Rename columns to ms1, ms2, ms3
pivoted_ms.columns = [f'ms{i}' for i in range(1, 4)]

# Reset index to bring 'Email' as a column
pivoted_ms = pivoted_ms.reset_index()

In [442]:
# Do the same for the pitches
pitches = all_saturday.loc[all_saturday['Session'] == 'Pitch', ['Email', 'output']]

# we need to pivot this table so we can get a dataset with one row per person and 3 variables
pitches['pitch_index'] = pitches.groupby('Email').cumcount() + 1  # This will create a 1, 2, 3 for each person

# Pivot the data, creating separate columns for ms1, ms2, ms3
pivoted_pitch = pitches.pivot_table(index='Email', columns='pitch_index', values='output', aggfunc='first')

# Rename columns to ms1, ms2, ms3
pivoted_pitch.columns = [f'pitch{i}' for i in range(1, 4)]

# Reset index to bring 'Email' as a column
pivoted_pitch = pivoted_pitch.reset_index()

In [443]:
# Do the same for the waitlisted pitches
wait_pitch = wait_all.loc[wait_all['Agenda Item Name'].str.contains('Pitch')]
wait_pitch['pitch_index'] = wait_pitch.groupby('Email').cumcount() + 1  # This will create a 1, 2, 3 for each person

# Pivot the data, creating separate columns for ms1, ms2, ms3
pivoted_wait_pitch = wait_pitch.pivot_table(index='Email', columns='pitch_index', values='Agenda Item Name', aggfunc='first')

# Rename columns to ms1, ms2, ms3
pivoted_wait_pitch.columns = [f'pitch{i}' for i in range(1, 4)]

# Reset index to bring 'Email' as a column
pivoted_wait_pitch = pivoted_wait_pitch.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [444]:
# Do the same for the waitlisted manuscripts
wait_ms = wait_all.loc[wait_all['Agenda Item Name'].str.contains('Manuscript')]
wait_ms['ms_index'] = wait_ms.groupby('Email').cumcount() + 1  # This will create a 1, 2, 3 for each person

# Pivot the data, creating separate columns for ms1, ms2, ms3
pivoted_wait_ms = wait_ms.pivot_table(index='Email', columns='ms_index', values='Agenda Item Name', aggfunc='first')

# Rename columns to ms1, ms2, ms3
pivoted_wait_ms.columns = [f'ms{i}' for i in range(1, 4)]

# Reset index to bring 'Email' as a column
pivoted_wait_ms = pivoted_wait_ms.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [445]:
# Now let's add all of these into the dataset
all_conf_unique_participants = all_conf_unique_participants.merge(pivoted_ms, how='outer')

In [446]:
all_conf_unique_participants = all_conf_unique_participants.merge(pivoted_pitch, how='outer')
all_conf_unique_participants = all_conf_unique_participants.merge(pivoted_wait_ms.rename(columns={'ms1':'wl_ms1', 'ms2':'wl_ms2', 'ms3':'wl_ms3'}), how='outer')
all_conf_unique_participants = all_conf_unique_participants.merge(pivoted_wait_pitch.rename(columns={'pitch1':'wl_pitch1', 'pitch2':'wl_pitch2', 'pitch3':'wl_pitch3'}), how='outer')

In [447]:
## Will need to add in any bookfair waitlists - none yet

#all_conf_unique_participants = all_conf_unique_participants.merge(wait_bookfair['Email', 'output'].drop_duplicates(subset='Email', keep='first'), how='left').rename(columns={'output':'wl_bookfair'})

In [448]:
# Very final thing: add in the phone
all_conf_unique_participants = all_conf_unique_participants.merge(registered[['Email', 'phone']].drop_duplicates(), how='left')

In [449]:
# just in case there are waitlist only people who need phones brought in too, bring in that
all_conf_unique_participants = all_conf_unique_participants.merge(wait_all[['Email', 'phone']].drop_duplicates(), how='left')

In [450]:
timekeepers = pd.read_excel('List_of_genres_agents_editors.xlsx', sheet_name='timekeepers')

In [451]:
# Also need to add a few more true/false ones:
all_conf_unique_participants['qlc_any'] = all_conf_unique_participants['Email'].isin(final_friday_assignments2['Email'])
all_conf_unique_participants['ms_any'] = all_conf_unique_participants['Email'].isin(final_satmorn_assignments2['Email'])
all_conf_unique_participants['pitch_any'] = all_conf_unique_participants['Email'].isin(final_sataft_assignments2['Email'])
all_conf_unique_participants['timekeeper'] = all_conf_unique_participants['Email'].isin(timekeepers['Email'])

In [452]:
# Lastly, let's explicitly say which days of the conference they're attending
all_conf_unique_participants['days_attending'] = all_conf_unique_participants.apply(
    lambda row: 'Both' if row['friday_activities'] and row['saturday_activities']
    else 'Friday' if row['friday_activities']
    else 'Saturday' if row['saturday_activities']
    else 'Waitlist only',
    axis=1
)


In [453]:
print(all_conf_unique_participants['days_attending'].value_counts())

days_attending
Both    171
Name: count, dtype: int64


Yay!! We did it!! Now let's export this to a csv file, and then we're done and ready for upload into MailerLite!

In [454]:
today = datetime.date.today().strftime('%Y-%m-%d') # Let's save today's date for when writing excel files
all_conf_unique_participants.to_csv(f"Outputs/For Mail Merge/MailerLite_update_all_custom_fields_{today}.csv", index=False)

From here on out, no more coding! Head out to MailerLite for the rest.

# Zoom Roster

Kim needs this to be able to see who should have zoom meetings when, so she can let them in from the breakrooms to their particular meetings. This only applies to virtual-only people receiving author coaching, QLC, manuscript critique, or pitch.

In [455]:
zoompeeps = pd.merge(all_friday.loc[all_friday['Session'].str.contains('Critique|Coach'), ['Email', 'First Name', 'Last Name', 'phone', 'virtual', 'timeslot_start', 'Session', 'publisher']], 
                    all_saturday.loc[all_saturday['Session'].str.contains('Critique|Pitch'), ['Email', 'First Name', 'Last Name', 'phone', 'virtual', 'timeslot_start', 'Session', 'publisher']], 
                    how="outer")
zoompeeps = zoompeeps.loc[zoompeeps['virtual']=='Virtual', ]

In [456]:
# Order by timeslot (which is a date-time variable)
zoompeeps = zoompeeps.sort_values(by="timeslot_start")
zoompeeps['Date'] = zoompeeps['timeslot_start'].dt.strftime("%m/%d/%Y")
zoompeeps['Time'] = zoompeeps['timeslot_start'].dt.strftime('%I:%M%p').str.lower()

In [457]:
# Print to excel file for Kim
zoompeeps[['Email', 'First Name', 'Last Name', 'phone', 'Date', 'Time', 'Session', 'publisher']].to_excel("Outputs/Rosters/Roster of Zoom meetings for Kim.xlsx", index=False)

# Conference Roster

We want one ginormous excel file with a row for every single activity, alphabetically sorted by person, then date/time. A person will have as many rows as they have activities.

* First and Last Name
* Cell # (phone)
* Locale (Hotel vs ZOOM)
* Conference Activity ('output)

Make sure to remove any duplicate rows (e.g., mini seminars stuff maybe)


In [458]:
# Add a virtual variable to the waitlist dataset
wait_all['virtual'] = wait_all['Email'].apply(
    lambda email: 'Virtual' if email in virtual_only['Email'] else 'In Person'
)

wait_all['Session'] = wait_all['Agenda Item Name']

In [459]:
conf_roster = pd.merge(pd.merge(all_friday[['Email', 'First Name', 'Last Name', 'phone', 'virtual', 'Session', 'timeslot_start', 'publisher']], 
                        all_saturday[['Email', 'First Name', 'Last Name', 'phone', 'virtual', 'Session', 'timeslot_start', 'publisher']],
                        how="outer"), wait_all[['Email', 'First Name', 'Last Name', 'phone', 'virtual', 'Session']], how="outer")

In [460]:
# Change some of the variables
conf_roster['Locale'] = conf_roster['virtual'].apply(
    lambda virt: 'ZOOM' if virt=='Virtual' else 'Hotel'
)

conf_roster['Cell #'] = conf_roster['phone']

conf_roster['Time'] = conf_roster['timeslot_start'].dt.strftime('%I:%M%p').str.lower()

conf_roster['pub'] = conf_roster.apply(
    lambda row: row['publisher'] if isinstance(row['Session'], str) and 
    ('Pitch' in row['Session'] or 'Manuscript' in row['Session'] or 'Coach' in row['Session'] or 'Critique' in row['Session']) 
    else np.nan, axis=1
)


In [461]:
def combine_variables(var1, var2, var3):
    if pd.notna(var1) and pd.notna(var2) and pd.notna(var3):  # Check for NaN values
        return f"{var1} - {var2} @ {var3}"
    if pd.notna(var1) and pd.notna(var2) and pd.isna(var3) :  # Check for NaN values
        return f"{var1} - {var2}"
    return var1

conf_roster['Conference Activity'] = conf_roster.apply(lambda row: combine_variables(row['Session'], row['pub'], row['Time']), axis=1)

del combine_variables  # Delete function after use


In [462]:
# Awesome! Now let's organize by person (last, then first) and the date/time of activity
conf_roster = conf_roster.sort_values(by=['Last Name', 'First Name', 'timeslot_start'])
conf_roster = conf_roster[['First Name', 'Last Name', 'Cell #',  'Locale' ,'Conference Activity']].drop_duplicates()

In [467]:
# Let's format the excel file as we need it for printing
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Border, Side
from openpyxl.worksheet.page import PageMargins
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.utils import get_column_letter

# Blank out 'First Name' and 'Last Name' except for the first occurrence of each person
conf_roster.loc[conf_roster.duplicated(subset=['First Name', 'Last Name']), ['First Name', 'Last Name']] = ''

# Create a new Excel writer using openpyxl
output_filename = "Outputs/Rosters/Conference Roster - to print.xlsx"
with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
    conf_roster.to_excel(writer, index=False, sheet_name="Activities")

    # Load the workbook and worksheet
    workbook = writer.book
    worksheet = writer.sheets["Activities"]

    # Define styles
    bold_font = Font(bold=True)
    grey_font = Font(color="808080")  # Dark grey color
    thin_border = Border(left=Side(style="thin", color="D3D3D3"),
                         right=Side(style="thin", color="D3D3D3"),
                         top=Side(style="thin", color="D3D3D3"),
                         bottom=Side(style="thin", color="D3D3D3"))

    # Bold the headers
    for cell in worksheet[1]:
        cell.font = bold_font

    # Bold 'First Name' and 'Last Name' and apply activity text color formatting
    for row in worksheet.iter_rows(min_row=2, max_row=worksheet.max_row):
        first_name_cell, last_name_cell, activity_cell = row[0], row[1], row[4]  # Adjust column indexes as needed
        first_name_cell.font = bold_font
        last_name_cell.font = bold_font

        # Change activity text to grey if it doesn't contain specific words
        if not any(word in str(activity_cell.value) for word in ["Manuscript", "Pitch", "Critique", "Coach"]):
            activity_cell.font = grey_font

        # Apply borders to all cells
        for cell in row:
            cell.border = thin_border

    # Apply borders to header row
    for cell in worksheet[1]:
        cell.border = thin_border

     # Auto-adjust column widths accurately
    for col_idx, col_cells in enumerate(worksheet.columns, start=1):
        max_length = max(len(str(cell.value)) if cell.value else 0 for cell in col_cells)
        adjusted_width = max_length * 1.2  # Scale factor for better accuracy in Excel
        worksheet.column_dimensions[get_column_letter(col_idx)].width = adjusted_width

    # Set print settings for landscape mode
    worksheet.page_setup.orientation = "landscape"
    worksheet.page_setup.fitToWidth = 1  # Fit to page width
    worksheet.page_margins = PageMargins(left=0.5, right=0.5, top=0.5, bottom=0.5)  # Set margins

    # Save the workbook
    workbook.save(output_filename)

print(f"Formatted Excel file saved as {output_filename}")


Formatted Excel file saved as Outputs/Rosters/Conference Roster - to print.xlsx
