In [84]:
import pandas as pd
import numpy as np


In [85]:
# Load the mentor responses from the cleaned Excel workbook into a DataFrame.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# that has this exact folder layout.
mentors = pd.read_excel("/Users/BeckyH/Desktop/AWC_Conference/Mentor-mentee matching/Mentor_cleaned.xlsx")

In [86]:
# Load the mentee responses from the cleaned Excel workbook into a DataFrame.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# that has this exact folder layout.
mentees = pd.read_excel("/Users/BeckyH/Desktop/AWC_Conference/Mentor-mentee matching/Mentee_cleaned.xlsx")

In [87]:
# Tally the distinct raw publishing-history answers given by mentors.
mentors['mentor_prev_pub_type'].value_counts()

mentor_prev_pub_type
Self-published on your own                                                                                                                                                                                     5
Traditionally published with midsize or small publisher without literary agent                                                                                                                                 5
Traditionally published with Big Five publisher with literary agent                                                                                                                                            2
Traditionally published with midsize or small publisher without literary agent, Self-published on your own                                                                                                     2
Traditionally published with midsize or small publisher without literary agent, Self-published using self-publishing company, Hybrid published 

In [88]:
# Collapse each mentor's free-text publishing history into one coarse label:
# 'Unpublished', 'Both', 'Self-publish', 'Traditionally publish' or 'Hybrid'.
def classify_pub_type(x):
    """Map a raw mentor publishing-history answer to a single category label.

    The answer is converted to a lower-cased string and classified with
    substring checks, in this order of precedence:

    1. contains 'unpublished'                        -> 'Unpublished'
    2. contains self- AND traditional-publishing text -> 'Both'
    3. contains 'self-publish'                        -> 'Self-publish'
    4. contains 'traditionally publish'               -> 'Traditionally publish'
    5. contains 'hybrid'                              -> 'Hybrid'
    6. anything else (e.g. a missing value)           -> 'Unpublished'

    Args:
        x: One cell of the raw answer column; any type, stringified first.

    Returns:
        str: One of the labels listed above.
    """
    answer = str(x).lower()  # case-insensitive matching
    if 'unpublished' in answer:
        return 'Unpublished'

    # Every longer self-publishing phrase ("... on your own", "... using
    # self-publishing company", ...) contains 'self-publish', so a single
    # substring test covers all of them.
    went_self = 'self-publish' in answer
    went_trad = 'traditionally publish' in answer

    if went_self:
        return 'Both' if went_trad else 'Self-publish'
    if went_trad:
        return 'Traditionally publish'
    # 'Hybrid' only wins when no self/traditional wording is present.
    return 'Hybrid' if 'hybrid' in answer else 'Unpublished'

# Store the recoded label in a new column ('mentor_prev_pubtype'); the raw
# answers in 'mentor_prev_pub_type' are left untouched.
mentors['mentor_prev_pubtype'] = mentors['mentor_prev_pub_type'].apply(classify_pub_type)

In [89]:
# Check the distribution of the recoded mentor publishing categories.
mentors['mentor_prev_pubtype'].value_counts()

mentor_prev_pubtype
Self-publish             9
Both                     8
Traditionally publish    8
Unpublished              1
Name: count, dtype: int64

In [90]:
# Recode the mentees' publishing answers into the same kind of coarse labels.
def classify_pub_type(x):
    """Map a raw mentee publishing answer to a single category label.

    NOTE: this rebinds the name ``classify_pub_type`` that the mentor
    classifier earlier in the notebook also uses; after this cell runs, only
    this mentee variant is reachable under that name.

    Differences from the mentor variant: 'unpublish' (no trailing 'ed') is
    enough to label an answer 'Unpublished', the unhyphenated spelling
    'self publish' is accepted, and unrecognised answers become 'Unsure'.

    Args:
        x: One cell of the raw answer column; any type, stringified first.

    Returns:
        str: 'Unpublished', 'Both', 'Self-publish', 'Traditionally publish',
        'Hybrid' or 'Unsure'.
    """
    answer = str(x).lower()  # case-insensitive matching
    if 'unpublish' in answer:
        return 'Unpublished'

    wants_self = 'self-publish' in answer or 'self publish' in answer
    wants_trad = 'traditionally publish' in answer

    # (self, traditional) flag pair -> label; the (False, False) case falls
    # through to the hybrid / fallback handling below.
    label_by_flags = {
        (True, True): 'Both',
        (True, False): 'Self-publish',
        (False, True): 'Traditionally publish',
    }
    flags = (wants_self, wants_trad)
    if flags in label_by_flags:
        return label_by_flags[flags]

    if 'hybrid' in answer:
        return 'Hybrid'
    return 'Unsure'  # fallback for any unexpected cases

# Recode each mentee's 'pub_pref' answer and store the label in a new column.
# NOTE(review): the source column is 'pub_pref' while the new column is named
# 'mentee_prev_pubtype' — confirm whether this is a preference or a history.
mentees['mentee_prev_pubtype'] = mentees['pub_pref'].apply(classify_pub_type)

In [91]:
# Check the distribution of the recoded mentee publishing categories.
mentees['mentee_prev_pubtype'].value_counts()

mentee_prev_pubtype
Traditionally publish    23
Both                      8
Self-publish              7
Unsure                    4
Hybrid                    1
Name: count, dtype: int64

In [92]:
# Next, sanity-check the demographic columns used for matching (age range,
# race, gender), looking at mentor values and mentee preferences in turn.
# Start with the mentors' age ranges.
mentors['mentor_age'].value_counts()

mentor_age
50-59          7
60-69          5
40-49          5
80 or older    4
70-79          4
30-39          1
Name: count, dtype: int64

In [93]:
# Count the mentees' stated mentor-age preferences.
mentees['mentee_age_pref'].value_counts()

mentee_age_pref
Having a mentor of the same age range is not important to me    40
40-49                                                            2
30-39                                                            1
Name: count, dtype: int64

In [94]:
# Count mentors by race.
mentors['mentor_race'].value_counts()

mentor_race
White         23
Black          2
Mixed Race     1
Name: count, dtype: int64

In [95]:
# Count the mentees' stated mentor-race preferences.
mentees['mentee_race_pref'].value_counts()

mentee_race_pref
Having a mentor of the same race is not important to me    38
Black                                                       3
White                                                       2
Name: count, dtype: int64

In [96]:
# Count mentors by gender.
mentors['mentor_gender'].value_counts()

mentor_gender
Female    15
Male      11
Name: count, dtype: int64

In [97]:
# Count the mentees' stated mentor-gender preferences.
mentees['mentee_gender_pref'].value_counts()

mentee_gender_pref
Having a mentor of the same gender is not important to me    32
Female                                                       11
Name: count, dtype: int64

In [98]:
# Snapshot both frames (including the recoded publishing columns). These
# copies are what the matching and potential-mentor cells below are called with.
mentors_original = mentors.copy()
mentees_original = mentees.copy()

In [74]:

def _pubtype_compatible(mentor_pub, mentee_pub):
    """Return True when a mentor's publishing category suits the mentee's category."""
    if mentor_pub in ('Unpublished', 'Hybrid'):
        return mentee_pub in ('Self-publish', 'Unsure', 'Hybrid')
    if mentee_pub == 'Unsure':
        return True
    return mentor_pub == mentee_pub or mentor_pub == 'Both'


def _find_eligible_mentors(mentee, mentors):
    """Apply every matching constraint for one mentee, in a fixed order.

    Args:
        mentee (pd.Series): One mentee row.
        mentors (pd.DataFrame): Mentors with their current remaining capacity.

    Returns:
        tuple: ``(candidates, reason)``. ``reason`` is None when at least one
        mentor survived every filter; otherwise it names the first filter that
        eliminated all remaining mentors and ``candidates`` is empty.
    """
    candidates = mentors[mentors['mentor_num_mentees'] > 0]
    if candidates.empty:
        return candidates, 'No mentors with capacity'

    mentee_pub = mentee['mentee_prev_pubtype']
    pub_ok = candidates['mentor_prev_pubtype'].apply(
        lambda mentor_pub: _pubtype_compatible(mentor_pub, mentee_pub))
    candidates = candidates[pub_ok]
    if candidates.empty:
        return candidates, 'No matching publishing type'

    # Genres are compared as exact ', '-separated tokens.
    # NOTE(review): str() turns a missing genre into the token 'nan', so a
    # mentee and a mentor that both lack a genre would count as overlapping —
    # confirm the inputs have no blank genre cells.
    mentee_genres = set(str(mentee['mentee_ms_genre']).split(', '))
    shares_genre = candidates['mentor_genre'].apply(
        lambda genres: not mentee_genres.isdisjoint(str(genres).split(', ')))
    # Filter with a boolean mask rather than adding a temporary column to a
    # filtered slice, which can raise SettingWithCopyWarning.
    candidates = candidates[shares_genre]
    if candidates.empty:
        return candidates, 'No genre overlap'

    # A preference is only enforced when the mentee gave a concrete value,
    # i.e. the answer does not contain "not important".
    preference_filters = (
        ('mentee_race_pref', 'mentor_race', 'No matching race'),
        ('mentee_gender_pref', 'mentor_gender', 'No matching gender'),
        ('mentee_age_pref', 'mentor_age', 'No matching age'),
    )
    for pref_col, mentor_col, reason in preference_filters:
        preference = mentee[pref_col]
        if 'not important' in str(preference).lower():
            continue
        candidates = candidates[candidates[mentor_col] == preference]
        if candidates.empty:
            return candidates, reason

    return candidates, None


def match_mentees_to_mentors_prioritize_no_scholarship(mentees_original, mentors_original, seed=None):
    """
    Greedily matches mentees to mentors, serving mentees who do not need a
    scholarship before those who do.

    Both groups are filtered with the same rules: remaining capacity,
    publishing-type compatibility, genre overlap, then race, gender and age
    preferences. The first surviving mentor (in the possibly shuffled mentor
    order) is assigned and that mentor's 'mentor_num_mentees' is decremented.
    The input frames are not modified.

    Args:
        mentees_original (pd.DataFrame): Mentee information. Reads
            'scholarship_need', 'mentee_prev_pubtype', 'mentee_ms_genre',
            'mentee_race_pref', 'mentee_gender_pref' and 'mentee_age_pref'.
        mentors_original (pd.DataFrame): Mentor information. Reads
            'mentor_email', 'mentor_name', 'mentor_prev_pubtype',
            'mentor_genre', 'mentor_race', 'mentor_gender', 'mentor_age' and
            'mentor_num_mentees'.
        seed (int, optional): When given, seeds NumPy's global RNG and
            shuffles the mentor order reproducibly. When None (default) the
            mentors keep their input order.

    Returns:
        pd.DataFrame: The mentees with 'assigned_mentor_email' and
        'unmatched_reason' added, left-merged with the assigned mentor's
        descriptive columns.
    """
    mentees = mentees_original.copy()
    mentors = mentors_original.copy()

    mentees['assigned_mentor_email'] = None
    mentees['unmatched_reason'] = None

    if seed is not None:
        np.random.seed(seed)  # kept so callers relying on the global seed see no change
        mentors = mentors.sample(frac=1, random_state=seed).reset_index(drop=True)  # Shuffle mentors

    # Both groups are materialised before any assignment is written.
    no_scholarship_needed = mentees[mentees['scholarship_need'] != 'Yes']
    scholarship_needed = mentees[mentees['scholarship_need'] == 'Yes']

    # Priority order: mentees without a scholarship need are served first.
    for group in (no_scholarship_needed, scholarship_needed):
        for idx, mentee in group.iterrows():
            candidates, reason = _find_eligible_mentors(mentee, mentors)
            if reason is not None:
                mentees.at[idx, 'unmatched_reason'] = reason
                continue

            # If several mentors qualify, the first in mentor order is selected.
            selected_mentor_email = candidates.iloc[0]['mentor_email']
            mentees.at[idx, 'assigned_mentor_email'] = selected_mentor_email
            mentors.loc[mentors['mentor_email'] == selected_mentor_email, 'mentor_num_mentees'] -= 1

    # Attach the assigned mentor's descriptive columns to each mentee row.
    mentor_info_cols = ['mentor_email', 'mentor_name', 'mentor_prev_pubtype', 'mentor_genre', 'mentor_race', 'mentor_gender', 'mentor_age']
    matched = mentees.merge(
        mentors[mentor_info_cols],
        how='left',
        left_on='assigned_mentor_email',
        right_on='mentor_email'
    )

    return matched

def run_multiple_scenarios_prioritize_no_scholarship(mentees_original, mentors_original, num_iterations=500, seed_start=0, verbose=True):
    """
    Runs the matching algorithm once per seed and keeps the best outcome.

    Seeds ``seed_start, seed_start + 1, ..., seed_start + num_iterations - 1``
    are tried in order. Each seed changes only the mentor order used by the
    matching function; the mentee order is the same in every run. The first
    seed that reaches the highest number of assigned mentees wins (ties do
    not replace an earlier best).

    Args:
        mentees_original (pd.DataFrame): DataFrame of mentee information.
        mentors_original (pd.DataFrame): DataFrame of mentor information.
        num_iterations (int): The number of iterations to run.
        seed_start (int): The starting seed value for reproducibility.
        verbose (bool): When True (default) print one progress line per
            iteration. The final summary line is always printed.

    Returns:
        pd.DataFrame: The matched mentees DataFrame from the iteration
                      that resulted in the highest number of assigned mentors,
                      or None when ``num_iterations`` is 0 or negative.
    """
    best_iteration = None
    max_assigned = -1
    best_match_df = None

    for i in range(num_iterations):
        current_seed = seed_start + i

        # The matching function works on its own copies of both frames, so the
        # originals can be passed straight through on every iteration.
        matched_df = match_mentees_to_mentors_prioritize_no_scholarship(
            mentees_original, mentors_original, seed=current_seed)
        num_assigned = matched_df['assigned_mentor_email'].notna().sum()

        # Strict '>' keeps the earliest seed among equally good runs.
        if num_assigned > max_assigned:
            max_assigned = num_assigned
            best_iteration = current_seed
            best_match_df = matched_df

        if verbose:
            print(f"Iteration: {i + 1}, Seed: {current_seed}, Assigned Mentors: {num_assigned}")

    print(f"\nBest iteration (seed): {best_iteration} with {max_assigned} participants assigned mentors.")
    return best_match_df

# Run the matching for 100 iterations (overriding the function's default of
# 500) and keep the result with the most assigned mentees.
best_matched_mentees_prioritized = run_multiple_scenarios_prioritize_no_scholarship(mentees_original, mentors_original, num_iterations=100)

# Print the winning match table.
# NOTE(review): this prints the full frame, which includes participant names
# and e-mail addresses — clear the output before sharing the notebook.
print("\nBest Matched Mentees DataFrame (prioritizing no scholarship):")
print(best_matched_mentees_prioritized)

# Count rows per (mentor_email, scholarship_need) pair; dropna=False keeps the
# unmatched mentees (mentor_email is NaN) and blank scholarship answers visible.
value_counts_with_nan = best_matched_mentees_prioritized[['mentor_email', 'scholarship_need']].value_counts(dropna=False)
print("\nValue Counts of (mentor_email, scholarship_need) including NaN:")
print(value_counts_with_nan)


Iteration: 1, Seed: 0, Assigned Mentors: 30
Iteration: 2, Seed: 1, Assigned Mentors: 27
Iteration: 3, Seed: 2, Assigned Mentors: 30
Iteration: 4, Seed: 3, Assigned Mentors: 29
Iteration: 5, Seed: 4, Assigned Mentors: 29
Iteration: 6, Seed: 5, Assigned Mentors: 30
Iteration: 7, Seed: 6, Assigned Mentors: 29
Iteration: 8, Seed: 7, Assigned Mentors: 31
Iteration: 9, Seed: 8, Assigned Mentors: 29
Iteration: 10, Seed: 9, Assigned Mentors: 26
Iteration: 11, Seed: 10, Assigned Mentors: 30
Iteration: 12, Seed: 11, Assigned Mentors: 31
Iteration: 13, Seed: 12, Assigned Mentors: 31
Iteration: 14, Seed: 13, Assigned Mentors: 32
Iteration: 15, Seed: 14, Assigned Mentors: 30
Iteration: 16, Seed: 15, Assigned Mentors: 31
Iteration: 17, Seed: 16, Assigned Mentors: 29
Iteration: 18, Seed: 17, Assigned Mentors: 32
Iteration: 19, Seed: 18, Assigned Mentors: 30
Iteration: 20, Seed: 19, Assigned Mentors: 27
Iteration: 21, Seed: 20, Assigned Mentors: 29
Iteration: 22, Seed: 21, Assigned Mentors: 28
Iterati

In [75]:
# Mentees per assigned mentor, split by scholarship need (NaN mentor_email = unmatched).
best_matched_mentees_prioritized[['mentor_email', 'scholarship_need']].value_counts(dropna=False)

mentor_email                     scholarship_need
NaN                              NaN                 7
                                 Yes                 3
kimconrey@yahoo.com              NaN                 2
whoisbrendalowder@gmail.com      NaN                 2
tagreen97@yahoo.com              NaN                 2
mickeydubrow@comcast.net         NaN                 2
merrill@merrilldavies.com        NaN                 2
Patricia.Fors@gmail.com          NaN                 2
knichols75@gmail.com             NaN                 2
KatFieler@gmail.com              NaN                 2
emilydrakecarpenter@gmail.com    NaN                 2
ligayacc@gmail.com               NaN                 2
gsummerskill@me.com              Yes                 1
eniac.caine@gmail.com            NaN                 1
                                 Yes                 1
bobby@bobbynash.com              NaN                 1
patriciamartinholt@yahoo.com     NaN                 1
rlmartz47@gmail

In [76]:
# Why mentees went unmatched, split by scholarship need (NaN reason = matched).
best_matched_mentees_prioritized[['unmatched_reason', 'scholarship_need']].value_counts(dropna=False)

unmatched_reason    scholarship_need
NaN                 NaN                 27
                    Yes                  6
No genre overlap    Yes                  2
                    NaN                  2
No matching age     NaN                  2
No matching race    NaN                  2
No matching gender  NaN                  1
No matching race    Yes                  1
Name: count, dtype: int64

In [80]:
# Export the final matches to Excel without the listed bookkeeping columns.
# 'assigned_mentor_email' is dropped but the merged 'mentor_email' column is
# not in the drop list, so the assigned mentor's address is still exported.
cols_to_drop = ['datetime', 'commitment', 'assigned_mentor_email',	'acceptadvice']
# Keep every remaining column in its existing order.
cols_to_keep = [col for col in best_matched_mentees_prioritized.columns if col not in cols_to_drop]
# NOTE(review): absolute, user-specific output path.
best_matched_mentees_prioritized[cols_to_keep].to_excel('/Users/BeckyH/Desktop/AWC_Conference/Mentor-mentee matching/Final_mentor_mentee_matching.xlsx')

In [99]:
import pandas as pd
import numpy as np

def get_potential_mentors(mentee, mentors):
    """
    Lists every mentor a given mentee could be matched with.

    Mentors are filtered in this order: remaining capacity
    ('mentor_num_mentees' > 0), publishing-type compatibility, at least one
    shared genre, then the mentee's race, gender and age preferences. A
    preference is only enforced when the mentee's answer does not contain
    "not important". The ``mentors`` frame is not modified.

    Args:
        mentee (pd.Series): A Series representing a single mentee's information.
        mentors (pd.DataFrame): DataFrame of mentor information.

    Returns:
        list: The 'mentor_name' of every mentor that passes all filters, in
              the order they appear in ``mentors``; empty when none qualify.
    """
    mentee_pub = mentee['mentee_prev_pubtype']

    def pubtype_match(mentor_pub):
        # Unpublished/hybrid mentors are only offered to self-publishing,
        # unsure or hybrid mentees; an unsure mentee accepts any other mentor.
        if mentor_pub in ['Unpublished', 'Hybrid']:
            return mentee_pub in ['Self-publish', 'Unsure', 'Hybrid']
        if mentee_pub == 'Unsure':
            return True
        return mentor_pub == mentee_pub or mentor_pub == 'Both'

    candidates = mentors[mentors['mentor_num_mentees'] > 0]
    if candidates.empty:
        return []

    candidates = candidates[candidates['mentor_prev_pubtype'].apply(pubtype_match)]
    if candidates.empty:
        return []

    # Genres are compared as exact ', '-separated tokens. Filtering with a
    # boolean mask avoids adding a temporary column to a filtered slice, which
    # can raise SettingWithCopyWarning.
    mentee_genres = set(str(mentee['mentee_ms_genre']).split(', '))
    shares_genre = candidates['mentor_genre'].apply(
        lambda genres: not mentee_genres.isdisjoint(str(genres).split(', ')))
    candidates = candidates[shares_genre]
    if candidates.empty:
        return []

    for pref_col, mentor_col in (
        ('mentee_race_pref', 'mentor_race'),
        ('mentee_gender_pref', 'mentor_gender'),
        ('mentee_age_pref', 'mentor_age'),
    ):
        preference = mentee[pref_col]
        if 'not important' in str(preference).lower():
            continue
        candidates = candidates[candidates[mentor_col] == preference]
        if candidates.empty:
            return []

    return candidates['mentor_name'].tolist()

def create_potential_mentor_list(mentees_original, mentors_original):
    """
    Builds a copy of the mentee table with every mentee's candidate mentors.

    Each mentee row is evaluated against the full mentor table with
    get_potential_mentors; the input frames are left untouched.

    Args:
        mentees_original (pd.DataFrame): DataFrame of mentee information.
        mentors_original (pd.DataFrame): DataFrame of mentor information.

    Returns:
        pd.DataFrame: A copy of the mentees DataFrame with an extra
                      'potential_mentors' column holding, per mentee, the
                      list of mentor names returned by get_potential_mentors.
    """
    def candidates_for(mentee_row):
        # Every mentee is checked against the same, unmodified mentor table.
        return get_potential_mentors(mentee_row, mentors_original)

    annotated = mentees_original.copy()
    annotated['potential_mentors'] = annotated.apply(candidates_for, axis=1)
    return annotated

# List, for every mentee, all mentors that satisfy the matching criteria.
# This uses the capacities in mentors_original, so the lists do not reflect
# slots consumed by the matching run above (which works on its own copies).
mentees_with_potential_mentors = create_potential_mentor_list(mentees_original, mentors_original)

                                    potential_mentors
0                         [Mickey Dubrow, Kim Conrey]
1                                                  []
2   [Merrill J. Davies, Kat Fieler, Katherine Nich...
3                             [Ron Martz, Kim Conrey]
4                                                  []
5   [Merrill J. Davies, Sally Kilpatrick, Emily Ca...
6   [Mickey Dubrow, Kat Fieler, Patricia Fors, Kim...
7                                        [Kim Conrey]
8   [Merrill J. Davies, Kat Fieler, Sally Kilpatri...
9                                                  []
10                     [Sally Kilpatrick, Kim Conrey]
11                                 [Sally Kilpatrick]
12                                                 []
13  [Merrill J. Davies, Kat Fieler, Sally Kilpatri...
14                                  [Gail Summerskil]
15  [Merrill J. Davies, Susan Crawford, Patricia F...
16  [Merrill J. Davies, Mike Shaw, Susan Crawford,...
17  [Merrill J. Davies, Kat 

In [101]:
# Export each mentee's name, e-mail and list of potential mentors to Excel.
# NOTE(review): absolute, user-specific output path.
mentees_with_potential_mentors[['mentee_name','mentee_email', 'potential_mentors']].to_excel('/Users/BeckyH/Desktop/AWC_Conference/Mentor-mentee matching/All_potential_matches.xlsx')
