In [2]:
import pandas as pd
import googlemaps
from itertools import product
import os
import json
from dotenv import load_dotenv
from sklearn.preprocessing import MinMaxScaler

In [3]:
MENTOR_DATA = './data/mentor.csv'
MENTEE_DATA = './data/mentee.csv'
load_dotenv('../.env')

True

In [4]:
mentee_df = pd.read_csv(MENTEE_DATA)
mentor_df = pd.read_csv(MENTOR_DATA)

mentees = mentee_df.columns
mentors = mentor_df.columns

print("Columns in mentee.csv:", mentees)
print("Columns in mentor.csv:", mentors)

Columns in mentee.csv: Index(['Timestamp', 'First Name', 'Last Name', 'Phone Number', 'Email',
       'Year of Birth', 'Gender', 'Are you a person with Disability?',
       'County of Residence',
       'Sub County of Residence (Sub Counties in Mombasa)',
       'Ward of Residence', 'Highest level of education completed',
       'Are you employed', 'Are you running a business',
       'What's your area of interest?',
       'If you selected other, please elaborate (If not, respond with N/A)',
       'Who referred you? (Your Case Manager/Mentor)',
       'What are your Strengths', 'Mode of Mentorship',
       'Preferred Gender of your mentor', 'What are your weaknesses',
       'What are your career Goals',
       'What skills would you like to be mentored?',
       'What are your financial goals', 'Upload your resume if available'],
      dtype='object')
Columns in mentor.csv: Index(['Timestamp', 'First Name', 'Last Name', 'Gender', 'Year of Birth',
       'Are you a person with disabi

In [5]:
# Extract unique sub-counties and ensure they are strings
mentee_sub_counties = mentees['Sub County of Residence (Sub Counties in Mombasa)'].dropna().astype(str).unique()
mentor_sub_counties = mentors['Residence Sub-County'].dropna().astype(str).unique()

# Initialize Google Maps client
gmaps = googlemaps.Client(key='AIzaSyDcscqUSQIxLYzshDBk5PVnxpXKTOZ0Rj0')

# Geocode sub-counties
def geocode_sub_county(sub_county):
    geocode_result = gmaps.geocode(sub_county + ', Mombasa, Kenya')
    if geocode_result:
        location = geocode_result[0]['geometry']['location']
        return (location['lat'], location['lng'])
    return None

mentee_coords = {sub_county: geocode_sub_county(sub_county) for sub_county in mentee_sub_counties}
mentor_coords = {sub_county: geocode_sub_county(sub_county) for sub_county in mentor_sub_counties}

# Prepare distance calculation
distances = []
for (mentee_sub, mentee_coord), (mentor_sub, mentor_coord) in product(mentee_coords.items(), mentor_coords.items()):
    if mentee_coord and mentor_coord:
        distance_result = gmaps.distance_matrix(mentee_coord, mentor_coord, mode='driving')
        if distance_result['rows'][0]['elements'][0]['status'] == 'OK':
            distance = distance_result['rows'][0]['elements'][0]['distance']['value']  # distance in meters
            distances.append({
                'Mentee Sub-County': mentee_sub,
                'Mentor Sub-County': mentor_sub,
                'Distance (meters)': distance
            })

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Display DataFrame
print(distances_df)

# Optionally, save the DataFrame to a CSV file
distances_df.to_csv('./data/distances.csv', index=False)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [6]:
# Extract unique sub-counties and ensure they are strings
mentee_sub_counties = mentees['Sub County of Residence (Sub Counties in Mombasa)'].dropna().astype(str).unique()
mentor_sub_counties = mentors['Residence Sub-County'].dropna().astype(str).unique()

# Initialize Google Maps client
gmaps = googlemaps.Client(key=os.getenv('GOOGLE_MAPS_API_KEY'))

# Geocode sub-counties
def geocode_sub_county(sub_county):
    geocode_result = gmaps.geocode(sub_county + ', Mombasa, Kenya')
    if geocode_result:
        location = geocode_result[0]['geometry']['location']
        return (location['lat'], location['lng'])
    return None

mentee_coords = {sub_county: geocode_sub_county(sub_county) for sub_county in mentee_sub_counties}
mentor_coords = {sub_county: geocode_sub_county(sub_county) for sub_county in mentor_sub_counties}

# Prepare distance calculation
distances = []
for (mentee_sub, mentee_coord), (mentor_sub, mentor_coord) in product(mentee_coords.items(), mentor_coords.items()):
    if mentee_coord and mentor_coord:
        distance_result = gmaps.distance_matrix(mentee_coord, mentor_coord, mode='driving')
        if distance_result['rows'][0]['elements'][0]['status'] == 'OK':
            distance = distance_result['rows'][0]['elements'][0]['distance']['value']  # distance in meters
            distances.append({
                'Mentee Sub-County': mentee_sub,
                'Mentor Sub-County': mentor_sub,
                'Distance (meters)': distance
            })

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Save the DataFrame to CSV and JSON files
distances_df.to_csv('./data/distance.csv', index=False)
distances_df.to_json('./data/distance.json', orient='records', indent=4)

# Display DataFrame
print(distances_df)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [8]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Normalize distance for match score calculation
distances_df['Distance Score'] = (distances_df['Distance (meters)'] - distances_df['Distance (meters)'].min()) / (distances_df['Distance (meters)'].max() - distances_df['Distance (meters)'].min())

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += (1 - distance_score) * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Match X mentees to each mentor
X = 3  # Number of mentees per mentor

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches.csv', index=False)
matches_df.to_json('matches.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

AttributeError: 'Index' object has no attribute 'loc'

In [9]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Normalize distance for match score calculation
distances_df['Distance Score'] = (distances_df['Distance (meters)'] - distances_df['Distance (meters)'].min()) / (distances_df['Distance (meters)'].max() - distances_df['Distance (meters)'].min())

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += (1 - distance_score) * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Match X mentees to each mentor
X = 3  # Number of mentees per mentor

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Mentee Name': f"{mentees.at[mentee_index, 'First Name']} {mentees.at[mentee_index, 'Last Name']}",
            'Mentor Name': f"{mentors.at[mentor_index, 'First Name']} {mentors.at[mentor_index, 'Last Name']}",
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches.csv', index=False)
matches_df.to_json('matches.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

AttributeError: 'Index' object has no attribute 'loc'

In [None]:
import pandas as pd
import json
from itertools import product

# Load data
mentees = pd.read_csv('./data/mentee.csv')
mentors = pd.read_csv('./data/mentor.csv')

# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Normalize distance for match score calculation
distances_df['Distance Score'] = (distances_df['Distance (meters)'] - distances_df['Distance (meters)'].min()) / (distances_df['Distance (meters)'].max() - distances_df['Distance (meters)'].min())

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += (1 - distance_score) * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Match X mentees to each mentor
X = 3  # Number of mentees per mentor

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Mentee Name': f"{mentees.at[mentee_index, 'First Name']} {mentees.at[mentee_index, 'Last Name']}",
            'Mentor Name': f"{mentors.at[mentor_index, 'First Name']} {mentors.at[mentor_index, 'Last Name']}",
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches.csv', index=False)
matches_df.to_json('matches.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = (distances_df['Distance (meters)'] - distances_df['Distance (meters)'].min()) / (distances_df['Distance (meters)'].max() - distances_df['Distance (meters)'].min())

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += (1 - distance_score) * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Match X mentees to each mentor
X = 100  # Number of mentees per mentor

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Mentee Name': f"{mentees.at[mentee_index, 'First Name']} {mentees.at[mentee_index, 'Last Name']}",
            'Mentor Name': f"{mentors.at[mentor_index, 'First Name']} {mentors.at[mentor_index, 'Last Name']}",
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches1.csv', index=False)
matches_df.to_json('matches1.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = (distances_df['Distance (meters)'] - distances_df['Distance (meters)'].min()) / (distances_df['Distance (meters)'].max() - distances_df['Distance (meters)'].min())

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += (1 - distance_score) * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Calculate X based on number of mentees and mentors
number_of_mentees = len(mentees)
number_of_mentors = len(mentors)
X = max(1, number_of_mentees // number_of_mentors)  # Ensure X is at least 1

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Mentee Name': f"{mentees.at[mentee_index, 'First Name']} {mentees.at[mentee_index, 'Last Name']}",
            'Mentor Name': f"{mentors.at[mentor_index, 'First Name']} {mentors.at[mentor_index, 'Last Name']}",
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches2.csv', index=False)
matches_df.to_json('matches2.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores
distances_df['Match Score'] = distances_df.apply(lambda row: calculate_match_score(
    mentees.loc[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == row['Mentee Sub-County']].iloc[0], 
    mentors.loc[mentors['Residence Sub-County'] == row['Mentor Sub-County']].iloc[0], 
    row['Distance Score']), axis=1)

# Calculate X based on number of mentees and mentors
number_of_mentees = len(mentees)
number_of_mentors = len(mentors)
X = max(1, number_of_mentees // number_of_mentors)  # Ensure X is at least 1

matches = []
mentors_matched = set()

for mentor_sub in distances_df['Mentor Sub-County'].unique():
    mentor_matches = distances_df[distances_df['Mentor Sub-County'] == mentor_sub].sort_values(by='Match Score', ascending=False)
    mentee_matches = mentor_matches.head(X)
    for _, match in mentee_matches.iterrows():
        mentee_index = mentees[mentees['Sub County of Residence (Sub Counties in Mombasa)'] == match['Mentee Sub-County']].index[0]
        mentor_index = mentors[mentors['Residence Sub-County'] == match['Mentor Sub-County']].index[0]
        matches.append({
            'Mentee Index': mentee_index,
            'Mentor Index': mentor_index,
            'Mentee Name': f"{mentees.at[mentee_index, 'First Name']} {mentees.at[mentee_index, 'Last Name']}",
            'Mentor Name': f"{mentors.at[mentor_index, 'First Name']} {mentors.at[mentor_index, 'Last Name']}",
            'Match Score': match['Match Score']
        })
        mentors_matched.add(mentor_index)

# Convert matches to DataFrame
matches_df = pd.DataFrame(matches)

# Save the DataFrame to CSV and JSON files
matches_df.to_csv('matches3.csv', index=False)
matches_df.to_json('matches3.json', orient='records', indent=4)

# Display DataFrame
print(matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
        score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches.csv', index=False)
all_matches_df.to_json('all_matches.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if isinstance(mentee['What\'s your area of interest?'], str) and isinstance(mentor['Your area of expertise'], str):
        if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
            score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches.csv', index=False)
all_matches_df.to_json('all_matches.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)

In [None]:
import pandas as pd
import json
from itertools import product
import openai
import os

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if isinstance(mentee['What\'s your area of interest?'], str) and isinstance(mentor['Your area of expertise'], str):
        if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
            score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Function to get explanation from OpenAI
def get_openai_explanation(mentee_name, mentor_name, match_score):
    prompt = f"Explain why {mentee_name} should or should not be matched with {mentor_name}. The match score is {match_score}."
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    explanation = response['choices'][0]['message']['content'].strip()
    return explanation

# Set your OpenAI API key
openai.api_key = os.getenv('6f4266921e2d481596516f157769c8c6')

# Get explanations for the matches
all_matches_df['Explanation'] = all_matches_df.apply(lambda row: get_openai_explanation(row['Mentee Name'], row['Mentor Name'], row['Match Score']), axis=1)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches_with_explanations.csv', index=False)
all_matches_df.to_json('all_matches_with_explanations.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if isinstance(mentee['What\'s your area of interest?'], str) and isinstance(mentor['Your area of expertise'], str):
        if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
            score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Function to get explanation from OpenAI
def get_openai_explanation(mentee_name, mentor_name, match_score):
    prompt = f"Explain why {mentee_name} should or should not be matched with {mentor_name}. The match score is {match_score}."
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    explanation = response['choices'][0]['message']['content'].strip()
    return explanation

# Set your OpenAI API key
openai.api_key = os.getenv('6f4266921e2d481596516f157769c8c6')

# Get explanations for the matches
def apply_explanation(row):
    return get_openai_explanation(row['Mentee Name'], row['Mentor Name'], row['Match Score'])

all_matches_df['Explanation'] = all_matches_df.apply(apply_explanation, axis=1)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches_with_explanations.csv', index=False)
all_matches_df.to_json('all_matches_with_explanations.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if isinstance(mentee['What\'s your area of interest?'], str) and isinstance(mentor['Your area of expertise'], str):
        if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
            score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Function to get explanation from OpenAI
def get_openai_explanation(mentee_name, mentor_name, match_score):
    prompt = f"Explain why {mentee_name} should or should not be matched with {mentor_name}. The match score is {match_score}."
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    explanation = response.choices[0].message["content"].strip()
    return explanation

# Set your OpenAI API key
openai.api_key = os.getenv('6f4266921e2d481596516f157769c8c6')

# Get explanations for the matches
def apply_explanation(row):
    return get_openai_explanation(row['Mentee Name'], row['Mentor Name'], row['Match Score'])

all_matches_df['Explanation'] = all_matches_df.apply(apply_explanation, axis=1)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches_with_explanations.csv', index=False)
all_matches_df.to_json('all_matches_with_explanations.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)

In [None]:

import openai

# gets API Key from environment variable OPENAI_API_KEY
client = openai.OpenAI()

assistant = client.beta.assistants.create(
    name="Math Tutor",
    instructions="You are a personal math tutor. Write and run code to answer math questions.",
    tools=[{"type": "code_interpreter"}],
    model="gpt-4-1106-preview",
)

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="I need to solve the equation `3x + 11 = 14`. Can you help me?",
)

run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions="Please address the user as Jane Doe. The user has a premium account.",
)

print("Run completed with status: " + run.status)

if run.status == "completed":
    messages = client.beta.threads.messages.list(thread_id=thread.id)

    print("messages: ")
    for message in messages:
        assert message.content[0].type == "text"
        print({"role": message.role, "message": message.content[0].text.value})

    client.beta.assistants.delete(assistant.id)

In [None]:
# Load precomputed distances
with open('./data/distance.json', 'r') as f:
    distances = json.load(f)

# Convert distances to DataFrame
distances_df = pd.DataFrame(distances)

# Filter out distances greater than 15 km
max_distance_meters = 15000  # 15 km in meters
distances_df = distances_df[distances_df['Distance (meters)'] <= max_distance_meters]

# Normalize distance for match score calculation
distances_df['Distance Score'] = 1 - (distances_df['Distance (meters)'] / max_distance_meters)  # Closer distance gets higher score

# Define a function to calculate the match score
def calculate_match_score(mentee, mentor, distance_score):
    score = 0
    if mentee['Preferred Gender of your mentor'] == mentor['Gender']:
        score += 20
    if isinstance(mentee['What\'s your area of interest?'], str) and isinstance(mentor['Your area of expertise'], str):
        if mentee['What\'s your area of interest?'] in mentor['Your area of expertise']:
            score += 30
    
    mentee_strengths = mentee['What are your Strengths']
    mentor_strengths = mentor['What are your strengths']
    if isinstance(mentee_strengths, str) and isinstance(mentor_strengths, str):
        if any(strength in mentor_strengths for strength in mentee_strengths.split(',')):
            score += 20
    
    mentee_weaknesses = mentee['What are your weaknesses']
    mentor_weaknesses = mentor['What are your weaknesses']
    if isinstance(mentee_weaknesses, str) and isinstance(mentor_weaknesses, str):
        if any(weakness in mentor_weaknesses for weakness in mentee_weaknesses.split(',')):
            score += 10
    
    score += distance_score * 20  # Closer distance gets higher score
    return score

# Calculate match scores for all possible mentor-mentee pairs
all_matches = []

for mentee_index, mentee_row in mentees.iterrows():
    for mentor_index, mentor_row in mentors.iterrows():
        distance_row = distances_df[(distances_df['Mentee Sub-County'] == mentee_row['Sub County of Residence (Sub Counties in Mombasa)']) &
                                    (distances_df['Mentor Sub-County'] == mentor_row['Residence Sub-County'])]
        if not distance_row.empty:
            distance_score = distance_row['Distance Score'].values[0]
            match_score = calculate_match_score(mentee_row, mentor_row, distance_score)
            all_matches.append({
                'Mentee Index': mentee_index,
                'Mentor Index': mentor_index,
                'Mentee Name': f"{mentee_row['First Name']} {mentee_row['Last Name']}",
                'Mentor Name': f"{mentor_row['First Name']} {mentor_row['Last Name']}",
                'Match Score': match_score
            })

# Convert all matches to DataFrame and sort by Match Score
all_matches_df = pd.DataFrame(all_matches).sort_values(by='Match Score', ascending=False)

# Set your Azure OpenAI API key and endpoint
openai.api_key = os.getenv('6f4266921e2d481596516f157769c8c6')
openai.api_base = os.getenv('https://swahilipot-azure-openai.openai.azure.com/')
openai.api_type = "azure"
openai.api_version = "2023-07-01-preview"  # Use the appropriate version for Azure OpenAI

# Function to get explanation from OpenAI
def get_openai_explanation(mentee_name, mentor_name, match_score):  
    prompt = f"Explain why {mentee_name} should or should not be matched with {mentor_name}. The match score is {match_score}."  
    response = openai.Completion.create(  
        model="text-davinci-003",  # Update this to the model you intend to use  
        prompt=prompt,  
        max_tokens=150,  # Adjust based on your needs  
        temperature=0.7,  # Adjust based on how creative/varied you want the responses to be  
    )  
    explanation = response['choices'][0]['text'].strip()  
    return explanation  


# Get explanations for the matches
def apply_explanation(row):
    return get_openai_explanation(row['Mentee Name'], row['Mentor Name'], row['Match Score'])

all_matches_df['Explanation'] = all_matches_df.apply(apply_explanation, axis=1)

# Save the DataFrame to CSV and JSON files
all_matches_df.to_csv('all_matches_with_explanations.csv', index=False)
all_matches_df.to_json('all_matches_with_explanations.json', orient='records', indent=4)

# Display DataFrame
print(all_matches_df)