In [None]:
import sys
from pathlib import Path
# Add tools directory so Matcher can be imported (notebook is in tools/matching_related/)
_cwd = Path.cwd()
# Probe, in priority order, the directories that may contain the Matcher package:
# the working directory, its parent, then a "tools" sub-directory.
# When none of them contains "Matcher", fall back to the parent directory.
_tools = next(
    (
        candidate
        for candidate in (_cwd, _cwd.parent, _cwd / "tools")
        if (candidate / "Matcher").exists()
    ),
    _cwd.parent,
)
# Prepend only once so re-running the cell does not stack duplicate entries.
_tools_entry = str(_tools)
if _tools_entry not in sys.path:
    sys.path.insert(0, _tools_entry)

In [None]:
%reload_ext autoreload
%autoreload 2

from Matcher.utilities import DataLoader
from Matcher.scorer import ScorerConfig, PreferenceScorer, SimilarityScorer
from Matcher.matcher import Matcher
import numpy as np

In [None]:
# Create the data loader and the scorer configuration shared by every scorer below.
# Every keyword argument inside ScorerConfig(...) is commented out, so the config is
# built with no arguments. The commented values are presumably the library defaults,
# kept here as a tuning reference — TODO confirm against Matcher.scorer.ScorerConfig.
dataloader = DataLoader()
config = ScorerConfig(
    # --- preference scoring ---
    # base_preference_score = 30,  # base score for preference
    # unqualified_penalty = -np.inf,  # penalty for unqualified
    # preferred_wxid_value = 999,  # value for preferred_wxid
    # timezone_difference_base_penalty = -2,  # base penalty per hour
    # timezone_difference_penalty_multiplier_threshold = 5,  # threshold for penalty multiplier (inclusive)
    # timezone_difference_penalty_multiplier = 1.25,  # multiplier for penalty if time difference is greater than threshold
    # NOTE(review): the timezone base penalty above is negative while the grade base
    # penalty below is positive — confirm the sign convention in ScorerConfig.
    # grade_difference_base_penalty = 2.25,  # penalty per grade difference
    # grade_difference_penalty_multiplier_threshold = 3,  # threshold for penalty multiplier (inclusive)
    # grade_difference_penalty_multiplier = 2,  # multiplier for penalty if grade difference is greater than threshold
    # same_location_reward = 10,  # reward for same location
    # same_location_group_reward = 5,  # reward for same location group
    # different_location_group_penalty = -1,  # penalty for different location group
    # mbti_transform = "scaled_sigmoid",  # a function to transform the mbti score to a range of -50 to 50
    # mbti_multiplier = 0.3,  # multiplier for mbti score
    # reply_frequency_reward = {
    #     "1": -4,  # do-not-disturb mode on, replies when free
    #     "2": -1,  # lets messages pile up, then replies one by one
    #     "3": 0,  # checks casually, replies at irregular times
    #     "4": 2,  # checks the phone often, replies as soon as a message is seen
    #     "5": 6,  # always online, replies almost instantly
    # },
    
    # --- similarity scoring ---
    # base_similarity_score = 0,  # base score for similarity
    
    # hobbies_reward_multiplier = 25,  # reward for hobbies bonus
    # hobbies_bonus_threshold = 0.66,  # threshold for hobbies bonus
    # hobbies_bonus_multiplier = 3,  # multiplier for hobbies bonus
    
    # fav_movies_reward_multiplier = 21,  # reward for fav_movies bonus
    # fav_movies_bonus_threshold = 0.64,  # threshold for fav_movies bonus
    # fav_movies_bonus_multiplier = 3,  # multiplier for fav_movies bonus
    
    # expectation_reward_multiplier = 20,  # reward for expectation bonus
    # expectation_thresholds = (
    #     0.6,
    #     0.78,
    # ),  # thresholds for expectation bonus
    # expectation_penalty_multiplier = 5,  # multiplier for expectation penalty
    # expectation_bonus_multiplier = 2,  # multiplier for expectation bonus
    
    # weekend_arrangement_reward_multiplier = 15,  # reward for weekend_arrangement bonus
    # weekend_arrangement_thresholds = (
    #     0.35,
    #     0.68,
    # ),  # thresholds for weekend_arrangement bonus
    # weekend_arrangement_penalty_multiplier = 2,  # multiplier for weekend_arrangement penalty
    # weekend_arrangement_bonus_multiplier = 2.5,  # multiplier for weekend_arrangement bonus
    
    # wish_reward_multiplier = 7,  # reward for wish bonus
    # wish_bonus_threshold = 0.65,  # threshold for wish bonus
    # wish_bonus_multiplier = 1.5,  # multiplier for wish bonus
)

# Load the four participant tables through the shared DataLoader, in a fixed
# order: heterosexual female, heterosexual male, homosexual female, homosexual male.
(
    heterosexual_female_df,
    heterosexual_male_df,
    homosexual_female_df,
    homosexual_male_df,
) = (
    dataloader.load_data(dataset_name)
    for dataset_name in (
        "embedded_heterosexual_female_df",
        "embedded_heterosexual_male_df",
        "embedded_homosexual_female_df",
        "embedded_homosexual_male_df",
    )
)

In [None]:
# One PreferenceScorer per pairing, each built as (config, first_df, second_df).
# The same-sex pairings pass the same DataFrame for both sides.
(
    FM_preference_scorer,
    MF_preference_scorer,
    MM_preference_scorer,
    FF_preference_scorer,
) = [
    PreferenceScorer(config, first_df, second_df)
    for first_df, second_df in (
        (heterosexual_female_df, heterosexual_male_df),  # FM
        (heterosexual_male_df, heterosexual_female_df),  # MF
        (homosexual_male_df, homosexual_male_df),        # MM
        (homosexual_female_df, homosexual_female_df),    # FF
    )
]

# Compute the preference score matrix of every pairing, in the same FM, MF, MM, FF order.
(
    FM_preference_res,
    MF_preference_res,
    MM_preference_res,
    FF_preference_res,
) = [
    scorer.calculate_score_matrix()
    for scorer in (
        FM_preference_scorer,
        MF_preference_scorer,
        MM_preference_scorer,
        FF_preference_scorer,
    )
]


In [None]:
# One SimilarityScorer per pairing, each built as (config, first_df, second_df).
# The same-sex pairings pass the same DataFrame for both sides.
(
    FM_similarity_scorer,
    MF_similarity_scorer,
    MM_similarity_scorer,
    FF_similarity_scorer,
) = [
    SimilarityScorer(config, first_df, second_df)
    for first_df, second_df in (
        (heterosexual_female_df, heterosexual_male_df),  # FM
        (heterosexual_male_df, heterosexual_female_df),  # MF
        (homosexual_male_df, homosexual_male_df),        # MM
        (homosexual_female_df, homosexual_female_df),    # FF
    )
]

# Compute the similarity score matrix of every pairing, in the same FM, MF, MM, FF order.
(
    FM_similarity_res,
    MF_similarity_res,
    MM_similarity_res,
    FF_similarity_res,
) = [
    scorer.calculate_score_matrix()
    for scorer in (
        FM_similarity_scorer,
        MF_similarity_scorer,
        MM_similarity_scorer,
        FF_similarity_scorer,
    )
]

In [None]:
# Total score of each pairing = preference score + similarity score.
# Each sum allocates a fresh array, so the in-place edits below never touch the
# scorer outputs and this cell can be re-run safely.
total_FM = FM_preference_res + FM_similarity_res
total_MF = MF_preference_res + MF_similarity_res
total_MM = MM_preference_res + MM_similarity_res
total_FF = FF_preference_res + FF_similarity_res

# In the same-sex matrices both axes index the same people, so the diagonal is
# "person scored against themselves"; set it to -inf so nobody ranks themselves.
np.fill_diagonal(total_MM, -np.inf)
np.fill_diagonal(total_FF, -np.inf)

# Mutual heterosexual score: element-wise minimum of the F->M total and the
# transposed M->F total, so a pair only scores as high as its less enthusiastic
# side. np.minimum avoids stacking both matrices into a temporary 3-D array.
total_hetro = np.minimum(total_FM, total_MF.T)

# Floor every score at 0 (this also maps -inf to 0). The bounds are passed
# positionally because the `min=` keyword of np.clip only exists in NumPy >= 2.1
# and raises TypeError on older releases.
# NOTE(review): clipped_FM is derived from the one-directional total_FM, not from
# the mutual total_hetro — confirm this is intended.
clipped_FM = np.clip(total_FM, 0, None)
clipped_MM = np.clip(total_MM, 0, None)
clipped_FF = np.clip(total_FF, 0, None)


In [None]:
import pandas as pd


# Columns shown in the result tables: identity, MBTI axes, then the free-text answers.
cols = [
    "name",
    "wxid",
    "school",
    "grade",
    "mbti_ei",
    "mbti_sn",
    "mbti_tf",
    "mbti_jp",
    "hobbies",
    "fav_movies",
    "expectation",
    "weekend_arrangement",
    "wish",
    "why_lamp_remembered_your_name",
    "message_to_partner",
]

# Rebind each participant table to just the display columns.
# NOTE(review): this overwrites the frames loaded earlier; if the scorer cells
# need columns that are not listed here (presumably the embeddings), the loading
# cell must be re-run before re-running them — confirm.
(
    heterosexual_female_df,
    heterosexual_male_df,
    homosexual_female_df,
    homosexual_male_df,
) = (
    frame[cols]
    for frame in (
        heterosexual_female_df,
        heterosexual_male_df,
        homosexual_female_df,
        homosexual_male_df,
    )
)

def get_n_fav_person(
    person_idx: int,
    score_matrix: np.ndarray,
    info_df: pd.DataFrame,
    self_df: pd.DataFrame,
    scoring_to: np.ndarray,
    scoring_from: np.ndarray,
    n=3,
) -> pd.DataFrame:
    """
    Build a table holding one person followed by their ``n`` best-scoring candidates.

    All lookups are positional: row ``person_idx`` of ``score_matrix`` is ranked,
    and candidate ``j`` (column ``j`` of that row) is row position ``j`` of
    ``info_df``. The index labels of ``info_df`` / ``self_df`` are never used to
    address the arrays, so the frames may carry any index (non-default, filtered
    or with duplicate labels).

    Args:
        person_idx: Row position of the person in ``self_df`` and ``score_matrix``.
        score_matrix: 2-D array whose row ``person_idx`` holds the ranking score
            of every candidate.
        info_df: Candidate information, one row per column of ``score_matrix``.
        self_df: Information about the group ``person_idx`` belongs to.
        scoring_to: 2-D array read as ``scoring_to[person_idx, candidate]``.
        scoring_from: 2-D array read as ``scoring_from[candidate, person_idx]``.
        n: Number of candidates to return. Values ``<= 0`` yield only the
            person's own row; values above the candidate count yield everyone.

    Returns:
        A DataFrame whose first row is the person (with ``score``, ``scoring_to``
        and ``scoring_from`` set to 0), followed by the selected candidates
        ordered from highest to lowest ``score``.
    """
    # The person's own row; .assign returns a new frame, so self_df is untouched.
    own_row = self_df.iloc[[person_idx]].assign(score=0, scoring_to=0, scoring_from=0)

    row_scores = np.asarray(score_matrix[person_idx])
    n_top = max(int(n), 0)
    # Candidate positions, best first. Reversing before slicing keeps n == 0
    # meaning "no candidates" (a plain [-0:] slice would select everyone).
    top_positions = np.argsort(row_scores)[::-1][:n_top]
    if top_positions.size == 0:
        return own_row

    # Copy before adding columns so the caller's frame is never written through.
    favorites = info_df.iloc[top_positions].copy()
    favorites["score"] = row_scores[top_positions]
    favorites["scoring_to"] = scoring_to[person_idx, top_positions]
    favorites["scoring_from"] = scoring_from[top_positions, person_idx]

    return pd.concat([own_row, favorites])


def get_n_MF(person_idx: int, n=3) -> pd.DataFrame:
    """
    Top ``n`` female candidates for one heterosexual male (male -> female).

    ``person_idx`` is looked up in ``heterosexual_male_df``; candidates come from
    ``heterosexual_female_df`` and are ranked on the transposed ``total_hetro``
    matrix. ``total_MF`` feeds the ``scoring_to`` column and ``total_FM`` the
    ``scoring_from`` column.
    """
    return get_n_fav_person(
        person_idx=person_idx,
        score_matrix=total_hetro.T,
        info_df=heterosexual_female_df,
        self_df=heterosexual_male_df,
        scoring_to=total_MF,
        scoring_from=total_FM,
        n=n,
    )

def get_n_FM(person_idx: int, n=3) -> pd.DataFrame:
    """
    Top ``n`` male candidates for one heterosexual female (female -> male).

    ``person_idx`` is looked up in ``heterosexual_female_df``; candidates come
    from ``heterosexual_male_df`` and are ranked on ``total_hetro``.
    ``total_FM`` feeds the ``scoring_to`` column and ``total_MF`` the
    ``scoring_from`` column.
    """
    return get_n_fav_person(
        person_idx=person_idx,
        score_matrix=total_hetro,
        info_df=heterosexual_male_df,
        self_df=heterosexual_female_df,
        scoring_to=total_FM,
        scoring_from=total_MF,
        n=n,
    )

def get_n_MM(person_idx: int, n=3) -> pd.DataFrame:
    """
    Top ``n`` candidates for one person in the homosexual male group.

    Both the person and the candidates come from ``homosexual_male_df``, and
    ``total_MM`` serves as the ranking matrix as well as the source of the
    ``scoring_to`` and ``scoring_from`` columns.
    """
    return get_n_fav_person(
        person_idx=person_idx,
        score_matrix=total_MM,
        info_df=homosexual_male_df,
        self_df=homosexual_male_df,
        scoring_to=total_MM,
        scoring_from=total_MM,
        n=n,
    )

def get_n_FF(person_idx: int, n=3) -> pd.DataFrame:
    """
    Top ``n`` candidates for one person in the homosexual female group.

    Both the person and the candidates come from ``homosexual_female_df``, and
    ``total_FF`` serves as the ranking matrix as well as the source of the
    ``scoring_to`` and ``scoring_from`` columns.
    """
    return get_n_fav_person(
        person_idx=person_idx,
        score_matrix=total_FF,
        info_df=homosexual_female_df,
        self_df=homosexual_female_df,
        scoring_to=total_FF,
        scoring_from=total_FF,
        n=n,
    )



In [None]:
# Show the top 7 male candidates for the heterosexual female at row position 34.
# Alternative look-ups kept for reference:
# get_n_MF(68, 7)
# get_n_FF(bqh_FF, 7)
get_n_FM(person_idx=34, n=7)

In [None]:
# heterosexual_male_df.iloc[np.argsort(total_FM[zyw])[:7]]
# heterosexual_female_df.iloc[np.argsort(total_MF[csp])[:7]]
# homosexual_female_df.iloc[np.argsort(total_FF[wrj_FF])[:7]]