In [None]:
import sys
from pathlib import Path
# Make the ``Matcher`` package importable. The notebook lives in
# tools/matching_related/, so the directory that contains ``Matcher`` has to be
# placed on sys.path before the imports in the next cell can succeed.
_cwd = Path.cwd()

# Directories probed in priority order; the first one holding a "Matcher"
# entry wins. When none of them does, the parent of the working directory is
# used as a best guess (same as the second candidate).
_search_roots = (_cwd, _cwd.parent, _cwd / "tools")
_tools = next(
    (root for root in _search_roots if (root / "Matcher").exists()),
    _cwd.parent,
)

# Prepend (rather than append) so this directory is searched first, and skip
# the insert when the entry is already present so re-running the cell does not
# keep growing sys.path.
_tools_entry = str(_tools)
if _tools_entry not in sys.path:
    sys.path.insert(0, _tools_entry)

In [None]:
%reload_ext autoreload
%autoreload 2

from Matcher.utilities import DataLoader
from Matcher.scorer import ScorerConfig, PreferenceScorer, SimilarityScorer
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Shared inputs for every scorer below: one loader, one config, four cohorts.
dataloader = DataLoader()

# No overrides are passed, so ScorerConfig is built with whatever defaults it
# defines.
config = ScorerConfig()

# One frame per cohort, loaded by dataset name.
# NOTE(review): the "embedded_" prefix suggests these frames already carry
# precomputed embeddings — confirm against DataLoader.
_load = dataloader.load_data
heterosexual_female_df = _load("embedded_heterosexual_female_df")
heterosexual_male_df = _load("embedded_heterosexual_male_df")
homosexual_female_df = _load("embedded_homosexual_female_df")
homosexual_male_df = _load("embedded_homosexual_male_df")

In [None]:
# config = ScorerConfig(
#     base_preference_score = 30,  # base score for preference
#     unqualified_penalty = -np.inf,  # penalty for unqualified
#     preferred_wxid_value = 999,  # value for preferred_wxid
#     timezone_difference_base_penalty = -2,  # base penalty per hour
#     timezone_difference_penalty_multiplier_threshold = 5,  # threshold for penalty multiplier (inclusive)
#     timezone_difference_penalty_multiplier = 1.25,  # multiplier for penalty if time difference is greater than threshold
#     grade_difference_base_penalty = 2.25,  # penalty per grade difference
#     grade_difference_penalty_multiplier_threshold = 3,  # threshold for penalty multiplier (inclusive)
#     grade_difference_penalty_multiplier = 2,  # multiplier for penalty if grade difference is greater than threshold
#     same_location_reward = 10,  # reward for same location
#     same_location_group_reward = 5,  # reward for same location group
#     different_location_group_penalty = -1,  # penalty for different location group
#     mbti_transform = "scaled_sigmoid",  # a function to transform the mbti score to a range of -50 to 50
#     mbti_multiplier = 0.26,  # multiplier for mbti score
#     reply_frequency_reward = {
#         "1": -4,  # 开启勿扰模式, 闲下来再回
#         "2": -1,  # 攒很多消息, 逐一回复
#         "3": 0,  # 佛系查看, 不定时回复
#         "4": 2,  # 经常看手机, 看到就回
#         "5": 6,  # 一直在线, 基本秒回
#     },
# )

def _score_preference(first_df, second_df):
    """Build a PreferenceScorer for the two frames and compute its score matrix.

    The frames are forwarded to ``PreferenceScorer`` in the order given, along
    with the notebook-level ``config``.

    Returns:
        A ``(scorer, score_matrix)`` pair, where ``score_matrix`` is the result
        of ``scorer.calculate_score_matrix()``.
    """
    scorer = PreferenceScorer(config, first_df, second_df)
    return scorer, scorer.calculate_score_matrix()


# "FM": the female frame is passed first; "MF": the male frame is passed first.
# Each direction is built and scored before the next one starts.
FM_preference_scorer, FM_preference_res = _score_preference(
    heterosexual_female_df, heterosexual_male_df
)
MF_preference_scorer, MF_preference_res = _score_preference(
    heterosexual_male_df, heterosexual_female_df
)

In [None]:
# config = ScorerConfig(
#     base_similarity_score = 0,  # base score for similarity
    
#     hobbies_reward_multiplier = 25,  # reward for hobbies bonus
#     hobbies_bonus_threshold = 0.67,  # threshold for hobbies bonus
#     hobbies_bonus_multiplier = 2.5,  # multiplier for hobbies bonus
    
#     fav_movies_reward_multiplier = 15,  # reward for fav_movies bonus
#     fav_movies_bonus_threshold = 0.65,  # threshold for fav_movies bonus
#     fav_movies_bonus_multiplier = 2.5,  # multiplier for fav_movies bonus
    
#     expectation_reward_multiplier = 20,  # reward for expectation bonus
#     expectation_thresholds = (
#         0.6,
#         0.8,
#     ),  # thresholds for expectation bonus
#     expectation_penalty_multiplier = 4,  # multiplier for expectation penalty
#     expectation_bonus_multiplier = 2,  # multiplier for expectation bonus
    
#     weekend_arrangement_reward_multiplier = 10,  # reward for weekend_arrangement bonus
#     weekend_arrangement_thresholds = (
#         0.35,
#         0.7,
#     ),  # thresholds for weekend_arrangement bonus
#     weekend_arrangement_penalty_multiplier = 2,  # multiplier for weekend_arrangement penalty
#     weekend_arrangement_bonus_multiplier = 2,  # multiplier for weekend_arrangement bonus
    
#     wish_reward_multiplier = 7,  # reward for wish bonus
#     wish_bonus_threshold = 0.65,  # threshold for wish bonus
#     wish_bonus_multiplier = 1.5,  # multiplier for wish bonus
# )

def _score_similarity(first_df, second_df):
    """Build a SimilarityScorer for the two frames and compute its score matrix.

    The frames are forwarded to ``SimilarityScorer`` in the order given, along
    with the notebook-level ``config``.

    Returns:
        A ``(scorer, score_matrix)`` pair, where ``score_matrix`` is the result
        of ``scorer.calculate_score_matrix()``.
    """
    scorer = SimilarityScorer(config, first_df, second_df)
    return scorer, scorer.calculate_score_matrix()


# "FM": the female frame is passed first; "MF": the male frame is passed first.
# Each direction is built and scored before the next one starts.
FM_similarity_scorer, FM_similarity_res = _score_similarity(
    heterosexual_female_df, heterosexual_male_df
)
MF_similarity_scorer, MF_similarity_res = _score_similarity(
    heterosexual_male_df, heterosexual_female_df
)

In [None]:
def _drop_neg_inf(scores):
    """Return the entries of ``scores`` that are not equal to ``-np.inf``.

    NOTE(review): -inf is presumably the marker for unqualified pairs (the
    commented-out reference config lists ``unqualified_penalty = -np.inf``) —
    confirm against ScorerConfig's actual default.
    """
    return scores[scores != -np.inf]


def _plot_histograms(panels):
    """Draw one 100-bin histogram per ``(values, title)`` pair on a 3x2 grid.

    Panels fill the grid in the order given, row by row (subplot slots 1..6),
    and the figure is shown once all of them are drawn.
    """
    plt.figure(figsize=(16, 15))
    for slot, (values, title) in enumerate(panels, start=1):
        plt.subplot(3, 2, slot)
        plt.hist(values, bins=100)
        plt.title(title)
    plt.show()


# Combined score per direction: preference plus similarity, added element-wise.
total_FM = FM_preference_res + FM_similarity_res
total_MF = MF_preference_res + MF_similarity_res

# For every histogram the -inf entries are removed first, then the remaining
# values are clipped so that outliers pile up in the edge bins instead of
# stretching the x-axis.
clipped_FM_pref_res = np.clip(_drop_neg_inf(FM_preference_res), -100, 200)
clipped_MF_pref_res = np.clip(_drop_neg_inf(MF_preference_res), -100, 200)

# These two names hold the filtered *similarity* scores once the cell finishes.
FM_non_inf_res = _drop_neg_inf(FM_similarity_res)
MF_non_inf_res = _drop_neg_inf(MF_similarity_res)
clipped_FM_sim_res = np.clip(FM_non_inf_res, -100, 300)
clipped_MF_sim_res = np.clip(MF_non_inf_res, -100, 300)

total_FM_non_inf_res = _drop_neg_inf(total_FM)
total_MF_non_inf_res = _drop_neg_inf(total_MF)
clipped_total_FM_res = np.clip(total_FM_non_inf_res, -10, 450)
clipped_total_MF_res = np.clip(total_MF_non_inf_res, -10, 450)

# Left column: F -> M, right column: M -> F; rows: preference, similarity, total.
_plot_histograms(
    (
        (clipped_FM_pref_res, "F -> M preference"),
        (clipped_MF_pref_res, "M -> F preference"),
        (clipped_FM_sim_res, "F -> M similarity"),
        (clipped_MF_sim_res, "M -> F similarity"),
        (clipped_total_FM_res, "F -> M total"),
        (clipped_total_MF_res, "M -> F total"),
    )
)