<a href="https://colab.research.google.com/github/asia-kamysh/Random_Coffee/blob/main/RC_v_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import datetime
import requests
import time
import pandas as pd
import gspread
from google.colab import files, auth
from google.auth import default


# Authenticate the Colab user, then build a gspread client (`gc`) from the
# default Google credentials.
# NOTE(review): default() presumably picks up the credentials established by
# authenticate_user() — keep these three calls in this order.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Define the name of the current meetings table
# NOTE(review): this constant is not referenced anywhere else in the visible
# notebook — confirm it is still needed.
MEETINGS_CURRENT_TABLE_NAME = 'test_current'

# Mount Google Drive at /content/drive so that files stored there are reachable
from google.colab import drive
drive.mount('/content/drive')

# Print the absolute path of the current working directory; the relative CSV
# path read further down is resolved against it
import pathlib
print(pathlib.Path().resolve())

# Read the meeting-history table; its column '0' holds the row labels, so it
# becomes the index and is removed from the regular columns
df = pd.read_csv('2024-06-05.csv', sep=',').set_index('0')

# Parse every remaining column as YYYY-MM-DD dates; cells that cannot be
# parsed are coerced to NaT instead of raising
for col in df.columns:
    parsed = pd.to_datetime(df[col], format='%Y-%m-%d', errors='coerce')
    df[col] = parsed

# Helper that strips the time-of-day from a datetime-like value
def to_date_without_time(dt):
    """Return only the calendar-date part of *dt*, obtained via ``dt.date()``."""
    calendar_date = dt.date()
    return calendar_date

# Reduce every cell of the history table to a plain date (no time component)
def _cell_to_date(cell):
    return to_date_without_time(pd.to_datetime(cell))

df = df.applymap(_cell_to_date)

# Fetch the first worksheet of the 'Random_Coffee' spreadsheet
worksheet = gc.open('Random_Coffee').get_worksheet(0)

# Read the whole sheet as a list of rows (each row is a list of cell values)
rows = worksheet.get_all_values()

# Build the participants table: the first sheet row supplies the headers,
# the remaining rows are the answers, indexed by the participant's name
data = pd.DataFrame(rows)
data.columns = data.iloc[0]
data = data.iloc[1:].set_index('Укажи Имя и Фамилию\nEnter your first and last name')

# Replace the long form headers with short column names
data.columns = ['Timestamp', 'Contact', 'Format', 'City', 'Interests', 'Community', 'Language', 'Frequency', 'Intro', 'intro', 'intro', 'STATUS', 'PERSONAL_FORM_LINK']

# Translate the textual frequency answers into a number of weeks
FREQUENCY_WEEKS = {
    'Раз в неделю/Once a week': 1,
    'Раз в две недели/Once in two weeks': 2,
    'Раз в четыре недели/Once in four weeks': 4,
}
for answer, weeks in FREQUENCY_WEEKS.items():
    data['Frequency'] = data['Frequency'].replace(answer, weeks)


In [None]:
# Placeholder date stored in the history table for two people who have never met
NEVER_MET_DATE = datetime.date(2001, 1, 1)


def _split_choices(answer):
    """Return the set of options contained in a ', '-separated multi-choice answer."""
    return set(answer.split(', '))


def _is_compatible(data, name1, name2):
    """Return True if two people satisfy the format, city, language and community rules."""
    shared_formats = _split_choices(data.loc[name1, 'Format']) & _split_choices(data.loc[name2, 'Format'])
    if not shared_formats:
        return False
    # The city only matters when offline is the single format both accept
    if shared_formats == {'Оффлайн/Offline'} and data.loc[name1, 'City'] != data.loc[name2, 'City']:
        return False
    if not (_split_choices(data.loc[name1, 'Language']) & _split_choices(data.loc[name2, 'Language'])):
        return False
    # Two 'Community Global' members are never paired with each other
    both_global = (
        data.loc[name1, 'Community'] == 'Community Global'
        and data.loc[name2, 'Community'] == 'Community Global'
    )
    return not both_global


def _available_people(data, df, next_round):
    """Return the set of column names in df whose owners are due for a meeting."""
    available = set()
    for name in df.columns:
        if name not in data.index:
            continue
        if data.loc[name, 'STATUS'] not in ('active', ''):
            continue
        last_meeting = df[name].max()
        wait = datetime.timedelta(weeks=int(data.loc[name, 'Frequency']))
        # Available only once strictly more than their chosen interval has passed
        if next_round - last_meeting > wait:
            available.add(name)
    return available


def create_matches(data, df, next_round=None):
    """
    Create pairs of people who want to meet, based on availability, meeting
    frequency, format, city, language and community. Among the compatible
    candidates the one sharing the most interests is chosen (the first one in
    the shuffled order wins ties).

    Parameters:
    data (DataFrame): Per-person metadata indexed by name. The columns read here are
        'STATUS', 'Frequency', 'Format', 'City', 'Language', 'Community' and 'Interests'.
    df (DataFrame): Meeting history; the cell at [row person, column person] holds the
        date of their last meeting, or 2001-01-01 if they have never met.
        The passed-in object is not modified.
    next_round (date, optional): Date recorded for the new meetings and used for the
        availability check. Defaults to today's date.

    Returns:
    pairs (list): List of (name, best_match) tuples.
    df (DataFrame): Row-shuffled copy of the history with the new meeting dates filled in.
    """
    if next_round is None:
        next_round = datetime.date.today()

    # Shuffle the rows so that every call walks the people in a different order
    # (sample() returns a new DataFrame, so the caller's table stays untouched)
    df = df.sample(frac=1)

    available = _available_people(data, df, next_round)

    pairs = []
    already_paired = set()

    for name1 in df.index:
        if name1 not in available or name1 in already_paired:
            continue

        # Candidates come from the row labels. Everyone in `available` is also a
        # column label, so the df.loc / df.at look-ups below cannot miss.
        candidates = [
            name2
            for name2 in df.index
            if name2 != name1
            and name2 in available
            and name2 not in already_paired
            and df.loc[name1, name2] == NEVER_MET_DATE
            and _is_compatible(data, name1, name2)
        ]
        if not candidates:
            continue

        interests = _split_choices(data.loc[name1, 'Interests'])
        # max() keeps the first candidate on ties, matching the shuffled order
        best_match = max(
            candidates,
            key=lambda name2: len(interests & _split_choices(data.loc[name2, 'Interests'])),
        )

        pairs.append((name1, best_match))
        # Record the meeting symmetrically in the history table
        df.at[name1, best_match] = next_round
        df.at[best_match, name1] = next_round
        already_paired.update((name1, best_match))

    return pairs, df

# Track the largest set of pairs seen so far and the history table that goes with it
len_max_pairs = 0
max_pairs = []
final_updated_df = df

# create_matches shuffles the table on every call, so run it 100 times and
# keep the attempt that produced the most pairs
for n in range(100):
    pairs, updated_df = create_matches(data, df)
    if len(pairs) > len_max_pairs:
        # Remember the new best size; otherwise any later non-empty attempt
        # would overwrite a larger earlier one
        len_max_pairs = len(pairs)
        max_pairs = pairs
        final_updated_df = updated_df

pairs = max_pairs
updated_df = final_updated_df

pairs_to_send = []

# Print and prepare the pairs for sending
print('the pairs are:')
for p in pairs:
    n1, n2 = p
    print(n1, 'and', n2)
    contacts = (data.loc[n1]['Contact'], data.loc[n2]['Contact'])
    print(contacts)
    com_int = set(data.loc[n1]['Interests'].split(', ')).intersection(set(data.loc[n2]['Interests'].split(', ')))
    # Interest labels look like 'Russian/English'. Sort them for a stable order,
    # skip the empty label that appears when both people left the field blank,
    # and reuse the whole label for both languages when it has no '/'.
    com_int_rus = []
    com_int_eng = []
    for label in sorted(com_int):
        if not label:
            continue
        rus, slash, eng = label.partition('/')
        com_int_rus.append(rus)
        com_int_eng.append(eng if slash else rus)
    # One language the two people share (currently not part of the payload)
    com_lang = list(set(data.loc[n1]['Language'].split(', ')).intersection(set(data.loc[n2]['Language'].split(', '))))[-1]
    print('[' + ', '.join(com_int_rus) + ']')
    print('[' + ', '.join(com_int_eng) + ']')
    print()
    pairs_to_send.append([n1, n2, contacts, com_int_rus, com_int_eng])

In [None]:
def _clean_handle(contact):
    """Strip '@', spaces and the 'https://t.me/' prefix from a contact string."""
    return contact.replace('@', '').replace(' ', '').replace('https://t.me/', '')


def send_pair(pair, url='http://2.59.41.41:8000/new_pair/', timeout=30):
    """Send one matched pair to the bot.

    Parameters:
    pair (list): Entry of the form [name1, name2, (contact1, contact2), interests, ...];
        only the contacts (index 2) and the interests at index 3 are used.
    url (str): Endpoint that receives the pair as a JSON POST body.
    timeout (float): Seconds to wait for the server before giving up, so an
        unresponsive bot cannot hang the notebook indefinitely.

    Raises:
    requests.HTTPError: If the server answers with an error status.
    requests.Timeout: If the server does not respond within `timeout` seconds.
    """
    first_contact, second_contact = pair[2][0], pair[2][1]

    payload = {
        "first_match_handle": _clean_handle(first_contact),
        "second_match_handle": _clean_handle(second_contact),
        "common_interests": pair[3],
    }

    response = requests.post(url, json=payload, timeout=timeout)

    # Surface 4xx/5xx answers as exceptions instead of silently continuing
    response.raise_for_status()

# Deliver the prepared pairs one at a time
for pair in pairs_to_send:
    # Pause for 3 seconds ahead of every request (the first one included)
    time.sleep(3)

    # Hand the pair over to the bot
    send_pair(pair)

    # Report what has just been delivered
    print('SENT:', pair, sep='\n')

In [None]:
# Today's date in ISO form (YYYY-MM-DD) names the output file
next_round = datetime.date.today().isoformat()
csv_name = next_round + '.csv'

# Write the updated history table to that CSV file
updated_df.to_csv(csv_name, encoding='utf-8-sig')

# Offer the freshly written file as a browser download
files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>