# Read Data from Google Sheet

In [25]:
from google.oauth2 import service_account
from googleapiclient.discovery import build
import pandas as pd

# Replace this with your downloaded JSON credentials file path
SERVICE_ACCOUNT_FILE = 'refreshing-park-454420-u2-55ad31703fbb.json'

# Replace this with your Google Sheet ID from the URL
SPREADSHEET_ID = '1Cz8HyMIBeDZUN_uLQ-TrFTc1eaNO29qLNXr4vg5FGq0'

# Adjust if your data is not in 'Sheet1' or if you need a specific range
RANGE_NAME = 'Răspunsuri la formular 1!A:Z'

credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'])

service = build('sheets', 'v4', credentials=credentials)

# Get data from your sheet
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId=SPREADSHEET_ID, range=RANGE_NAME).execute()
values = result.get('values', [])

if not values:
    print('No data found in your Google Sheet.')
else:
    df = pd.DataFrame(values[1:], columns=values[0])
    print(df.head())


        Marcaj de timp      What is your name? How old are you?  \
0  20.03.2025 13:29:18  George-Alexandru Marin               22   
1  20.03.2025 13:29:58                Mariana                21   
2  20.03.2025 13:30:47              Tzio Mping               26   
3  20.03.2025 13:31:24                  Andrei               21   
4  20.03.2025 13:33:50                    Ella               24   

  How tall are you? What is your occupation?  \
0      170 - 175 cm                       IT   
1      170 - 175 cm                 Student    
2      180 - 185 cm        Digital Marketing   
3      195 - 200 cm                  student   
4      175 - 180 cm             Antreprenor    

  What are your main hobbies or interests? (Select all that apply)  \
0                Gym, Sports, Cooking/Baking, Gaming                 
1  Hiking, Tv shows/Movies, Music/Art, Socializin...                 
2  Traveling, Gaming, Gym, Cooking/Baking, Cars/M...                 
3                  Socializi

In [26]:
df.describe()   

Unnamed: 0,Marcaj de timp,What is your name?,How old are you?,How tall are you?,What is your occupation?,What are your main hobbies or interests? (Select all that apply),Do you have any pets?,What is your favorite music genre? (Select all that apply),Who is your favorite band or artist?,What type of movie or TV show do you prefer? (Select all that apply),...,Which of these are on your bucket list? (Select all that apply),Which of the following vices would you say you have? (Select all that apply):,Which gender do you prefer for your ideal partner? (Select all that apply),What physical attributes are most attractive to you in a partner? (Select all that apply),What age range do you prefer?,Which best describes your preferred activity level for a partner?,How important is it that your partner is pet-friendly?,How important is it that your partner is child-friendly?,Which of these traits would be deal breakers for you? (Select all that apply),What is your gender?
count,519,519,519,519,519,519,519,519,519.0,519,...,519,519,519,519,519,519,519,519,519,515
unique,518,518,32,11,22,394,6,162,511.0,133,...,88,25,4,30,5,4,5,5,131,4
top,20.03.2025 13:37:46,Matthew Evans,30,200+ cm,Healthcare,Sports,"No, but I would love to",Manele,,Horror/Thriller,...,Traveling to a new continent,Smoking,Male,Stylish/Well-groomed,18–25,Moderately active,1,3,Poor hygiene,Female
freq,2,2,25,55,70,13,94,27,7.0,34,...,41,60,137,61,119,154,115,129,39,134


In [3]:
# Print the hole data
print(df)

          Marcaj de timp      What is your name? How old are you?  \
0    20.03.2025 13:29:18  George-Alexandru Marin               22   
1    20.03.2025 13:29:58                Mariana                21   
2    20.03.2025 13:30:47              Tzio Mping               26   
3    20.03.2025 13:31:24                  Andrei               21   
4    20.03.2025 13:33:50                    Ella               24   
..                   ...                     ...              ...   
511  20.03.2025 14:02:31      Emily Rodriguez MD               38   
512  20.03.2025 14:02:34            Kevin Steele               42   
513  20.03.2025 14:02:37           Kristi Romero               24   
514  20.03.2025 14:02:40           Natalie Smith               24   
515  20.03.2025 14:02:43             Alan Kelley               38   

    How tall are you? What is your occupation?  \
0        170 - 175 cm                       IT   
1        170 - 175 cm                 Student    
2        180 - 185 cm

In [7]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Incarci modelul pretrained Sentence Transformer
model = SentenceTransformer('all-MiniLM-L6-v2')

# Citesti datele din Google Sheets (ai facut deja asta anterior)
# presupunand ca ai deja dataframe-ul df incarcat:
# df = pd.read_csv('your-google-sheet-export.csv') 
# sau cu Google Sheets API cum ai implementat anterior

# Exemplu concret cu două persoane (din dataframe-ul tau df)
user_1 = df.iloc[0]  # Prima persoana (randul 0)
user_2 = df.iloc[1]  # A doua persoana (randul 1)

# Lista de coloane relevante pentru matching (personalizeaza conform formularului tau)
matching_columns = [
    "What are your main hobbies or interests? (Select all that apply)", "What is your occupation?",
    "Do you have any pets?", "What is your favorite music genre? (Select all that apply)",
    "What type of movie or TV show do you prefer? (Select all that apply)",
    "What type of physical activity do you prefer? (Select all that apply)",
    "Which type of vacation do you prefer? (Select all that apply)", "Which of these are on your bucket list? (Select all that apply)",
    "Which of the following vices would you say you have? (Select all that apply):"
]

# Functie pentru calculul similaritatii între doi utilizatori
def calculate_similarity(user_a, user_b, columns, model):
    scores = []
    for column in columns:
        response_a = str(user_a[column])
        preference_b = str(user_b[column])

        # Creezi embedding-uri pentru fiecare pereche întrebare-răspuns
        embeddings = model.encode([response_a, preference_b])

        # Calculezi similaritatea cosine
        sim = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
        scores.append(sim)

    # Similaritatea medie pe toate intrebarile
    average_similarity = np.mean(scores)
    return average_similarity

# Calculeaza similaritatea
similarity_score = calculate_similarity(user_1, user_2, matching_columns, model)

# Afiseaza scorul obtinut
print(f"Compatibility Score between '{user_1['What is your name?']}' and '{user_2['What is your name?']}': {similarity_score:.3f}")


Compatibility Score between 'George-Alexandru Marin' and 'Mariana ': 0.724


In [9]:
# Numarul total de utilizatori
num_users = len(df)

# Matrice pentru scorurile de similaritate
similarity_matrix = np.zeros((num_users, num_users))

for i in range(num_users):
    for j in range(num_users):
        if i != j:
            similarity_matrix[i, j] = calculate_similarity(df.iloc[i], df.iloc[j], matching_columns, model)

# Transformă matricea în DataFrame pentru ușurință de vizualizare
similarity_df = pd.DataFrame(similarity_matrix, columns=df['What is your name?'], index=df['What is your name?'])

# Afiseaza primele 5 randuri
print(similarity_df.head())

What is your name?      George-Alexandru Marin  Mariana   Tzio Mping  \
What is your name?                                                     
George-Alexandru Marin                0.000000  0.724466    0.495795   
Mariana                               0.724466  0.000000    0.496185   
Tzio Mping                            0.495795  0.496185    0.000000   
Andrei                                0.355407  0.481997    0.570429   
Ella                                  0.561062  0.486539    0.486007   

What is your name?        Andrei      Ella  Cavescu Dumitru  Ana Lipianu  \
What is your name?                                                         
George-Alexandru Marin  0.355407  0.561062         0.661822     0.453462   
Mariana                 0.481997  0.486539         0.679739     0.554207   
Tzio Mping              0.570429  0.486007         0.673404     0.563575   
Andrei                  0.000000  0.409740         0.549936     0.607871   
Ella                    0.409740  0.000

In [13]:
# Afiseaza cel mai compatibil utilizator pentru fiecare persoana
for i in range(num_users):
    user_name = df.iloc[i]['What is your name?']
    most_compatible_user = similarity_df[user_name].idxmax()
    compatibility_score = similarity_df[user_name].max()
    print(f"Most compatible user for '{user_name}': '{most_compatible_user}' with a compatibility score of {compatibility_score:.3f}")

Most compatible user for 'George-Alexandru Marin': 'Katherine Owens' with a compatibility score of 0.747
Most compatible user for 'Mariana ': 'Gregory Williams' with a compatibility score of 0.727
Most compatible user for 'Tzio Mping': 'Nancy Roberts' with a compatibility score of 0.760
Most compatible user for 'Andrei': 'Alexis Cook' with a compatibility score of 0.705
Most compatible user for 'Ella': 'Denise Smith' with a compatibility score of 0.654
Most compatible user for 'Cavescu Dumitru': 'Cojocaru Andrei Radu ' with a compatibility score of 0.739
Most compatible user for 'Ana Lipianu': 'Thomas Carr' with a compatibility score of 0.690
Most compatible user for 'Chandler Bing': 'Jason Jennings' with a compatibility score of 0.717
Most compatible user for 'Cojocaru Andrei Radu ': 'Cavescu Dumitru' with a compatibility score of 0.739
Most compatible user for 'Mandru Luca ': 'Tony Grimes' with a compatibility score of 0.776
Most compatible user for 'Monica Geller': 'Bonnie Werner' w

TypeError: unsupported format string passed to Series.__format__

In [16]:
# Show the compatibilities for Ana Lipianu with George-Alexandru Marin and Mandru Luca
print(similarity_df['Ana Lipianu']['George-Alexandru Marin'])
print(similarity_df['Ana Lipianu']['Mandru Luca '])

0.45346179604530334
0.5067331790924072


In [24]:
# Calculate compatibility between Ana Lipianu and Stefan Caraenache
compatibility_score = calculate_similarity(df.iloc[6], df.iloc[517], matching_columns, model)
print(f"Compatibility Score between '{df.iloc[6]['What is your name?']}' and '{df.iloc[517]['What is your name?']}': {compatibility_score:.3f}")

# Print compabilities for Ana Lipianu
print(similarity_df['Ana Lipianu'].sort_values(ascending=False))

Compatibility Score between 'Ana Lipianu' and 'Stefan Caraenache': 0.636
What is your name?
Thomas Carr           0.690400
Cynthia Butler        0.655429
Jodi Adkins           0.655209
Jerry Hill            0.648750
Angelica Velasquez    0.644394
                        ...   
Dominic Green         0.287915
Rachel Thompson       0.285889
Angela Marshall       0.279686
Richard Nelson        0.278558
Ana Lipianu           0.000000
Name: Ana Lipianu, Length: 516, dtype: float64


In [38]:
# Calculate ccompatibility for Erin $$$ with everyone and keep the 10 most compatible
compatibilities = []
for i in range(num_users):
    compatibility_score = calculate_similarity(df.iloc[518], df.iloc[i], matching_columns, model)
    compatibilities.append((df.iloc[i]['What is your name?'], compatibility_score))

# Sorteaza compatibilitatile si afiseaza primele 10
compatibilities.sort(key=lambda x: x[1], reverse=True)
print(compatibilities[:10])


# for i in range(num_users):
#     compatibility_score = calculate_similarity(df.iloc[518], df.iloc[i], matching_columns, model)
#     print(f"Compatibility Score between '{df.iloc[518]['What is your name?']}' and '{df.iloc[i]['What is your name?']}': {compatibility_score:.3f}")


[('Richard Burton', 0.7942384), ('Ella', 0.76357436), ('Beth Rose', 0.72024035), ('Jesse Boone', 0.70739704), ('Alia Andrei', 0.69294655), ('Ian Simon', 0.6922599), ('Robert Woodard', 0.6921126), ('Daniel Shah', 0.68855274), ('John Bailey', 0.68717164), ('John Allen', 0.68408614)]


KeyError: 'Erin $$$'