In [1]:
# Recipe - Pattern 2B - Persistent window (multiple reads using an event loop + updates data in window)
# https://www.pysimplegui.org/en/latest/cookbook/

## The link above should contain information about how to refresh the GUI so that the match generated data can be displayed

In [2]:
import PySimpleGUI as sg
import csv
import pickle
import random
import pandas as pd
import numpy as np

In [3]:
# Function to import model from pickel file

def load(filename = "filename.pickle"):
    try:
        with open(filename, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        print("File not found!")

In [4]:
scaler = load("klust_scaler.pickle")
kmeans = load("klust_model.pickle")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# Function to determine user cluster

def determine_user_cluster(kmeans, X_user_scaled):

    predicted_cluster = kmeans.predict(X_user_scaled)

    return predicted_cluster[0]

In [12]:
def distance(row, user):
    result = 0
    for i, v in enumerate(row):
        result += (v - user[i])**2
    return result ** 0.5;

In [6]:
# We open the okcupid dataframe with clusters

cluster_data = pd.read_csv("okcupid_withclusters.csv").drop(columns='Unnamed: 0')

# We import the okcupid dataframe with clusters

X_scaled = pd.read_csv("okcupid_X_scaled.csv").drop(columns='Unnamed: 0')

In [7]:
cluster_data.head()

Unnamed: 0,age,status,sex,orientation,body_type,diet,drinks,drugs,education,ethnicity,...,smokes_type_yes,likes_dogs,likes_cats,dogs_type_no,dogs_type_yes,cats_type_no,cats_type_yes,offspring_type_no,offspring_type_yes,membership
0,22,single,m,straight,curvy,strictly anything,sometimes,,graduate,"asian, white",...,0,yes,yes,0,1,0,1,0,1,10
1,35,single,m,straight,average,mostly other,sometimes,sometimes,graduate,white,...,0,yes,yes,0,1,0,1,0,1,0
2,38,available,m,straight,thin,anything,sometimes,no,graduate,,...,0,no,yes,1,0,0,1,0,0,4
3,23,single,m,straight,thin,vegetarian,sometimes,no,graduate,white,...,0,no,yes,1,0,0,1,1,0,4
4,29,single,m,straight,average,,sometimes,,graduate,"asian, black, other",...,0,yes,yes,0,1,0,1,0,0,5


In [8]:
# Function to convert input user features to the needed format

def add_user_preference_columns(df_user_features):
    """
    Add columns based on user preferences.

    :param df_user_features: pandas DataFrame
    :return: Modified DataFrame
    """
    # Dogs
    if 'dogs_type_yes' in df_user_features.columns:
        df_user_features['dogs_type_no'] = 0
    else:
        df_user_features['dogs_type_yes'] = 0

    # Cats
    if 'cats_type_yes' in df_user_features.columns:
        df_user_features['cats_type_no'] = 0
    else:
        df_user_features['cats_type_yes'] = 0

    #drinks
    if 'drinks_type_no' in df_user_features.columns:
        df_user_features['drinks_type_yes'] = 0
        df_user_features['drinks_type_sometimes'] = 0
    elif 'drinks_type_yes' in df_user_features.columns:
        df_user_features['drinks_type_no'] = 0
        df_user_features['drinks_type_sometimes'] = 0
    else:
        df_user_features['drinks_type_yes'] = 0
        df_user_features['drinks_type_no'] = 0

     #drugs
    if 'drugs_type_no' in df_user_features.columns:
        df_user_features['drugs_type_yes'] = 0
        df_user_features['drugs_type_sometimes'] = 0
    elif 'drugs_type_yes' in df_user_features.columns:
        df_user_features['drugs_type_no'] = 0
        df_user_features['drugs_type_sometimes'] = 0
    else:
        df_user_features['drugs_type_yes'] = 0
        df_user_features['drugs_type_no'] = 0

    #smokes
    if 'smokes_type_no' in df_user_features.columns:
        df_user_features['smokes_type_yes'] = 0
        df_user_features['smokes_type_sometimes'] = 0
    elif 'smokes_type_yes' in df_user_features.columns:
        df_user_features['smokes_type_no'] = 0
        df_user_features['smokes_type_sometimes'] = 0
    else:
        df_user_features['smokes_type_yes'] = 0
        df_user_features['smokes_type_no'] = 0

    return df_user_features

In [9]:
# Creation of a dataframe containing the pictures of future matches

dict_pics = {'ladies':['lady_1.png', 'lady_2.png', 'lady_3.png', 'lady_4.png'],
            'gentlemen':['gentleman_1.png', 'gentleman_2.png', 'gentleman_3.png', 'gentleman_4.png']
            }

pics_df = pd.DataFrame(data = dict_pics)

In [26]:
cluster_data_users

Int64Index([   66,    70,   129,   139,   164,   183,   184,   185,   215,
              220,
            ...
            59557, 59592, 59609, 59614, 59685, 59734, 59856, 59875, 59904,
            59943],
           dtype='int64', length=3111)

In [24]:
X_scaled.dtypes

age                      float64
height                   float64
education_encoded        float64
smokes_type_no           float64
smokes_type_sometimes    float64
smokes_type_yes          float64
drugs_type_never         float64
drugs_type_sometimes     float64
drugs_type_often         float64
body_type_thin           float64
body_type_average        float64
body_type_curvy          float64
drinks_type_no           float64
drinks_type_sometimes    float64
drinks_type_yes          float64
dog_type_No              float64
dog_type_Yes             float64
cat_type_No              float64
cat_type_Yes             float64
dtype: object

In [14]:
# GUI

sg.theme('BrightColors')
sg.set_options(font=('Arial Bold', 16))


# Reading the .csv file with the generated match

filename = 'sample_match.csv' # please, replace with the actual file name from the file that has just been created

with open(filename, "r") as infile: 
            reader = csv.reader(infile)
            header_list = next(reader)
            data = list(reader)


            
# The interface is divided in the following areas areas: col0, col1, col2, col3, and col4


# col0 contains the title

col0 = sg.Column([[sg.Text("KLUST", text_color="#343434", font='Courier 40', pad = (400,0))]])


# col1 contains the user input that will be used to find a match

col1 = sg.Column([
    [sg.Frame('THIS IS ABOUT YOU', [
        [sg.Text(), sg.Column([
            [sg.Text('Age')],
            [sg.Input(key='user_age', enable_events=True, size=(10, 1))],
            [sg.Text('Gender')],
            [sg.Combo(['female', 'male'], default_value='', key='user_sex', enable_events=True)],
            [sg.Text('Orientation')],
            [sg.Combo(['straight', 'gay', 'bisexual'], default_value='', key='user_orientation', enable_events=True)],
            [sg.Text('Education')],
            [sg.Combo(['high school', 'undergraduate', 'graduate', 'postgraduate'], default_value='', key='user_education', enable_events=True)],
            [sg.Text('Do you smoke?')],
            [sg.Combo(['yes', 'no', 'sometimes'], default_value='', key='user_smokes', enable_events=True)],
            [sg.Text('Do you consume alcohol?')],
            [sg.Combo(['yes', 'no', 'sometimes'], default_value='', key='user_drinks', enable_events=True)],
            [sg.Text('Do you take drugs?')],
            [sg.Combo(['yes', 'no', 'sometimes'], default_value='', key='user_drugs', enable_events=True)],
            [sg.Text('Do you want kids?')],
            [sg.Combo(['Yes', 'No'], default_value='', key='user_kids', enable_events=True)],
            [sg.Text('Do you like cats?')],
            [sg.Combo(['yes', 'no'], default_value='', key='user_likes_cats', enable_events=True)],
            [sg.Text('Do you like dogs?')],
            [sg.Combo(['yes', 'no'], default_value='', key='user_likes_dogs', enable_events=True)],
        ], size=(250, 575), pad=(0, 0))]])] ], pad=(0, 0))



# col2 contains the button to gather the data from the user and pass it to a csv file

col2 = sg.Column([[sg.Button('Find Me a Match!', size=(15, 2), pad = (80,200))]],  size=(400, 550))



# col3 contains the AI-generated picture of the match

col3 = sg.Column([
    [
        sg.Frame('TIME TO INTRODUCE YOU TO YOUR MATCH!', [
            
        ])
    ],
    [
        sg.Image('?.png', expand_x=True, expand_y=True, key='-IMAGE-') # this key will be used later on to update the image
    ]
], pad=(0, 0))



# col4 contains general information about the match

col4 = sg.Column([[sg.Frame('INFORMATION ABOUT YOUR MATCH',
                             [[sg.Table(values=data,
                            headings=header_list,
                            max_col_width=25,
                            auto_size_columns=False,
                            justification='center',
                            num_rows=1, size = (3000,825), key='-MATCH_VALUES-') ]], pad=(0, 0), size = (4000,1000) )]])


# Layout of areas col1, col2, col3

layout = [[col0],[col1, col2, col3], [col4]]


# Title, definition and size of the GUI window

window = sg.Window('KLUST. Finding true love through K-means clusters.', layout, size=(1000, 750))



# Recording in a csv file the events (clicks) taking place in the GUI, that is, storing the user input in a csv file

while True:

    event, values = window.read()

    if event == sg.WINDOW_CLOSED:
        break

    
    elif event == 'Find Me a Match!': # When the "Find Me a Match!" button is pressed, the following code is executed

        # We first create a csv file with the user input data, which will be created to query the database
        
        with open('user_input.csv', 'w', newline='') as csvfile:
            
            writer = csv.writer(csvfile)
            
            values_keys = list(values.keys())
            values_keys.pop()
            
            values_values = list(values.values())
            values_values.pop()
            
            writer.writerow(values_keys)
            writer.writerow(values_values)      

        sg.popup('Looking for Love. \n\nPlease, be patient.', title='Important Message For You', font=('Courier', 52))

        



        # We need to transform them into a format compatible with the model
        # Moreover, due to time restrictions, we did not ask the user to input the height nor the body type;
        # for this reason, in this case, we will simply assign an aribitrary value to these two variables
 
        # Retrieve values entered by the user
        
        user_age = int(values["user_age"])
        user_sex = values['user_sex']
        user_orientation = values['user_orientation']
        user_age = values['user_age']
        user_education = values['user_education']
        user_drinks = values['user_drinks']
        user_drugs = values['user_drugs']
        user_smokes = values['user_smokes']
        user_likes_dogs = values['user_likes_dogs']
        user_likes_cats = values['user_likes_cats']



        user_height = 178
        user_body_type = 'average'
        
        
        # We create a dataframe based on the csv file and we add the two columns for height and body type
        # in specific positions; as keeping these positions is important for the clustering
        
        user_input = pd.read_csv("user_input.csv")
        
#####        
        
        # we also have to change "Female" for "f" and "Male" for "m"
        
        if user_input['user_sex'][0] == 'female':
            user_input['user_sex'][0] = 'f'
        elif user_input['user_sex'][0] == 'male':
            user_input['user_sex'][0] = 'm'
        
        
        
        
    
        
        
        
        # Encoding the user features and creating a new dataframe based on them
        
        user_features  = [user_sex, user_orientation, user_age, user_height, user_education , user_drinks, user_drugs, user_smokes,
                    user_body_type, user_likes_dogs, user_likes_cats]


        df_user_features = pd.DataFrame([user_features], columns=['sex', 'orientation','age', 'height','education', 'drinks',
                                                          'drugs', 'smokes', 'body type', 'dogs', 'cats'])


        # User drinks
        drinks_user_encoded = pd.get_dummies(df_user_features['drinks'], prefix='drinks_type')
        df_user_features = pd.concat([df_user_features, drinks_user_encoded], axis=1)
        # User drugs
        drugs_user_encoded = pd.get_dummies(df_user_features['drugs'], prefix='drugs_type')
        df_user_features = pd.concat([df_user_features, drugs_user_encoded], axis=1)
        # User smokes
        smokes_user_encoded = pd.get_dummies(df_user_features['smokes'], prefix='smokes_type')
        df_user_features = pd.concat([df_user_features, smokes_user_encoded], axis=1)
        # User cats
        cats_user_encoded = pd.get_dummies(df_user_features['cats'], prefix='cats_type')
        df_user_features = pd.concat([df_user_features, cats_user_encoded], axis=1)
        # User dogs
        dogs_user_encoded = pd.get_dummies(df_user_features['dogs'], prefix='dogs_type')
        df_user_features = pd.concat([df_user_features, dogs_user_encoded], axis=1)
        # Ordinal encoding of education
        education_mapping = {
            'high school': 0,
            'undergraduate': 1,
            'graduate': 2,
            'postgraduate': 3
        }

        df_user_features['education_encoded'] = df_user_features['education'].map(education_mapping)       
        
        
        # We need to add more columns (all together 19) to be able to fit the model depending on what the user inputed
        
        
        # We call the function add_user_preference_columns
        
        df_user_features = add_user_preference_columns(df_user_features)
        
        
        
        df_user_features["age"] = df_user_features.age.astype(float)
        
        user_numerical_df = df_user_features.select_dtypes(include=[np.number])
        
        
        print(user_numerical_df)
        
        
        # We predict the user cluster
        
        user_features_model = [ 'age','height','education_encoded', 	'smokes_type_no', 	'smokes_type_sometimes', 'smokes_type_yes',
            'drugs_type_no', 'drugs_type_sometimes', 'drugs_type_yes',

             	'drinks_type_no', 	'drinks_type_sometimes', 	'drinks_type_yes',  	'dogs_type_no', 	'dogs_type_yes',
       	'cats_type_no', 	'cats_type_yes']
        
        X_user = user_numerical_df[user_features_model].copy()

       
        X_user_scaled = scaler.transform(X_user)

        user_cluster = determine_user_cluster(kmeans, X_user_scaled)

        
        cluster_data_users = cluster_data[cluster_data['membership'] == user_cluster].index
        
        
        # We calculate the distances with the other members to calculate the highest affinity
        
        features = [ 'age','height','education_encoded',  'smokes_type_no',   'smokes_type_sometimes', 'smokes_type_yes',
            'drugs_type_no', 'drugs_type_sometimes', 'drugs_type_yes',
              'drinks_type_no',   'drinks_type_sometimes',  'drinks_type_yes',    'dogs_type_no',   'dogs_type_yes',
        'cats_type_no',   'cats_type_yes']
        

        distances = X_scaled.loc[cluster_data_users].apply(distance, axis=0, args=(features,)).sort_values()
        
                
        
        # We get only the top match
        
        
        
        top_match = cluster_data.loc[distances.index, features].head(1)
        

        
        # WE NOW NEED TO CONVERT THE TOP_MATCH TO THE DESIRED FORMAT COMPATIBLE WITH THE GUI COLUMNS
        # BEFORE SAVING IT AS CSV

        
        # We save the top match data as a csv file
        
        top_match.to_csv("top_match.csv")
                
        
        
########        
        
        if user_input['user_orientation'].values[0] == 'straight' and user_input['user_sex'].values[0] == 'f':
            wanted_gender = 'm'
        elif user_input['user_orientation'].values[0] == 'straight' and user_input['user_sex'].values[0] == 'm':
            wanted_gender = 'f'
        elif user_input['user_orientation'].values[0] == 'gay' and user_input['user_sex'].values[0] == 'f':
            wanted_gender = 'f'
        elif user_input['user_orientation'].values[0] == 'gay' and user_input['user_sex'].values[0] == 'm':
            wanted_gender = 'm'
        elif user_input['user_orientation'].values[0] == 'bisexual':
            possible_genders = ['f','m']
            wanted_gender = random.choice(possible_genders)
        
        
        
        if wanted_gender == 'm':
            pic_filename = pics_df['gentlemen'].sample().values[0]
            print(pic_filename)
            
        elif wanted_gender == 'f':
            pic_filename = pics_df['ladies'].sample().values[0]
            print(pic_filename)
            

        
        
        
        
        # We now should query our database and look for the best matches and generate a csv with the features of the best match
        
        # The information from the csv will then be prompted in col4 in the GUI
        
        with open("top_match.csv", "r") as infile: 
                    reader = csv.reader(infile)
                    header_list = next(reader)
                    data = list(reader)

        window['-MATCH_VALUES-'].update(values = data)
        
        
        

        # The information from the csv file will then be used to generate an image of the match through DALL-E
        # Given that it was not possible for us to connect to DALL-E due to billing issues,
        # we decided to instead prompt pictures of Angela Merkel

        
    
        new_image = 'pics/' + pic_filename  # replace the image with the image of the generated match
        window['-IMAGE-'].update(filename = new_image)

    
    
        

window.close()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_input['user_sex'][0] = 'm'


    age  height  drinks_type_no  drugs_type_no  smokes_type_no  cats_type_no  \
0  23.0     178               1              1               1             1   

   dogs_type_no  education_encoded  dogs_type_yes  cats_type_yes  \
0             1                  2              0              0   

   drinks_type_yes  drinks_type_sometimes  drugs_type_yes  \
0                0                      0               0   

   drugs_type_sometimes  smokes_type_yes  smokes_type_sometimes  
0                     0                0                      0  


TypeError: unsupported operand type(s) for -: 'float' and 'str'

In [None]:
df = pd.read_csv("user_input.csv")

In [None]:
def transform_input():
    df = pd.read_csv("user_input.csv")
    
    user_features  = ['age','height','education_encoded', 
                      'smokes_type_no', 'smokes_type_sometimes', 'smokes_type_yes',
                      'drugs_type_no', 'drugs_type_sometimes', 'drugs_type_yes',
                      'drinks_type_no', 'drinks_type_sometimes', 'drinks_type_yes', 
                      'dogs_type_no', 'dogs_type_yes','cats_type_no', 'cats_type_yes']
    
    input_transformed = pd.DataFrame(columns=user_features)
    
    input_transformed = input_transformed.append(new_row, ignore_index=True)
    
    
    return input_transformed

In [None]:
df = pd.read_csv("user_input.csv")


In [None]:
df

In [None]:
user_input

In [None]:
if user_input['user_sex'][0] == 'female':
    user_input['user_sex'][0] = 'f'
elif user_input['user_sex'][0] == 'male':
    user_input['user_sex'][0] = 'm'
              
          
        
        
if user_input['user_orientation'].values[0] == 'straight' and user_input['user_sex'].values[0] == 'f':
    wanted_gender = 'm'
elif user_input['user_orientation'].values[0] == 'straight' and user_input['user_sex'].values[0] == 'm':
    wanted_gender = 'f'
elif user_input['user_orientation'].values[0] == 'gay' and user_input['user_sex'].values[0] == 'f':
    wanted_gender = 'f'
elif user_input['user_orientation'].values[0] == 'gay' and user_input['user_sex'].values[0] == 'm':
    wanted_gender = 'm'
elif user_input['user_orientation'].values[0] == 'bisexual':
    possible_genders = ['f','m']
    wanted_gender = random.choice(possible_genders)
        
wanted_gender  

In [None]:
df = pd.read_csv("user_input.csv")
    
user_features  = ['age','height','education_encoded', 
                      'smokes_type_no', 'smokes_type_sometimes', 'smokes_type_yes',
                      'drugs_type_no', 'drugs_type_sometimes', 'drugs_type_yes',
                      'drinks_type_no', 'drinks_type_sometimes', 'drinks_type_yes', 
                      'dogs_type_no', 'dogs_type_yes','cats_type_no', 'cats_type_yes']
    
input_transformed = pd.DataFrame(columns=user_features)
    
input_transformed = input_transformed.append(pd.Series(0, index=df.columns), ignore_index=True)