# Load the female users (including phony names, passwords, etc.) into the AWS RDS Users table

## Import dependencies and load survey data from S3 bucket

In [4]:
import pandas as pd

In [5]:
# Speed-dating survey CSV hosted in the project's S3 bucket.
SURVEY_CSV_URL = 'https://aws-matchmaker-bucket.s3-us-west-2.amazonaws.com/SpeedDatingPreprocessed.csv'
df = pd.read_csv(SURVEY_CSV_URL)

In [6]:
# Preview the first five survey rows (one row per female/male pairing).
df.head(5)

Unnamed: 0,couple_id,f_age,f_imprace,f_attr,f_sinc,f_intel,f_fun,f_amb,f_race,f_intrace,...,m_imprace,m_attr,m_sinc,m_intel,m_fun,m_amb,m_race,samerace,match,m_intrace
0,f1-m11,21.0,2.0,6.0,9.0,7.0,7.0,6.0,4.0,0.0,...,7.0,6.0,8.0,8.0,8.0,8.0,2.0,0,0,0.0
1,f1-m12,21.0,2.0,7.0,8.0,7.0,8.0,5.0,4.0,0.0,...,1.0,7.0,8.0,10.0,7.0,7.0,2.0,0,0,0.0
2,f1-m13,21.0,2.0,5.0,8.0,9.0,8.0,5.0,4.0,2.0,...,3.0,10.0,10.0,10.0,10.0,10.0,4.0,1,1,3.0
3,f1-m14,21.0,2.0,7.0,6.0,8.0,7.0,6.0,4.0,0.0,...,1.0,7.0,8.0,9.0,8.0,9.0,2.0,0,1,0.0
4,f1-m15,21.0,2.0,5.0,6.0,7.0,7.0,6.0,4.0,0.0,...,3.0,8.0,7.0,9.0,6.0,9.0,3.0,0,1,0.0


## Transform data

Convert `couple_id` back to the female participant's `iid` (e.g. `f1-m11` → `1`).

In [7]:
# couple_id looks like 'f1-m11': keep the part before the first '-' and strip
# the 'f' marker to recover the female participant's id (kept as a string).
# The vectorised .str accessor replaces the per-row Python lambda and passes
# missing couple_id values through as NaN instead of raising a TypeError.
df['f_iid'] = (
    df['couple_id']
    .str.split('-')
    .str[0]
    .str.replace('f', '', regex=False)
)

Use the mean of her partners' ratings (the `m_*` columns), averaged over all of a woman's dates, as her attributes.

In [8]:
# Columns identifying one female participant, and the per-date partner ratings
# that get averaged for her.
group_keys = ['f_iid', 'f_age', 'f_imprace', 'f_race']
partner_ratings = ['m_attr', 'm_sinc', 'm_intel', 'm_fun', 'm_amb']

df2 = df[['f_iid', 'f_age', 'f_imprace', *partner_ratings, 'f_race']]

# One row per participant: the mean of every rating column across her dates.
df3 = df2.groupby(group_keys).mean().reset_index()

# Constant gender flag for every row in this (female-only) load.
# NOTE(review): 0 presumably encodes "female" in the Users table -- confirm.
df3['gender'] = 0
df3.head()

Unnamed: 0,f_iid,f_age,f_imprace,f_race,m_attr,m_sinc,m_intel,m_fun,m_amb,gender
0,1,21.0,2.0,4.0,6.7,7.4,8.0,7.2,8.0,0
1,10,26.0,4.0,2.0,6.333333,6.888889,6.333333,6.333333,6.111111,0
2,112,20.0,1.0,6.0,7.777778,7.222222,7.333333,6.666667,6.0,0
3,113,21.0,3.0,2.0,6.333333,6.555556,6.888889,6.555556,5.888889,0
4,114,20.0,2.0,1.0,4.888889,6.555556,7.0,5.333333,6.111111,0


In [9]:
# Drop the f_/m_ prefixes so the column names are gender-neutral.
df3 = df3.rename(columns=dict(
    f_iid='iid',
    f_age='age',
    f_imprace='imprace',
    m_attr='attr',
    m_sinc='sinc',
    m_intel='intel',
    m_fun='fun',
    m_amb='amb',
    f_race='race',
))

Import the phony user data and merge it with the survey data, pairing rows by index position.

In [10]:
# Load the phony user profiles and discard any incomplete ones.
# The index is renumbered 0..n-1 after dropna() because the following cell
# inner-merges on the row index: gaps left by dropped rows would otherwise
# silently discard the survey users sitting at those index positions.
df4 = (
    pd.read_csv('female_userdata.csv')
    .dropna()
    .reset_index(drop=True)
)

In [11]:
# Attach one phony profile to each survey participant by matching row index.
# Columns present in both frames get the suffixes _x (survey) and _y (phony).
df5 = df3.merge(
    df4,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'),
)
df5.head()

Unnamed: 0,iid,age_x,imprace,race,attr,sinc,intel,fun,amb,gender_x,firstname,lastname,password,screenname,gender_y,age_y,email,photo,region
0,1,21.0,2.0,4.0,6.7,7.4,8.0,7.2,8.0,0,Elodie,Christiaens,Christiaens94~$,Elodie197,female,25,elodie94@example.com,https://uinames.com/api/photos/female/20.jpg,Belgium
1,10,26.0,4.0,2.0,6.333333,6.888889,6.333333,6.333333,6.111111,0,Johana,Cardona,Cardona94=*,Johana426,female,25,johana94@example.com,https://uinames.com/api/photos/female/16.jpg,Colombia
2,112,20.0,1.0,6.0,7.777778,7.222222,7.333333,6.666667,6.0,0,Clarisa,Vădineanu,Vădineanu85&@,Clarisa162,female,34,clarisa85@example.com,https://uinames.com/api/photos/female/1.jpg,Romania
3,113,21.0,3.0,2.0,6.333333,6.555556,6.888889,6.555556,5.888889,0,Jorja,Dowling,Dowling87$,Jorja826,female,32,jorja-87@example.com,https://uinames.com/api/photos/female/18.jpg,New Zealand
4,114,20.0,2.0,1.0,4.888889,6.555556,7.0,5.333333,6.111111,0,Ιωάννα,Δυοβουνιώτης,Δυοβουνιώτης87}#,Ιωάννα865,female,32,Ιωάννα-87@example.com,https://uinames.com/api/photos/female/14.jpg,Greece


In [12]:
# Discard the phony-data columns that duplicate survey fields or are not loaded.
unused_columns = ['gender_y', 'age_y', 'region']
df5 = df5.drop(columns=unused_columns)

In [13]:
# Remove the merge suffix from the survey-side age and gender columns.
df5 = df5.rename(columns={'age_x': 'age', 'gender_x': 'gender'})

In [14]:
# Preview the final frame before it is written to the database.
df5.head(5)

Unnamed: 0,iid,age,imprace,race,attr,sinc,intel,fun,amb,gender,firstname,lastname,password,screenname,email,photo
0,1,21.0,2.0,4.0,6.7,7.4,8.0,7.2,8.0,0,Elodie,Christiaens,Christiaens94~$,Elodie197,elodie94@example.com,https://uinames.com/api/photos/female/20.jpg
1,10,26.0,4.0,2.0,6.333333,6.888889,6.333333,6.333333,6.111111,0,Johana,Cardona,Cardona94=*,Johana426,johana94@example.com,https://uinames.com/api/photos/female/16.jpg
2,112,20.0,1.0,6.0,7.777778,7.222222,7.333333,6.666667,6.0,0,Clarisa,Vădineanu,Vădineanu85&@,Clarisa162,clarisa85@example.com,https://uinames.com/api/photos/female/1.jpg
3,113,21.0,3.0,2.0,6.333333,6.555556,6.888889,6.555556,5.888889,0,Jorja,Dowling,Dowling87$,Jorja826,jorja-87@example.com,https://uinames.com/api/photos/female/18.jpg
4,114,20.0,2.0,1.0,4.888889,6.555556,7.0,5.333333,6.111111,0,Ιωάννα,Δυοβουνιώτης,Δυοβουνιώτης87}#,Ιωάννα865,Ιωάννα-87@example.com,https://uinames.com/api/photos/female/14.jpg


Append the merged records to the `Users` table in the AWS RDS database.

In [15]:
from urllib.parse import quote

from sqlalchemy import create_engine
import config

# Percent-encode the credentials before embedding them in the database URL:
# characters such as '@', ':', '/' or '#' in a user name or password would
# otherwise be parsed as URL delimiters and produce a wrong connection target.
# NOTE(review): assumes config holds the raw (not already percent-encoded)
# values -- confirm, since pre-encoded values would be encoded twice.
pg_user = quote(str(config.pguser), safe='')
pg_password = quote(str(config.pgpassword), safe='')

engine = create_engine(f"postgresql+psycopg2://{pg_user}:{pg_password}@{config.pghost}:{config.pgport}/{config.pgdatabase}")

In [16]:
# Append the rows to the existing Users table; the DataFrame index is not
# written. Re-running this cell appends the same rows again.
df5.to_sql(name='Users', con=engine, if_exists='append', index=False)