In [85]:
import pymongo
import pandas as pd
import pickle
from data_prep import DataPrep
from model import Model
from sklearn.preprocessing import MinMaxScaler

In [154]:
mc = pymongo.MongoClient()
db = mc['my-facebook-webscrape']
fb_statuses = db['fb-statuses']

In [155]:
names_and_statuses = list(fb_statuses.find({'friends_dict': {'$exists': False}}, {'statuses':1, 'name':1, '_id':0}))

df_dict = {'NAME': [], 'DATE': [], 'STATUS': []}
for entry in names_and_statuses:

    name = entry['name']
    statuses = entry['statuses']
    
    for date, status in statuses.items():
        df_dict['NAME'].append(name)
        df_dict['DATE'].append(date)
        df_dict['STATUS'].append(status)

In [156]:
df = pd.DataFrame(df_dict)
df['STATUS_COUNT'] = df.groupby("NAME")["STATUS"].transform('count')
df

Unnamed: 0,DATE,NAME,STATUS,STATUS_COUNT
0,08/03/2009 9:20pm,Kellian Valenti,I'm done (╯°□°）╯︵ ┻━┻),1
1,05/03/2018 12:17pm,Chen Chi,Kind of healing especially in these tough days...,92
2,04/14/2018 4:03pm,Chen Chi,A rainy week -_-\nDimmed ocean view,92
3,04/13/2018 9:48am,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,92
4,03/03/2018 9:22pm,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,92
5,02/20/2018 12:41pm,Chen Chi,想去的地方终于打了卡，相聚离开都有时候，但我们都走在一条叫做努力的路上.See u when...,92
6,01/12/2018 7:18pm,Chen Chi,Amazing Samsung~,92
7,01/12/2018 7:11pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",92
8,01/11/2018 12:47pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",92
9,01/08/2018 5:26pm,Chen Chi,板栗烧鸡,92


In [None]:
traits = ['OPN', 'CON', 'EXT', 'AGR', 'NEU']

for trait in traits:
    pkl_model = pickle.load(open(trait + '_model.pkl', "rb"))
    dp = DataPrep()
    X = dp.prep_data('status', trait, regression=True, model_comparison=False)
    trait_scores = pkl_model.predict(X, regression=True)
    df['pred_s'+trait] = trait_scores

In [157]:
traits = ['OPN', 'CON', 'EXT', 'AGR', 'NEU']

for trait in traits:
    pkl_model = pickle.load(open(trait + '_model.pkl', "rb"))
    dp = DataPrep()
    X = df['STATUS']
    
    trait_scores = pkl_model.predict(X, regression=True)
    df['pred_s'+trait] = trait_scores
    
    trait_categories = pkl_model.predict(X, regression=False)
    df['pred_c'+trait] = trait_categories
    
    trait_categories_probs = pkl_model.predict_proba(X, regression=False)
    df['pred_prob_c'+trait] = trait_categories_probs

(3606,)
(3606,)
(3606,)
(3606,)
(3606,)


In [158]:
scaler = MinMaxScaler(feature_range=(0, 100))
scores = df.iloc[:, 3:]
scaled_scores = scaler.fit_transform(scores)

df_scaled = pd.DataFrame(scaled_scores)
df_scaled.columns = df.iloc[:, 3:].columns
df_scaled

for col in df_scaled.columns:
    df[col] = df_scaled[col]

df

Unnamed: 0,DATE,NAME,STATUS,STATUS_COUNT,O_score,C_score,E_score,A_score,N_score
0,08/03/2009 9:20pm,Kellian Valenti,I'm done (╯°□°）╯︵ ┻━┻),0.000000,58.064684,54.787529,54.881022,44.587677,49.716107
1,05/03/2018 12:17pm,Chen Chi,Kind of healing especially in these tough days...,43.750000,60.882939,58.920478,55.993691,60.577252,57.387619
2,04/14/2018 4:03pm,Chen Chi,A rainy week -_-\nDimmed ocean view,43.750000,54.578190,60.079712,61.804998,58.832883,34.034720
3,04/13/2018 9:48am,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,43.750000,93.096989,63.534006,62.966614,56.834888,52.622052
4,03/03/2018 9:22pm,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,43.750000,93.096989,63.534006,62.966614,56.834888,52.622052
5,02/20/2018 12:41pm,Chen Chi,想去的地方终于打了卡，相聚离开都有时候，但我们都走在一条叫做努力的路上.See u when...,43.750000,61.803341,38.730944,55.551661,31.271847,37.509697
6,01/12/2018 7:18pm,Chen Chi,Amazing Samsung~,43.750000,46.779024,68.633007,57.823224,49.260516,41.698991
7,01/12/2018 7:11pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",43.750000,58.064684,54.787529,54.881022,44.587677,49.716107
8,01/11/2018 12:47pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",43.750000,58.064684,54.787529,54.881022,44.587677,49.716107
9,01/08/2018 5:26pm,Chen Chi,板栗烧鸡,43.750000,58.064684,54.787529,54.881022,44.587677,49.716107


In [159]:
df_mean_scores = df.groupby('NAME')[['pred_sOPN', 'pred_sCON', 'pred_sEXT', 'pred_sAGR', 'pred_sNEU']].mean()

df_mean_scores['status_counts'] = df.groupby('NAME')['STATUS_COUNT'].count()

df_mean_scores.sort_values(by=['status_counts'], ascending=False)

Unnamed: 0_level_0,O_score,C_score,E_score,A_score,N_score,status_counts
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Stanley Cheng,59.851625,53.170666,58.957124,52.587482,39.911536,209
Emmanuel Watkins,58.433604,51.186477,56.103937,49.252100,42.719005,207
Kimberli Cheung Wright,62.067662,52.715522,57.937408,52.885949,40.755140,206
Eddie Ignacio,60.206801,52.251243,53.459414,49.758589,44.454412,205
Bradley Li,60.582556,50.783516,55.759422,52.300250,42.173202,205
William Young,59.694449,49.219614,56.054278,48.971810,45.891016,204
Annelise Yee,60.920195,51.799587,57.648890,51.333612,42.017398,203
Rosanna Cheng,61.060998,51.890701,57.426847,52.793013,41.345358,201
Nick DeJesus,59.437847,49.634673,55.141321,50.290453,45.305583,200
Jonathan Cheung,58.757232,52.370937,58.319671,52.829238,40.785092,119


In [146]:
df.groupby('NAME')['STATUS_COUNT'].count()

NAME
Adam Koehler              105
Adanimo Nrg                94
Adelina Manaut             34
Alana McKnight              2
Alex Chang                 41
Alex Grob                   2
Alexander Robinson          3
Alexis Szigeti              1
Alfred Quaicoe              4
Ally Holtz                  1
Amy Lynn                    1
Andrew Horowitz             2
Andrew Wei                  2
Andy Auyong                 4
Angelica Lee                3
Ankita Janakiraman          1
Annelise Yee              203
Anthony Tran                5
Archie Trajano              4
Asa Grendell                5
Austen Lau                  1
Averiey Cobb                2
Aziz Peregrino-Brimah       7
BK Vo                      17
Barryn Chun                 6
Bomin Wu                    1
Bradley Li                205
Brailey Hirose-Hulbert      2
Brant Ueki                  1
Brent Nagamine              1
                         ... 
Lee Xavier                  5
Lenell L-train Dinkins     11
Leni 

Unnamed: 0,DATE,NAME,STATUS,pred_sO,pred_sC,pred_sE,pred_sA,pred_sN,STATUS_COUNT
0,08/03/2009 9:20pm,Kellian Valenti,I'm done (╯°□°）╯︵ ┻━┻),58.064684,54.787529,54.881022,44.587677,49.716107,1
1,05/03/2018 12:17pm,Chen Chi,Kind of healing especially in these tough days...,60.882939,58.920478,55.993691,60.577252,57.387619,92
2,04/14/2018 4:03pm,Chen Chi,A rainy week -_-\nDimmed ocean view,54.578190,60.079712,61.804998,58.832883,34.034720,92
3,04/13/2018 9:48am,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,93.096989,63.534006,62.966614,56.834888,52.622052,92
4,03/03/2018 9:22pm,Chen Chi,I'm fine\nSmiley face:) @ Downtown Seattle,93.096989,63.534006,62.966614,56.834888,52.622052,92
5,02/20/2018 12:41pm,Chen Chi,想去的地方终于打了卡，相聚离开都有时候，但我们都走在一条叫做努力的路上.See u when...,61.803341,38.730944,55.551661,31.271847,37.509697,92
6,01/12/2018 7:18pm,Chen Chi,Amazing Samsung~,46.779024,68.633007,57.823224,49.260516,41.698991,92
7,01/12/2018 7:11pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",58.064684,54.787529,54.881022,44.587677,49.716107,92
8,01/11/2018 12:47pm,Chen Chi,"油腻少女范儿get! @ Chelsea, Manhattan",58.064684,54.787529,54.881022,44.587677,49.716107,92
9,01/08/2018 5:26pm,Chen Chi,板栗烧鸡,58.064684,54.787529,54.881022,44.587677,49.716107,92
