In [86]:
from ibm_watson_creds import apikey
from ibm_watson import PersonalityInsightsV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

import json
import pandas as pd
import numpy as np
from collections import defaultdict
import pprint as pp

In [2]:
# IBM Watson Personality Insights settings: dated API version and service instance endpoint
version = "2020-01-08"
url = (
    "https://api.us-south.personality-insights.watson.cloud.ibm.com"
    "/instances/bbb53899-1323-4424-a9c2-82e7a719c287"
)

In [3]:
# build the Personality Insights client: authenticate through IAM with the imported
# API key, then point the client at the service instance URL
authenticator = IAMAuthenticator(apikey)
personality_insights = PersonalityInsightsV3(authenticator=authenticator, version=version)
personality_insights.set_service_url(url)

## Apply IBM Watson Personality Insights API to full corpus of text per profile

In [13]:
# read in favorites dataframe (unprocessed)
# NOTE(review): unpickling can execute arbitrary code, so favorites.pkl must come from a trusted source
df = pd.read_pickle('favorites.pkl')

In [17]:
# preview the first row of the favorites dataframe
df.head(1)

Unnamed: 0,favorited_by_id,id,created_at,screen_name,user_id,in_reply_to_status_id,in_reply_to_screen_name,in_reply_to_user_id,favorite_count,retweet_count,text
0,2649540547,1236533635290890240,Sun Mar 08 06:06:06 +0000 2020,AOC,138203134,1.236524e+18,nbcsnl,28221296.0,115728,3550,@nbcsnl ok this is legendary


In [30]:
# group tweets by the user_id that favorited them and concatenate each user's tweets into one string.
# Aggregating per group yields exactly one entry per user (indexed by favorited_by_id), so two users
# whose combined text happens to be identical are both kept; de-duplicating on the text itself would
# silently merge them. sort=False keeps users in order of first appearance in df, the same order
# that df.favorited_by_id.unique() produces.
alltweets_per_user = df.groupby('favorited_by_id', sort=False)['text'].agg(', '.join)

In [42]:
# build the user_id -> full text table.
# ids and alltweets_per_user are paired by position, so both must list users in the same order
# (order of first appearance in df) and have the same length.
ids = list(df['favorited_by_id'].unique())

tweets_per_user = pd.DataFrame(
    {'user_id': ids, 'all_tweets': alltweets_per_user},
    columns=['user_id', 'all_tweets'],
)
tweets_per_user = tweets_per_user.reset_index(drop=True)  # replace the inherited index with 0..n-1
tweets_per_user.head()

Unnamed: 0,user_id,all_tweets
0,2649540547,"@nbcsnl ok this is legendary, 𝗙𝗹𝗶𝗽𝗽𝗲𝗱 𝘁𝗵𝗲 𝘀𝘄𝗶𝘁..."
1,1094750013304029187,We in the Bay Area should do this too https://...
2,1235337664083222528,"Crack SQL Interviews https://t.co/AJzljKpV2a,..."
3,1333491954,i used to wait for love before i loved back.\n...
4,1219308741369253894,@DishNation I ordered the TrackR that you prom...


In [67]:
def personality_profile(tweets):
    """
    Create personality profile using IBM Watson API given a corpus of tweets.
    ---
    :param tweets: String of concatenated tweets.
    :return: Parsed JSON result of the personality profile of the user with which the
        tweets correspond, or None when the request fails (for example, the service
        rejects text that is shorter than its minimum word count).
    """

    try:
        response = personality_insights.profile(tweets, accept='application/json')
        return response.get_result()
    except Exception:
        # Best effort: one failed profile must not abort a run over many users, so the
        # failure is reported to the caller as None. Only Exception is caught, so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        return None

### Test using my own personality profile to structure output of full dataset

In [56]:
# run test on my profile: the first row of tweets_per_user
my_tweets = tweets_per_user['all_tweets'].iloc[0]
my_profile = personality_profile(my_tweets)
pp.pprint(my_profile['personality'])

[{'category': 'personality',
  'children': [{'category': 'personality',
                'name': 'Adventurousness',
                'percentile': 0.5092574278504252,
                'significant': True,
                'trait_id': 'facet_adventurousness'},
               {'category': 'personality',
                'name': 'Artistic interests',
                'percentile': 0.5546090668268516,
                'significant': True,
                'trait_id': 'facet_artistic_interests'},
               {'category': 'personality',
                'name': 'Emotionality',
                'percentile': 0.4681742846520679,
                'significant': True,
                'trait_id': 'facet_emotionality'},
               {'category': 'personality',
                'name': 'Imagination',
                'percentile': 0.7781376603492451,
                'significant': True,
                'trait_id': 'facet_imagination'},
               {'category': 'personality',
                'name': 'Int

### Loop through all user profiles and create personality profiles

In [68]:
# one API call per user, in tweets_per_user row order; failed calls leave None in the list
all_profiles = [
    personality_profile(user_tweets)
    for user_tweets in tweets_per_user['all_tweets']
]

ERROR:root:The number of words 24 is less than the minimum number of words required for analysis: 100
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/ibm_cloud_sdk_core/base_service.py", line 229, in send
    response.status_code, error_message, http_response=response)
ibm_cloud_sdk_core.api_exception.ApiException: Error: The number of words 24 is less than the minimum number of words required for analysis: 100, Code: 400 , X-global-transaction-id: 8ea0c48bd8578d69b13499daa63a64e4
ERROR:root:The number of words 14 is less than the minimum number of words required for analysis: 100
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/ibm_cloud_sdk_core/base_service.py", line 229, in send
    response.status_code, error_message, http_response=response)
ibm_cloud_sdk_core.api_exception.ApiException: Error: The number of words 14 is less than the minimum number of words required for analysis: 100, Code: 400 , X-global-transacti

### Extract the Big 5 personality type percentiles from each user

In [95]:
# extract personality types from full profiles and create a list of dictionaries, one per user
# (same order as all_profiles). Each dictionary maps a trait name to its percentile.
BIG_FIVE_TRAITS = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional range']

personality_percentiles = []
for traits in all_profiles:
    if traits is None:
        # no profile could be created for this user: keep the row, mark every trait as missing
        personality_vector = dict.fromkeys(BIG_FIVE_TRAITS, np.nan)
    else:
        personality_vector = {
            personality_type['name']: personality_type['percentile']
            for personality_type in traits['personality']
        }
    personality_percentiles.append(personality_vector)

In [99]:
# create a dataframe from the extracted personality types:
# one row per entry of personality_percentiles, one column per trait name
personality_df = pd.DataFrame(personality_percentiles)
personality_df.head()

Unnamed: 0,Openness,Conscientiousness,Extraversion,Agreeableness,Emotional range
0,0.730236,0.334933,0.411628,0.106015,0.552724
1,0.906036,0.550105,0.648426,0.297694,0.500949
2,0.708852,0.768717,0.294999,0.15403,0.112169
3,0.518856,0.069776,0.637403,0.241616,0.757079
4,0.46632,0.706029,0.187577,0.058168,0.317718


In [101]:
# concatenate the personality types to the user/text dataframe to map personalities to users;
# the two frames are placed side by side and their rows are matched on the index
user_personalities = pd.concat(objs=[tweets_per_user, personality_df], axis='columns')

In [102]:
# export the combined user/personality dataframe to a pickle file
user_personalities.to_pickle('personalities.pkl')