In [1]:
# --- Notebook configuration -------------------------------------------------
# Directory (relative to this notebook) that holds every input file.
DATAPATH = "../data/"

# Raw tweet CSVs; these are joined with the model predictions further down.
TRAIN_RAW = "twitgen_train_201906011956.csv"
VALID_RAW = "twitgen_valid_201906011956.csv"

# Embedding CSVs; the training one is used as the PCA input later on.
TRAIN_FNAME = "embed_train_201906020021.csv"
VALID_FNAME = "embed_valid_201906020021.csv"
# NOTE(review): presumably the width of each embedding vector -- confirm.
EMBEDDING_DIM = 512

# Model type id and timestamp; together they select the saved prediction
# and model files.
MODEL_TYPE_ID = "lrp"
MODEL_TIMESTAMP = "201906021549"

In [2]:
import pandas as pd
import numpy as np
import joblib

In [3]:
# Read the saved model predictions for the training and validation tweets.
# Both files are indexed by (id, time), with `time` parsed as a datetime.
pred_index_cols = ['id', 'time']

train_path = ''.join([DATAPATH, MODEL_TYPE_ID, '_train_pred_', MODEL_TIMESTAMP, '.csv'])
valid_path = ''.join([DATAPATH, MODEL_TYPE_ID, '_valid_pred_', MODEL_TIMESTAMP, '.csv'])

df_train_pred = pd.read_csv(train_path, index_col=pred_index_cols, parse_dates=['time'])
df_valid_pred = pd.read_csv(valid_path, index_col=pred_index_cols, parse_dates=['time'])

# Tweets with high-confidence predictions

The tweets with the highest female probability are mostly about Poshmark:

In [4]:
# Attach the model predictions to the raw training tweets and order the rows
# by ascending `pred`, so the lowest predictions come first.
train_raw = pd.read_csv(DATAPATH + TRAIN_RAW, index_col=['id', 'time'], parse_dates=['time'])
# The raw file's own 'male' column is dropped before the join.
train_raw_without_label = train_raw.drop(columns=['male'])
df_train_text = train_raw_without_label.join(df_train_pred).sort_values(by='pred')
df_train_text.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
822945538999984128,2019-05-25 20:54:11+00:00,Check out what I just added to my closet on Po...,False,0.01327
2415931831,2019-05-27 23:43:08+00:00,Check out what I just added to my closet on Po...,False,0.015979
37978225,2019-05-23 00:11:00+00:00,Check out what I just added to my closet on Po...,False,0.016606
2415931831,2019-05-27 03:33:48+00:00,Check out what I just added to my closet on Po...,False,0.017397
1371010783,2019-05-27 03:28:36+00:00,Seeing so happy and playing with makeup makes...,False,0.01776
2415931831,2019-05-27 03:34:14+00:00,Check out what I just added to my closet on Po...,False,0.018193
2415931831,2019-05-27 23:42:50+00:00,Check out what I just added to my closet on Po...,False,0.018272
3251614401,2019-05-27 23:23:59+00:00,Check out what I just added to my closet on Po...,False,0.018317
26642530,2019-05-23 00:04:35+00:00,Check out what I just added to my closet on Po...,False,0.018487
1190127265,2019-05-28 03:45:08+00:00,Check out what I just added to my closet on Po...,False,0.018552


In [5]:
# Text of the ten lowest-`pred` training tweets, as a plain array so the
# display does not shorten each tweet the way the table view does.
df_train_text['text'].head(10).values

array(['Check out what I just added to my closet on Poshmark: Social Butterfly BFYHC.  via  ',
       'Check out what I just added to my closet on Poshmark: Free People Gates to the Garden Midi Dress 8 NWOT.… ',
       'Check out what I just added to my closet on Poshmark: Walt Disney Baby Sleeper Size 12 Months.… ',
       'Check out what I just added to my closet on Poshmark: Free People Bridget Stripe Set S Small NWT.… ',
       'Seeing  so happy and playing with makeup makes my heart happy... now I want to play with my makeup this week 💙💙',
       'Check out what I just added to my closet on Poshmark: Free People Abbie Embroidered Set M BNWOT.… ',
       'Check out what I just added to my closet on Poshmark: Free People Hillside Jumper S Small BNWOT.… ',
       'Check out what I just added to my closet on Poshmark: TC leggings Lularoe.  via  ',
       'Check out what I just added to my closet on Poshmark: Victoria’s Secret Dream Angels Lined Demi 34DD NWT.… ',
       'Check out wha

Aside from the Poshmark topic, the tweets with the highest female probability tend to be emotionally expressive.

In [6]:
# Leave out tweets mentioning Poshmark, then show the ten lowest-`pred` rows
# that remain.
is_poshmark = df_train_text['text'].str.contains('Poshmark')
df_train_text.loc[~is_poshmark].head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1371010783,2019-05-27 03:28:36+00:00,Seeing so happy and playing with makeup makes...,False,0.01776
2675042686,2019-05-24 15:26:40+00:00,Seeing The Vamps meet and greet photos on my t...,False,0.025164
3048307170,2019-05-24 15:30:19+00:00,So good I had to share! Check out all the item...,False,0.026613
704060539,2019-05-23 18:23:17+00:00,I cannot get over how grown many of my babie...,False,0.028862
2351105438,2019-05-27 03:29:03+00:00,I bought an apron that has little strawberries...,False,0.02889
4541535795,2019-05-24 15:45:01+00:00,Yassss!!! Thank you for sharing. 👏👏🏆,False,0.028954
928154424,2019-05-26 07:41:38+00:00,Ugh if I could just never leave my boyfriends ...,False,0.030977
559481203,2019-05-23 00:02:06+00:00,I’m ready to get out this place 😩 I hate looki...,False,0.032194
930865333300318217,2019-05-27 03:13:04+00:00,Gah! Omg this is amazing! Makes me want to m...,False,0.032262
965647417050849280,2019-05-24 18:53:50+00:00,Believe in Pixie Hart is now available to buy ...,False,0.032811


In [7]:
# Text of the same ten non-Poshmark tweets, as a plain array.
is_poshmark = df_train_text['text'].str.contains('Poshmark')
df_train_text.loc[~is_poshmark, 'text'].head(10).values

array(['Seeing  so happy and playing with makeup makes my heart happy... now I want to play with my makeup this week 💙💙',
       'Seeing The Vamps meet and greet photos on my timeline is genuinely making me sad..... 😭😂 I wanna meet them one day ☹️',
       "So good I had to share! Check out all the items I'm loving on  from @alwaysmorefinds  … ",
       '  I cannot get over how grown many of my babies are!!! Ayodele, Itzel, Caitlin, Sheena,… ',
       'I bought an apron that has little strawberries and I’ve never been so excited! \U0001f970',
       'Yassss!!! Thank you   for sharing. 👏👏🏆',
       'Ugh if I could just never leave my boyfriends arms/side I’d be great. I just love him so much!❤️😫',
       'I’m ready to get out this place 😩 I hate looking at the same people everyday...',
       '  Gah! Omg this is amazing! Makes me want to make something inspired by this! Keep u… ',
       'Believe in Pixie Hart is now available to buy at  🌈😍🌿💗 Lola (despite the serious face) and I are SO

The tweets with the highest male probability are typically associated with sports.

In [8]:
# The frame is sorted by ascending `pred`, so the last ten rows are the
# training tweets with the highest predictions.
df_train_text.iloc[-10:]

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
152321483,2019-05-26 12:36:04+00:00,Now we have football instead. Proof that the ...,True,0.963087
768696526671646720,2019-05-25 20:59:36+00:00,"8.8 tho, according to the match stats.",True,0.9633
1074792761721843712,2019-05-24 18:01:10+00:00,At the same that Betway and West Ham strike a...,True,0.963521
14592289,2019-05-26 07:18:34+00:00,getting a bit tempestuous on the pitch STK v ...,True,0.963861
21166551,2019-05-24 18:48:00+00:00,Almora's diving catch in front of him to end t...,True,0.964009
262215245,2019-05-24 18:47:20+00:00,8' - Millar wins a penalty for a high tackle a...,True,0.965413
423665127,2019-05-24 18:59:24+00:00,I’d give it a go for league minimum. Pro rat...,True,0.965452
335571861,2019-05-21 23:32:22+00:00,Well any manager he played under for a st...,True,0.96676
400009818,2019-05-25 11:13:04+00:00,Offside by 5 yards mate. A seen it,True,0.967377
851151412331520005,2019-05-25 20:59:01+00:00,"mate, gettin the printer out for next season",True,0.970701


In [9]:
# Text of the ten highest-`pred` training tweets, as a plain array.
df_train_text['text'].iloc[-10:].values

array([' Now we have football instead. Proof that the old days were mostly terrible.',
       ' 8.8 tho, according to the match stats.',
       ' At the same that Betway and West Ham strike a record deal.',
       ' getting a bit tempestuous on the pitch STK v CAR end of 3rd...',
       "Almora's diving catch in front of him to end top 2nd (robbing Dietrich of a hit) was a 3-Star grab per Statcast. Co… ",
       "8' - Millar wins a penalty for a high tackle and Walker kicks over halfway. (0-6)",
       '  I’d give it a go for league minimum. Pro rated of course.',
       '     Well any manager he played under for a start… ',
       '      Offside by 5 yards mate. A seen it',
       ' mate, gettin the printer out for next season'], dtype=object)

Similar patterns appear in the validation data:

In [10]:
# Same preparation as for the training tweets, applied to the validation set:
# join the predictions onto the raw tweets and sort by ascending `pred`.
valid_raw = pd.read_csv(DATAPATH + VALID_RAW, index_col=['id', 'time'], parse_dates=['time'])
# The raw file's own 'male' column is dropped before the join.
valid_raw_without_label = valid_raw.drop(columns=['male'])
df_valid_text = valid_raw_without_label.join(df_valid_pred).sort_values(by='pred')
df_valid_text.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3196880624,2019-05-29 19:02:46+00:00,Check out what I just added to my closet on Po...,False,0.016536
1115489096170901504,2019-05-29 03:56:40+00:00,Check out what I just added to my closet on Po...,False,0.020844
133581522,2019-05-29 03:49:09+00:00,Check out what I just added to my closet on Po...,False,0.021462
101163606,2019-05-29 17:02:33+00:00,Check out what I just added to my closet on Po...,False,0.023454
779647770361724928,2019-05-29 17:26:09+00:00,Check out what I just added to my closet on Po...,False,0.024574
3282622243,2019-05-28 13:00:03+00:00,Check out what I just added to my closet on Po...,False,0.025664
26358918,2019-05-28 13:05:45+00:00,Check out what I just added to my closet on Po...,False,0.025914
465814478,2019-05-29 19:04:21+00:00,Check out what I just added to my closet on Po...,False,0.026221
585239077,2019-05-29 19:04:01+00:00,I’m mad didn’t say not a thing about her bein...,False,0.027279
2909936586,2019-05-29 18:46:35+00:00,Check out what I just added to my closet on Po...,False,0.028876


In [11]:
# Leave out validation tweets mentioning Poshmark, then show the ten
# lowest-`pred` rows that remain.
is_poshmark_valid = df_valid_text['text'].str.contains('Poshmark')
df_valid_text.loc[~is_poshmark_valid].head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
585239077,2019-05-29 19:04:01+00:00,I’m mad didn’t say not a thing about her bein...,False,0.027279
2591267360,2019-05-30 03:10:20+00:00,thank you so much omg 🥺💙,False,0.029963
336234924,2019-05-28 12:49:57+00:00,🙌 I’m super excited to be teaching Boho weddin...,False,0.030893
635029166,2019-05-29 15:12:25+00:00,I’m going to have look 😍 I wanted to order on...,False,0.033224
995872262476779520,2019-05-29 17:05:19+00:00,I’m a small makeup artist still trying to put...,False,0.034452
53059640,2019-05-30 03:14:49+00:00,I want all of y’all to glow &amp; to help you ...,False,0.040039
856566359362125825,2019-05-30 03:20:50+00:00,"Face mask, Netflix, and tea.. sooo relaxed 🤩",False,0.040499
1618257992,2019-05-30 02:58:34+00:00,Omg me 😭😭😭,False,0.040562
1138440176,2019-05-29 04:01:39+00:00,Yup. I totally faved that. Even though I don't...,True,0.041261
706614543,2019-05-29 18:55:18+00:00,Me after my bridal spray tan trial 😅,False,0.041825


In [12]:
# Text of the same ten non-Poshmark validation tweets, as a plain array.
is_poshmark_valid = df_valid_text['text'].str.contains('Poshmark')
df_valid_text.loc[~is_poshmark_valid, 'text'].head(10).values

array(['I’m mad  didn’t say not a thing about her being in labor until Ava was already here lol. I can’t wait t… ',
       ' thank you so much omg \U0001f97a💙',
       '🙌 I’m super excited to be teaching Boho wedding hair on June 20th and July 18th  for thebridalstylistsworkshops.… ',
       ' I’m going to have look 😍 I wanted to order one from them before Rae was born but never got to it... might have too now 🤗',
       ' I’m a small makeup artist still trying to put together a full makeup kit. I love your products and wou… ',
       'I want all of y’all to glow &amp; to help you achieve your skin goals. \nI do have sessions available. \nBook me. Consult… ',
       'Face mask, Netflix, and tea.. sooo relaxed \U0001f929',
       'Omg me 😭😭😭',
       "Yup. I totally faved that. Even though I don't know that person. Because. Why not? I'm in a faving mood...",
       'Me after my bridal spray tan trial 😅 '], dtype=object)

In [13]:
# The frame is sorted by ascending `pred`, so the last ten rows are the
# validation tweets with the highest predictions.
df_valid_text.iloc[-10:]

Unnamed: 0_level_0,Unnamed: 1_level_0,text,male,pred
id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
953387960401002497,2019-05-29 03:48:13+00:00,Should be a fun series coming up. The drafted...,False,0.954449
1468513446,2019-05-29 03:22:50+00:00,If my team is good enough to compete for a ...,True,0.954912
227273275,2019-05-29 15:10:11+00:00,If a fight breaks out hold Gary Lineker respon...,False,0.955665
1113162214272319488,2019-05-29 19:02:36+00:00,He played 180 or so games. I was approximat...,True,0.956813
703200433,2019-05-29 19:03:52+00:00,This camera angle looks like the dodgy default...,True,0.957062
146772890,2019-05-29 17:17:19+00:00,check your zeros and then call me mate.,True,0.957895
2326225789,2019-05-29 17:11:56+00:00,Andy Green seems to have something against put...,True,0.961531
353677199,2019-05-29 03:45:10+00:00,The situational hitting is terrible though to...,True,0.965164
344305601,2019-05-29 04:06:07+00:00,"Hey , I can still throw low 80's with pretty ...",True,0.967167
52043141,2019-05-29 17:23:35+00:00,World class players influence big games when ...,True,0.970357


In [14]:
# Text of the ten highest-`pred` validation tweets, as a plain array.
df_valid_text['text'].iloc[-10:].values

array(['Should be a fun series coming up. The  drafted Rocco Baldelli in 2000 where his promising career was cut short… ',
       '   If my team is good enough to compete for a chip then adding a pl… ',
       'If a fight breaks out hold Gary Lineker responsible.',
       '   He played 180 or so games. I was approximating\n\n28 homers and 85 RBI is still above average, no?',
       'This camera angle looks like the dodgy default FIFA career mode angle 😷\n\n ',
       '    check your zeros and then call me mate.',
       'Andy Green seems to have something against putting a competitive hitter at the top of the line up',
       ' The situational hitting is terrible though too many strikeouts again and you’re not gonna beat good… ',
       "Hey  , I can still throw low 80's with pretty good control if you're looking for a bullpen arm. Couldn't be much worse.",
       " World class players influence big games when it matters and grealish didn't do that in any of the pla… "],
      dtype=ob

# Feature Importance

In [15]:
# Load the fitted model saved under this model type and timestamp.
# NOTE: joblib.load unpickles the file, so only point it at model files
# you trust.
model_path = ''.join([DATAPATH, MODEL_TYPE_ID, '_model_', MODEL_TIMESTAMP, '.joblib'])
model = joblib.load(model_path)

In [16]:
# Load the training embeddings; the first two CSV columns form the row index.
df_train = pd.read_csv(DATAPATH + TRAIN_FNAME, index_col=[0, 1], parse_dates=['time'])
# Everything except the 'male' column is treated as a feature.
train_features = df_train.drop(columns=['male'])
# NOTE(review): the 1e6 divisor presumably undoes a scaling applied when the
# embeddings were written to CSV -- confirm against the notebook that saved them.
X_train = train_features.values / 1e6

In [17]:
# Pull the PCA step out of the fitted pipeline and project the training
# features onto its principal components.
pca = model.named_steps['pca']
pcs = pca.transform(X_train)
# Overall range of the component scores; a single reduction over the array
# already covers every row and column.
pcs.max(), pcs.min()

(0.662978988082227, -0.5454911448664289)

In [18]:
# Get prediction (male probability) associated with each principal component.
# Each row of the identity matrix is a unit vector along one component.
# Mapping it back through pca.inverse_transform gives an input the full
# pipeline can score.
# The component count is read from the fitted PCA rather than hard-coded, so
# this cell keeps working if the model is retrained with a different size.
pca_dim = pca.components_.shape[0]
unit_vectors = np.eye(pca_dim)
# Column 1 of predict_proba is the probability used throughout this notebook;
# all unit vectors are scored in one batched call instead of a Python loop.
pc_preds = model.predict_proba(pca.inverse_transform(unit_vectors))[:, 1].tolist()

In [19]:
# Most 'masculine' principal components: the four whose unit vectors
# receive the highest predicted probability.
pc_pred_series = pd.Series(pc_preds)
pc_pred_series.nlargest(n=4)

1     0.930726
9     0.899280
3     0.897400
27    0.860004
dtype: float64

In [20]:
# Most 'feminine' principal components: the four whose unit vectors
# receive the lowest predicted probability.
pc_pred_series = pd.Series(pc_preds)
pc_pred_series.nsmallest(n=4)

2     0.156578
13    0.262952
41    0.285052
15    0.286470
dtype: float64

In [21]:
# Keep the component numbers (the Series index) of the four highest- and four
# lowest-scoring components for the per-component inspection below.
pc_pred_series = pd.Series(pc_preds)
most_masculine_components = pc_pred_series.nlargest(4).index.values
most_feminine_components = pc_pred_series.nsmallest(4).index.values

The most 'masculine' component seems to correspond to something like 'argumentativeness':

In [22]:
# Print the 20 training tweets with the largest score on the most
# 'masculine' component.
component_id = most_masculine_components[0]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['  Sure, but apparently you dont see the other issues that brings up. Such as the measu… '
 ' Except without a compelling reason to do so.'
 'On what "grounds"?  Another NOTHINGBURGER!'
 "  that's such bs! and comes from a spineless complicit person who supports the "
 'And it shouldn’t be avoided for political expediency either.'
 ' What a BS headline. Either you know that and are being dishonest or your incredibly stupid. Or both.\nProbably both.'
 '  Very simplistic. But. Ok.' ' No shit.' '   No shit!'
 ' Bit tricky when he supports it 🤷🏼\u200d♂️'
 '  His claims are simply disingenuous and muddle headedly wrong.'
 '  Not necessarily...' '       To be fair, neither… '
 '“...deplorable...” didn’t work last time.'
 ' And you think these two things are related?'
 "  That's not helpful here, as true as it is."
 "       That's absolutely false… " ' Doesn’t fit their narrative.  POS'
 'your understanding does not see the overall'
 " No they don't.  That's just bs from the MSM."] 



The second most 'masculine' component seems to correspond to something like 'inspirational talk':

In [23]:
# Print the 20 training tweets with the largest score on the second most
# 'masculine' component.
component_id = most_masculine_components[1]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['Dont allow the negative things in your life spoil all of the good things you have, Never stop counting your blessings and being happy!!'
 "Inspirational Thought | Working through whatever pain you're dealing with is part of being a champion! … "
 'Remove yourself from people who treat you like your time doesn’t matter, your feelings are worthless, or like your soul is replaceable.'
 '"Success is getting what you want. Happiness is wanting what you get." -Dale Carnegie  '
 "Life doesn't get easier or more forgiving; we get stronger and more resilient. - Steve Maraboli quote"
 'Dedication is the first but most important step to living the life you want to live.'
 '“Passion is energy. Feel the power that comes from focusing on what excites you” '
 'Enlightenment is just the process of getting over yourself. '
 'The Most Important Trait Shared by Successful Athletes and Entrepreneurs | by davidmeltzer '
 "If you can focus on doing what YOU do best - instead of trying to imitate someone e

The third most 'masculine' component is harder to define, but it includes lots of references to male celebrities.

In [24]:
# Print the 20 training tweets with the largest score on the third most
# 'masculine' component.
component_id = most_masculine_components[2]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['Lmao mans are that shook because of drake???'
 '  That shit was funny asf, Jey just a pussy'
 'that nigga did her like Blake and CP3 did Deandre Jordan'
 '  Wtff lmao I thought he was always on pacers'
 ' And Wayne but he does have a lot of corny bars too'
 ' Ahahaha nah eminem like buck 65 levels of white. Dude keep chapstick in his 5th pocket.'
 '  Had a quick glance at that and thought tyler was at raise the roof tonight, gutter hahahah'
 ' The dude on bulls bench started walking away like he knew that shit was good 😂'
 'Billy the goat needs his own mf show lol'
 'seth rogen really hit the light flex with the gucci ashtray 😭'
 ' Toney woulda been licking his lips watching that game yesterday...'
 ' ELP has You hung in the tree of Whoa but plays to much and takes a german suplex good spot'
 '   That aint R-truth thats K-kwik getting rowdy lmao 😆😁\U0001f92a😜'
 'Taylor tried to do a Jose Canseco, but he’s not quite tall enough. '
 "    he'd give ashley sampi a run for his money leapi

I can't make head or tail of the 4th most 'masculine' component:

In [25]:
# Print the 20 training tweets with the largest score on the fourth most
# 'masculine' component.
component_id = most_masculine_components[3]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['  I have never voted for the treacherous bastads NEVER WILL spent my ££ on  &amp; recently '
 'Need 1 for the 1k with   &amp;  dm'
 'Next time I go, DROP OF DOOM IS A MF MUST'
 ' nice man I got a gorgon and a tamamo from one pull'
 ' after such a terrible experience with your roaming pack, we have been stamped with a huge bill of INR 7… '
 'NO DEPOSIT - 10 FREE SPINS on registration + 100% bonus &amp; 100 free spins\n   … '
 ' Lucifer, iron fist, got'
 '  after farming the warrior for days I got this bad boy instead of what I needed is it good? '
 'Yeah necro is pretty cool ' '  Sorry No signet in eldoret signal down'
 ' i got a renegade and will trade u i’ll go first it has ikonik too '
 ' 66. Must be my second marr'
 'TalkTalk data breach customer details found online  '
 "I just earned the 'God Save the Queen (Level 67)' badge on ! "
 '  One of the best albums of all time!'
 ' Okay pls trust me, station 11 is an absolute stone cold banger'
 'Hit LIKE, RETWEET &amp; follow the link

The most 'feminine' component seems to have something to do with emotions, mostly negative emotions, and mostly in an interpersonal context:

In [26]:
# Print the 20 training tweets with the largest score on the most
# 'feminine' component.
component_id = most_feminine_components[0]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['Like how can you lie to my face alllll the time . You must have no self respect for the things your doing'
 ' I hate that kind of individual. The make life so complicated'
 'But I have that little part of me being like, “do they think I’m a fuckin weirdo”\nThough I know that’s not totally the case.'
 'Hate people like this who say ‘if you actually watched’.\n\nIf you had any idea who you are talking to, you would kno… '
 'I wish I can tell ppl to stop loving me and just go on with their life , is that selfish?🤔😔'
 ' Really frustrates me how someone can be horrible to an animal x'
 'I never give up on people that i love, but if i do, know that you really messed up.'
 'No, that’s immaturity. I might not express I think he’s cute again but you don’t automatically stop liking someone… '
 'I don’t get how someone can meet me and not be in love it don’t be making sense 😔'
 'I mean yea I’m probably angry but don’t ASSUME'
 'I get my mf feelings hurt when I get left on read. YOURE LUCKY I RE

The second most 'feminine' component is hard to define, but it seems to involve a lot of geographic references:

In [27]:
# Print the 20 training tweets with the largest score on the second most
# 'feminine' component.
component_id = most_feminine_components[1]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['People depress me all the time. I expect so much humanity kahit paulit ulit na kong nadisappoint, nasaktan, at naga… '
 " Cheltenham was mostly a disaster for me, so I didn't bother with Aintree and then I watched my fanc… "
 'You know I was wondering for sometime why the level of music in my sweet Barbados has dropped and I now understand.'
 'A thread worth reading, on the residents of . Having grown up and gone to school in Shankill and Killiney, I… '
 ' Holy shit, I’m actually in shock... ALL. THIS. TIME. 🤦🏽\u200d♀️ I wish I saw the Hindi version growing up..… '
 'Jesus thanks  now I AM CRAVING FOR NASI LEMAK ... I DONT EVEN LIKE NASI LEMAK THAT MUCH TO BEGIN WITH?!'
 'When I went to Kasur the first time the children were already tired &amp; harassed by an insensitive media. They desper… '
 'Most of the shows were on during the weekday afternoons for me so I had to watch them cereal-less. We certainly wer… '
 'Belgaum, Karnataka.19 yr old Shivu Uppar was murdered &amp; hanged in B

The third most 'feminine' component initially shows a lot of repetition (see the output just below), so in the following cell we filter out the repeated tweet to get a more broadly representative set of tweets.

In [28]:
# Print the 20 training tweets with the largest score on the third most
# 'feminine' component (unfiltered, so repeated tweets show up as-is).
component_id = most_feminine_components[2]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

[' Hi, BTS UK Army are on a mission to get the London Eye lit purple for the 2 days that BTS will be here.… '
 ' Get stuffed ! Crocodile tears  !'
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and concern can make you feel emotionally awkw... More for Leo '
 'Expressing care and c

A lot of this component is about tears and emotion, but it's hard to characterize generally:

In [29]:
# Third most 'feminine' component again, taking the top 34 tweets this time
# and dropping the ones that contain the repeated phrase.
component_id = most_feminine_components[2]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(34).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
phrase_to_avoid = 'Expressing care and concern can make you feel emotionally awkw'
# Look the tweets up once and reuse the result for both the mask and the print.
top_texts = df_train_text.loc[idxs].text
# The phrase is a literal, so match it as plain text rather than as a regex.
mask = ~top_texts.str.contains(phrase_to_avoid, regex=False)
print(top_texts[mask].values, '\n')

[' Hi, BTS UK Army are on a mission to get the London Eye lit purple for the 2 days that BTS will be here.… '
 ' Get stuffed ! Crocodile tears  !' '  Bond, all the way!'
 'Copper the havapookie as a puppy Tote Bag '
 'Russell making the case for Hearts over Diamonds in the wider world.'
 '  That would be football. Basketball accounts for small percentage of tears in young m… '
 ' Rory Stewart the Dr Doolittle of the plant world' 'Fake awards'
 ' Truth Hurts Mr. Green. Is he better than Mike? Or Kobe?'
 'What does one do with their Oakley’s once they’re done rowing? I am open to suggestions'
 'Would you like to donate $1,000 to the Discovery Museum  and get clean, solar energy to power your hom… '
 "Win a Concept 2 Rower and a Year's Supply of O2   "
 'Someone give this dude an Oscar lol'
 ' I was the captain if my House cricket Team at college and Captain of Rifle Shooting Team for 4 years at University.'
 'Our brave veterans serve and then spend a lifetime coping with what they have w

The 4th most 'feminine' component seems to be associated with children/parenthood and with making a living:

In [30]:
# Print the 20 training tweets with the largest score on the fourth most
# 'feminine' component.
component_id = most_feminine_components[3]
# `pcs` already holds pca.transform(X_train) from an earlier cell; reusing it
# avoids re-projecting the whole training matrix here.
largest = pd.Series(pcs[:, component_id]).nlargest(20).index.values
# Translate row positions into df_train index labels to look up the text.
idxs = [df_train.index[b] for b in largest]
print(df_train_text.loc[idxs].text.values, '\n')

['I really failed as a parent if my children ever become a stripper, “sex worker”,drug dealer, running a celebrity fa… '
 '$LainaPop |  Baby needs to pay off these student loans! Lol'
 ' No  called your son is lazy &amp; was probably GIVEN everything to him in his life😅😅. Now he has to work wah wah wah'
 ' My boy is looking for $450. Just need a proper cleaning job. Tons of life.'
 'I’m sorry but if i was Janelle I’ll leave my husband 😂she was making 6 figures with teen mom'
 '  Former veterinarian should be current prison inmate'
 'Drugmaker Teva to pay $85 million to settle Oklahoma opioid lawsuit '
 "Also please research Chris with Waco quality LLC I feel like it was a scam!! He's got my signature.."
 'Child support'
 'The fact that I pay 720 a month for Ava’s preschool makes me wanna through up'
 ':  Counselor Social Worker Therapist- Coeur d’Alene, ID: Opportunities for Counselor Social Worker… '
 ' I hope he gets more $ to hire a good assistant coach for the offense'
 "'Real Hous