In [1]:
import pandas as pd
import datetime

from pandas import DataFrame, Series

def countPosts(df, sr):
    """Return how many rows of ``df`` belong to subreddit ``sr``.

    Parameters
    ----------
    df : DataFrame
        Frame with a "subreddit" column.
    sr : str
        Subreddit name to match exactly.

    Returns
    -------
    int
        Number of matching rows (0 when none match).
    """
    # Summing the boolean mask avoids materialising a filtered copy and does
    # not rely on the ``.ix`` indexer, which was removed from pandas.
    return int((df["subreddit"] == sr).sum())

def firstPosts(df, sr):
    """Return the earliest "created_utc" value among rows of subreddit ``sr``.

    Parameters
    ----------
    df : DataFrame
        Frame with "subreddit" and "created_utc" columns.
    sr : str
        Subreddit name to match exactly.

    Returns
    -------
    The minimum of the matching "created_utc" values; a missing value
    (NaT for a datetime column) when no row matches.
    """
    # ``.loc`` replaces the removed ``.ix`` indexer; the selection is label/boolean based.
    inSubreddit = df["subreddit"] == sr
    return df.loc[inSubreddit, "created_utc"].min()

def lastPosts(df, sr):
    """Return the latest "created_utc" value among rows of subreddit ``sr``.

    Parameters
    ----------
    df : DataFrame
        Frame with "subreddit" and "created_utc" columns.
    sr : str
        Subreddit name to match exactly.

    Returns
    -------
    The maximum of the matching "created_utc" values; a missing value
    (NaT for a datetime column) when no row matches.
    """
    # ``.loc`` replaces the removed ``.ix`` indexer; the selection is label/boolean based.
    inSubreddit = df["subreddit"] == sr
    return df.loc[inSubreddit, "created_utc"].max()

# Minimum per-subreddit comment count used by the author-selection cells below
# (compared with ">=" against MenPostCount / FemPostCount).
threshold = 10

In [2]:
# Load only the four columns used by this analysis.
reddit = pd.read_csv("fem_mr_comments.csv", usecols=["subreddit", "body", "created_utc", "author"])
# reset_index() materialises the original row position as an "index" column.
reddit = reddit.reset_index()
reddit["author"] = reddit["author"].astype(str)
reddit["subreddit"] = reddit["subreddit"].astype(str)
# Convert epoch seconds to timezone-naive UTC timestamps in one vectorised call.
# datetime.fromtimestamp() would shift every value into the local timezone of the
# machine running the notebook, making the date filter and first-post ordering
# depend on where the notebook is executed.
reddit["created_utc"] = pd.to_datetime(reddit["created_utc"].astype(int), unit="s")

reddit.shape

(3181792, 5)

In [3]:
# Keep only comments whose author and body are not the "[deleted]" placeholder.
# A single combined mask with ``.loc`` replaces two passes through the removed ``.ix`` indexer.
notDeleted = (reddit["author"] != "[deleted]") & (reddit["body"] != "[deleted]")
reddit = reddit.loc[notDeleted, :]

reddit.shape

(2555622, 5)

In [4]:
# Restrict the data to comments created from 2009-01-01 00:00:00 through
# 2015-12-31 23:59:59 (both bounds inclusive).
start = datetime.datetime(year=2009,month=1,day=1,hour=0,minute=0,second=0)
end = datetime.datetime(year=2015,month=12,day=31,hour=23,minute=59,second=59)
# ``.loc`` replaces the removed ``.ix`` indexer.
inWindow = (reddit["created_utc"] >= start) & (reddit["created_utc"] <= end)
reddit = reddit.loc[inWindow]

reddit.shape

(2438174, 5)

In [5]:
# Per-author comment counts and first-comment timestamps for each subreddit.
# Vectorised groupby aggregations replace four groupby().apply() passes that
# called a Python lambda once per author.
isMen = reddit["subreddit"] == "MensRights"
isFem = reddit["subreddit"] == "Feminism"

# Summing the 0/1 mask per author yields the count, including 0 for authors
# who never commented in that subreddit.
menCount = isMen.astype(int).groupby(reddit["author"]).sum()
femCount = isFem.astype(int).groupby(reddit["author"]).sum()

# Earliest timestamp per author within each subreddit; reindexing onto the full
# author index leaves NaT for authors with no comments there.
menFirst = reddit.loc[isMen].groupby("author")["created_utc"].min().reindex(menCount.index)
femFirst = reddit.loc[isFem].groupby("author")["created_utc"].min().reindex(femCount.index)

In [7]:
# Assemble one row per author from the four per-author Series, with the
# columns in a fixed order.
authorColumns = ["MenPostCount", "FemPostCount", "MenFirstPost", "FemFirstPost"]
authorSeries = [menCount, femCount, menFirst, femFirst]
authorInfo = DataFrame(dict(zip(authorColumns, authorSeries)), columns=authorColumns)

authorInfo.shape

(125525, 4)

In [8]:
# Authors with at least `threshold` comments in BOTH subreddits.
# ``.loc`` replaces the removed ``.ix`` indexer.
activeInBoth = (authorInfo["MenPostCount"] >= threshold) & (authorInfo["FemPostCount"] >= threshold)
crossposter = authorInfo.loc[activeInBoth, :]
crossposter.shape

(1035, 4)

In [11]:
# Preview the first rows of the crossposter table.
crossposter.head()

Unnamed: 0_level_0,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0xstev3,167,12,2013-06-13 17:21:32,2012-08-26 16:24:12
1338h4x,310,11,2011-08-12 20:03:47,2011-11-11 23:12:10
2Weird2Live2Rare2Die,74,130,2011-08-11 11:07:09,2011-08-13 06:06:41
2wsy,192,58,2012-01-17 11:08:26,2012-05-09 14:43:05
41145and6,131,10,2012-12-06 16:49:37,2013-01-10 14:39:52


In [9]:
# Authors with no Feminism comments and at least `threshold` MensRights comments.
# ``.loc`` replaces the removed ``.ix`` indexer.
isMenOnly = (authorInfo["FemPostCount"] == 0) & (authorInfo["MenPostCount"] >= threshold)
menOnlyAuthor = authorInfo.loc[isMenOnly, :]

menOnlyAuthor.shape

(16471, 4)

In [12]:
# Preview the MensRights-only authors; FemFirstPost is NaT for them because
# they were selected with FemPostCount == 0.
menOnlyAuthor.head()

Unnamed: 0_level_0,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
--Visionary--,54,0,2015-11-10 18:58:24,NaT
-4cash,12,0,2014-04-27 08:08:56,NaT
-Argus-,11,0,2015-04-26 08:53:44,NaT
-CK,11,0,2014-09-02 17:31:59,NaT
-Eurydice-,27,0,2013-03-19 12:11:08,NaT


In [10]:
# Authors with no MensRights comments and at least `threshold` Feminism comments.
# ``.loc`` replaces the removed ``.ix`` indexer.
isFemOnly = (authorInfo["MenPostCount"] == 0) & (authorInfo["FemPostCount"] >= threshold)
femOnlyAuthor = authorInfo.loc[isFemOnly, :]

femOnlyAuthor.shape

(1893, 4)

In [13]:
# Preview the Feminism-only authors; MenFirstPost is NaT for them because
# they were selected with MenPostCount == 0.
femOnlyAuthor.head()

Unnamed: 0_level_0,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
---annon---,0,11,NaT,2014-03-15 18:45:30
-waitingforawant-,0,22,NaT,2014-07-28 11:46:38
000000000000000000oo,0,10,NaT,2015-02-01 23:42:03
004forever,0,17,NaT,2014-12-08 14:36:40
0btusegoose,0,19,NaT,2012-10-14 17:33:59


In [14]:
# crosspost
# Move "author" out of the index exactly once. The guard keeps this cell safe to
# re-run: an unconditional reset_index() would leave a RangeIndex behind, and a
# second run would then match no authors at all.
if crossposter.index.name == "author":
    crossposter = crossposter.reset_index()

# Every comment written by a crossposter, with that author's summary columns attached.
# ``.loc`` replaces the removed ``.ix`` indexer.
fromCrossposter = reddit.loc[reddit["author"].isin(crossposter["author"]), :]
fromCrossposter = fromCrossposter.merge(crossposter, on="author")

# "M2F": first MensRights comment strictly precedes the first Feminism comment;
# otherwise (including ties) "F2M". Series.map keeps this eager and aligned on
# both Python 2 and 3, unlike the builtin map(), which is lazy on Python 3.
menCameFirst = fromCrossposter["MenFirstPost"] < fromCrossposter["FemFirstPost"]
fromCrossposter["type"] = menCameFirst.map({True: "M2F", False: "F2M"})

# Keep only the comments each author wrote before first appearing in the other subreddit.
menToFem = fromCrossposter[(fromCrossposter["type"] == "M2F") & (fromCrossposter["created_utc"] < fromCrossposter["FemFirstPost"])]
femToMen = fromCrossposter[(fromCrossposter["type"] == "F2M") & (fromCrossposter["created_utc"] < fromCrossposter["MenFirstPost"])]

In [16]:
# (rows, columns) of the M2F authors' comments made before their first Feminism comment.
menToFem.shape

(45791, 10)

In [20]:
# Preview the first rows of menToFem.
menToFem.head()

Unnamed: 0,index,body,author,created_utc,subreddit,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost,type
0,8748,"Yeah. The guy is a ""womanizer"" who ""Brittney t...",kragshot,2009-01-01 03:41:03,MensRights,3251,245,2009-01-01 03:41:03,2009-12-09 18:16:50,M2F
1,9101,"So, what if she waits for two to five years be...",kragshot,2009-01-09 03:04:46,MensRights,3251,245,2009-01-01 03:41:03,2009-12-09 18:16:50,M2F
2,9102,This is how you get around that.\r\n\r\nIn the...,kragshot,2009-01-09 03:19:46,MensRights,3251,245,2009-01-01 03:41:03,2009-12-09 18:16:50,M2F
3,9103,Especially when that there are countless sourc...,kragshot,2009-01-09 03:22:32,MensRights,3251,245,2009-01-01 03:41:03,2009-12-09 18:16:50,M2F
4,9613,"Specifically, read the statements in their ""Ge...",kragshot,2009-01-15 14:18:24,MensRights,3251,245,2009-01-01 03:41:03,2009-12-09 18:16:50,M2F


In [19]:
# (rows, columns) of the F2M authors' comments made before their first MensRights comment.
femToMen.shape

(5615, 10)

In [21]:
# Preview the first rows of femToMen.
femToMen.head()

Unnamed: 0,index,body,author,created_utc,subreddit,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost,type
31262,19225,"And we don't live as long, which is a bit of a...",jay76,2009-06-13 06:10:42,Feminism,216,64,2009-07-02 04:15:24,2009-06-13 06:10:42,F2M
48954,42311,Are they saying that being slutty should be en...,hlkolaya,2009-11-16 13:52:21,Feminism,23,44,2010-08-30 10:24:55,2009-11-16 13:52:21,F2M
48955,42318,I think Rygarb there is missing the point- a c...,hlkolaya,2009-11-16 14:25:10,Feminism,23,44,2010-08-30 10:24:55,2009-11-16 13:52:21,F2M
48956,42340,yes it is. ask any psychologist or therapist a...,hlkolaya,2009-11-16 16:25:55,Feminism,23,44,2010-08-30 10:24:55,2009-11-16 13:52:21,F2M
48957,42343,this is great... but... as the mother of a boy...,hlkolaya,2009-11-16 16:37:47,Feminism,23,44,2010-08-30 10:24:55,2009-11-16 13:52:21,F2M


In [22]:
# MensRights only
# Move "author" out of the index exactly once so the cell can be re-run safely.
if menOnlyAuthor.index.name == "author":
    menOnlyAuthor = menOnlyAuthor.reset_index()

# Every comment by a MensRights-only author, with that author's summary columns attached.
# ``.loc`` replaces the removed ``.ix`` indexer.
fromMenOnly = reddit.loc[reddit["author"].isin(menOnlyAuthor["author"]), :]
fromMenOnly = fromMenOnly.merge(menOnlyAuthor, on="author")

fromMenOnly.shape

(1161078, 9)

In [23]:
# Preview the first rows of fromMenOnly.
fromMenOnly.head()

Unnamed: 0,index,body,author,created_utc,subreddit,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost
0,8746,"Wow, I've never actually considered this... G...",catdad,2009-01-01 03:37:40,MensRights,14,0,2009-01-01 03:37:40,NaT
1,8747,The sad thing is that feminists won't have any...,catdad,2009-01-01 03:40:34,MensRights,14,0,2009-01-01 03:37:40,NaT
2,24292,Anything entering ANYONE'S ass without consent...,catdad,2009-07-30 23:04:47,MensRights,14,0,2009-01-01 03:37:40,NaT
3,32263,I must be missing it too...,catdad,2009-09-15 21:00:30,MensRights,14,0,2009-01-01 03:37:40,NaT
4,47780,As far as the 6% of rapists will ever see a da...,catdad,2009-12-18 12:21:01,MensRights,14,0,2009-01-01 03:37:40,NaT


In [24]:
# Feminism only
# Move "author" out of the index exactly once so the cell can be re-run safely.
if femOnlyAuthor.index.name == "author":
    femOnlyAuthor = femOnlyAuthor.reset_index()

# Every comment by a Feminism-only author, with that author's summary columns attached.
# ``.loc`` replaces the removed ``.ix`` indexer.
fromFemOnly = reddit.loc[reddit["author"].isin(femOnlyAuthor["author"]), :]
fromFemOnly = fromFemOnly.merge(femOnlyAuthor, on="author")

fromFemOnly.shape

(55515, 9)

In [25]:
fromFemOnly.head()

Unnamed: 0,index,body,author,created_utc,subreddit,MenPostCount,FemPostCount,MenFirstPost,FemFirstPost
0,16531,Least appropriate thumbnail ever.,alexs,2009-04-30 12:10:31,Feminism,0,21,NaT,2009-04-30 12:10:31
1,79435,&gt; Does it say that women are not as equal t...,alexs,2010-05-26 06:28:08,Feminism,0,21,NaT,2009-04-30 12:10:31
2,79438,Obviously yes. It would also be better the bia...,alexs,2010-05-26 06:43:01,Feminism,0,21,NaT,2009-04-30 12:10:31
3,79443,There is room for more than one approach. Bala...,alexs,2010-05-26 07:03:20,Feminism,0,21,NaT,2009-04-30 12:10:31
4,82003,The same way we know that there's a bias again...,alexs,2010-06-06 19:00:20,Feminism,0,21,NaT,2009-04-30 12:10:31
