# Library

In [1]:
import pandas as pd

In [2]:
import datetime as dt

# Function

In [3]:
def bot_date(year,month,day,duration):
    after = int((dt.datetime(year=year,month=month,day=day)-dt.timedelta(days = duration)).timestamp())
    before = int((dt.datetime(year=year,month=month,day=day)+dt.timedelta(days = duration)).timestamp())
    return after,before 

In [22]:
#Choose comments within 60 days of bot implementation

def within_60days(df,year,month,day):
    after, before, impl_date = bot_date(year=year, month = month, day = day, duration=30)
    df = df[(df['created_utc'] < before) & (df['created_utc'] > after)] #need to check again
    return df

In [46]:
def comm_report(df):
    #print no of comments
    print(f'This df has {len(df)} comments.')

    #print no of del comments
    num_del = len(df[(df['body'] == '[deleted]') | (df['body'] == '[removed]')])
    print(f'{num_del} comments were deleted/removed.')

    #print comments by AutoMod
    num_automod = len(df[df['author'] =='AutoModerator'])
    print(f'Automod posted {num_automod} comments.')

In [24]:
def subm_report(df):
    #print num of sub
    print(f'This df has {len(df)} submissions.')

    #print no of del submissions
    num_del = len((df['selftext'] == '[deleted]') | (df['selftext'] == '[removed]'))
    print(f'{num_del} comments were deleted or removed.')

    #print subm by AutoMod
    num_automod = len(df['author'] !='AutoModerator')
    print(f'Automod posted {num_automod} comments.')

    #print subm is meme
    num_meme = len(df[(df['domain'] == 'i.redd.it') | (df['domain'] == 'i.imgur.com') |  (df['domain'] == 'imgur.com')]) 
    print(f'{num_meme} submissions are memes.')

In [30]:
def clean_comments(source_dir,save_dir,year,month,day):
    df = pd.read_csv(source_dir)

    #read files and choose relevant vars
    df = df.reindex(columns = ['body','author','created_utc','retrieved_on','permalink','parent_id','subreddit','score'])
    df = df[['body','author','created_utc','retrieved_on','permalink','parent_id','subreddit','score']]

    #filter comments within 60 days of bot implementation
    within_60days(df,year,month,day)

    #print numbers
    comm_report(df)

    #filter out deleted and removed comments
    df = df[(df['body'] != '[deleted]') & (df['body'] != '[removed]') & (df['author'] !='AutoModerator')]

    #change epoch time to human time
    df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s')
    df['retrieved_on'] = pd.to_datetime(df['retrieved_on'], unit='s')

    #write csv
    df.to_csv(save_dir,encoding = 'utf-8-sig')

    return df


In [8]:
def clean_subm(source_dir,save_dir,year,month,day):
    df = pd.read_csv(source_dir)

    #read files and choose relevant vars
    df = df[['author','author_flair_text','created_utc','retrieved_on','domain','full_link','id','is_reddit_media_domain','permalink','is_video','locked','num_comments','subreddit','subreddit_id','score','selftext','subreddit_subscribers','title','total_awards_received','updated_utc','removed_by','poll_data']]

    #filter comments within 60 days of bot implementation
    within_60days(df,save_dir,year,month,day)

    #print numbers
    subm_report(df)

    #filter out deleted and removed comments
    df = df[((df['body'] != '[deleted]') | (df['body'] != '[removed]') | (df['selftext'].astype(bool)))& (df['author'] !='AutoModerator') & ((df['domain'] == 'i.redd.it') | (df['domain'] == 'i.imgur.com') |  (df['domain'] == 'imgur.com'))]

    #change epoch time to human time
    df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s')
    df['retrieved_on'] = pd.to_datetime(df['retrieved_on'], unit='s')
    df['updated_on'] = pd.to_datetime(df['updated_on'], unit='s')

    #write csv
    df.to_csv(save_dir,encoding = 'utf-8-sig')

    return df

# Apply to subr

## r/exfds
Nov 23 2020

In [47]:
clean_comments(source_dir = './data/exfds/exfds_comments.csv', save_dir = './data/exfds/test.csv', year = 2020, month = 11, day =23)

This df has 1156 comments.
43 comments were deleted/removed.
Automod posted 0 comments.


Unnamed: 0,body,author,created_utc,retrieved_on,permalink,parent_id,subreddit,score
0,I'm on the cusp of considering leaving it. I d...,clover921000,2020-09-11 21:45:38,2020-09-12 00:02:30,/r/exfds/comments/hkc6hx/aside_from_the_hate_t...,t3_hkc6hx,exfds,1
2,One thing I find absolutely ridiculous is that...,suberEE,2020-09-09 10:51:02,2020-09-09 10:51:12,/r/exfds/comments/iozfzk/the_fds_handbook_or_w...,t3_iozfzk,exfds,1
3,"I don't know, the rules keep changing accordin...",phantom_0007,2020-09-09 07:25:24,2020-09-09 07:25:34,/r/exfds/comments/iozfzk/the_fds_handbook_or_w...,t1_g4iycj5,exfds,1
4,well i just discovered a month ago that i'm ge...,phantom_0007,2020-09-09 07:21:06,2020-09-09 07:21:16,/r/exfds/comments/hg39so/random_butthurt_fds_u...,t1_fwhg79x,exfds,1
5,"Wait, I thought they recommended not to sleep ...",towapa,2020-09-09 07:16:54,2020-09-09 07:17:05,/r/exfds/comments/iozfzk/the_fds_handbook_or_w...,t1_g4ivhqi,exfds,1
...,...,...,...,...,...,...,...,...
1150,I was banned in my first week or two for posti...,thetruthishere_,2020-11-07 14:21:04,2020-11-07 14:21:15,/r/exfds/comments/jpr8at/im_an_ex_fdser_i_want...,t3_jpr8at,exfds,1
1151,Oh hey girly! I remember seeing you in the FDS...,IrritatedMango,2020-11-07 14:09:18,2020-11-07 14:09:29,/r/exfds/comments/jpr8at/im_an_ex_fdser_i_want...,t3_jpr8at,exfds,1
1153,So not getting laid is an excuse to be misogyn...,phantom_0007,2020-11-07 04:08:53,2020-11-07 05:39:30,/r/exfds/comments/j02cqr/god_damn_she_went_ful...,t1_gbeiek3,exfds,1
1154,Could someone explain to me what TERF means.,R3s0und3r,2020-11-07 00:47:20,2020-11-07 02:47:13,/r/exfds/comments/jl8qp6/fds_blocks_the_word_t...,t3_jl8qp6,exfds,1


In [37]:
df_test=pd.read_csv('./data/exfds/exfds_comments.csv')

In [39]:
len(df_test[df_test['author'] == 'AutoModerator'])

0

In [43]:
len(df_test[df_test['body'] == '[deleted]'])

24

# Manual as reference

### After comments

In [3]:
fds_comments_after = pd.read_csv('./data/fds_comments_after.csv')

  fds_comments_after = pd.read_csv('fds_comments_after.csv')


In [6]:
#Only choose what relevant
fds_comments_after_df = fds_comments_after[['body','author','created_utc','retrieved_on','permalink','parent_id','subreddit','score']]

In [7]:
#Change epoch time to human time 
fds_comments_after_df['created_utc'] = pd.to_datetime(fds_comments_after_df['created_utc'], unit='s')
fds_comments_after_df['retrieved_on'] = pd.to_datetime(fds_comments_after_df['retrieved_on'], unit='s')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_comments_after_df['created_utc'] = pd.to_datetime(fds_comments_after_df['created_utc'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_comments_after_df['retrieved_on'] = pd.to_datetime(fds_comments_after_df['retrieved_on'], unit='s')


In [9]:
#Get month (Oct and Nov)
# Two conditions, to check if data is what I want
fds_comments_after_df[(fds_comments_after_df['created_utc'].dt.month == 10) & (fds_comments_after_df['created_utc'].dt.day == 28)]

Unnamed: 0,body,author,created_utc,retrieved_on,permalink,parent_id,subreddit,score
14995,[removed],[deleted],2019-10-28 23:33:05,2019-10-29 01:27:28,/r/FemaleDatingStrategy/comments/dm5epg/heres_...,t3_dm5epg,FemaleDatingStrategy,1
14996,[removed],[deleted],2019-10-28 23:31:00,2019-10-29 01:25:54,/r/FemaleDatingStrategy/comments/do8wn7/your_w...,t1_f5kwoh5,FemaleDatingStrategy,1
14997,Reminder that this sub is **FEMALE ONLY**. All...,AutoModerator,2019-10-28 23:30:24,2019-10-29 01:25:28,/r/FemaleDatingStrategy/comments/dohevr/just_r...,t3_dohevr,FemaleDatingStrategy,1
14998,Reminder that this sub is **FEMALE ONLY**. All...,AutoModerator,2019-10-28 23:28:58,2019-10-29 01:24:20,/r/FemaleDatingStrategy/comments/dohe7x/shout_...,t3_dohe7x,FemaleDatingStrategy,1
14999,"This is my biggest struggle right now, but it'...",LittleBoBooBoo,2019-10-28 23:28:49,2019-10-29 01:24:12,/r/FemaleDatingStrategy/comments/do375c/the_im...,t3_do375c,FemaleDatingStrategy,1
...,...,...,...,...,...,...,...,...
100934,Most men want to waste your time and use you f...,throwawayy92838383,2019-10-28 16:44:54,2019-10-28 16:49:43,/r/FemaleDatingStrategy/comments/doa03p/a_remi...,t1_f5lkx90,FemaleDatingStrategy,1
100935,Yep it’s the true 50/50✨,dispositiondesxyz,2019-10-28 16:42:44,2019-10-28 16:47:22,/r/FemaleDatingStrategy/comments/dnwd12/look_a...,t1_f5jk3mo,FemaleDatingStrategy,1
100936,Welcome!,TheOGJammies,2019-10-28 16:40:57,2019-10-28 16:45:34,/r/FemaleDatingStrategy/comments/dob9vz/i_just...,t3_dob9vz,FemaleDatingStrategy,1
100937,"Oh wow, tunics look so cute! Are there any sh...",Meredeen,2019-10-28 16:40:41,2019-10-28 16:45:19,/r/FemaleDatingStrategy/comments/do2s08/doc_ma...,t1_f5lmivz,FemaleDatingStrategy,1


In [10]:
#Get month (Oct and Nov) - 1 mo after bot; Dec - 2 mo after bot; Jan - 3 mo after bot
fds_comments_after_1mo_df = fds_comments_after_df[(fds_comments_after_df['created_utc'].dt.month == 10) | (fds_comments_after_df['created_utc'].dt.month == 11)]
fds_comments_after_2mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 12]
fds_comments_after_3mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 1]

In [12]:
#Save datasets
fds_comments_after_1mo_df.to_csv('fds_comments_after_1mo.csv')
fds_comments_after_2mo_df.to_csv('fds_comments_after_2mo.csv')
fds_comments_after_3mo_df.to_csv('fds_comments_after_3mo.csv')

### Before comments
Replicate the steps above

In [24]:
fds_comments_before = pd.read_csv('fds_comments_before.csv')

Unnamed: 0,all_awardings,author,author_flair_background_color,author_flair_css_class,author_flair_richtext,author_flair_template_id,author_flair_text,author_flair_text_color,author_flair_type,author_fullname,...,stickied,subreddit,subreddit_id,total_awards_received,author_cakeday,edited,steward_reports,distinguished,awarders,associated_award
0,[],CassidyMain,,,[],,,,text,t2_3nrlnhus,...,False,FemaleDatingStrategy,t5_xaiot,0,,,,,,
1,[],CassidyMain,,,[],,,,text,t2_3nrlnhus,...,False,FemaleDatingStrategy,t5_xaiot,0,,,,,,
2,[],CassidyMain,,,[],,,,text,t2_3nrlnhus,...,False,FemaleDatingStrategy,t5_xaiot,0,,,,,,
3,[],gimmethejugs3434,,,[],,,,text,t2_3odknxtj,...,False,FemaleDatingStrategy,t5_xaiot,0,,,,,,
4,[],gimmethejugs3434,,,[],,,,text,t2_3odknxtj,...,False,FemaleDatingStrategy,t5_xaiot,0,,,,,,


In [25]:
fds_comments_before_df = fds_comments_before[['body','author','created_utc','retrieved_on','permalink','parent_id','subreddit','score']]

In [28]:
#Change epoch time to human time 
fds_comments_before_df['created_utc'] = pd.to_datetime(fds_comments_before_df['created_utc'], unit='s')
fds_comments_before_df['retrieved_on'] = pd.to_datetime(fds_comments_before_df['retrieved_on'], unit='s')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_comments_before_df['created_utc'] = pd.to_datetime(fds_comments_before_df['created_utc'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_comments_before_df['retrieved_on'] = pd.to_datetime(fds_comments_before_df['retrieved_on'], unit='s')


In [32]:
#Check if the date range is what I want
fds_comments_before_df[(fds_comments_before_df['created_utc'].dt.month == 10) & (fds_comments_before_df['created_utc'].dt.day == 27)]

Unnamed: 0,body,author,created_utc,retrieved_on,permalink,parent_id,subreddit,score
900,Reminder that this sub is **FEMALE ONLY**. All...,AutoModerator,2019-10-27 11:59:01,2019-10-27 11:59:13,/r/FemaleDatingStrategy/comments/dnsay0/id_nev...,t3_dnsay0,FemaleDatingStrategy,1
901,Save it for someone who gives a shit. Enjoy th...,_HEDONISM_BOT,2019-10-27 11:58:02,2019-10-27 11:58:13,/r/FemaleDatingStrategy/comments/dnrkad/daily_...,t1_f5f04sg,FemaleDatingStrategy,1
902,Did he really come of as a douche though?,L8zin,2019-10-27 11:57:39,2019-10-27 11:57:50,/r/FemaleDatingStrategy/comments/d9xkqr/this_s...,t1_f49g5ax,FemaleDatingStrategy,1
903,"We are all ""nice girls"" on this sub, y u b fuc...",Genghis-Gas,2019-10-27 11:57:27,2019-10-27 11:57:38,/r/FemaleDatingStrategy/comments/dns9ar/ladies...,t3_dns9ar,FemaleDatingStrategy,1
904,Right? This sub is horrifying.,Derpizzle,2019-10-27 11:56:47,2019-10-27 11:57:12,/r/FemaleDatingStrategy/comments/dju7ep/time_t...,t1_f5emwmi,FemaleDatingStrategy,1
...,...,...,...,...,...,...,...,...
22612,[removed],[deleted],2019-10-27 04:19:42,2019-10-27 04:19:53,/r/FemaleDatingStrategy/comments/d2lg9t/girl_g...,t1_f0i7vw5,FemaleDatingStrategy,1
22613,Thanks for the quick response! Will do :),Jenn_There_Done_That,2019-10-27 04:18:56,2019-10-27 04:19:07,/r/FemaleDatingStrategy/comments/dngfc2/amen/f...,t1_f5d4dv2,FemaleDatingStrategy,1
22614,Damn right! Know your worth!!,sweatydeath,2019-10-27 04:18:51,2019-10-27 04:19:02,/r/FemaleDatingStrategy/comments/dnnboy/former...,t1_f5cxky6,FemaleDatingStrategy,1
22615,Believe me some guys actually try to use post-...,sweatydeath,2019-10-27 04:18:17,2019-10-27 04:18:29,/r/FemaleDatingStrategy/comments/dnkvm9/ditchi...,t1_f5cyiry,FemaleDatingStrategy,1


In [33]:
#Get month
fds_comments_before_1mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 10]
fds_comments_before_2mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 9]
fds_comments_before_3mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 8]
fds_comments_before_4mo_df = fds_comments_after_df[fds_comments_after_df['created_utc'].dt.month == 7]

In [34]:
#Save datasets
fds_comments_before_1mo_df.to_csv('fds_comments_before_1mo.csv')
fds_comments_before_2mo_df.to_csv('fds_comments_before_2mo.csv')
fds_comments_before_3mo_df.to_csv('fds_comments_before_3mo.csv')
fds_comments_before_4mo_df.to_csv('fds_comments_before_4mo.csv')

### Submission Before

In [5]:
fds_sub_b4 = pd.read_csv('./data/fds_submissions_before.csv')

  fds_sub_b4 = pd.read_csv('./data/fds_submissions_before.csv')


In [13]:
fds_sub_b4_df = fds_sub_b4[['author','author_flair_text','created_utc','retrieved_on','domain','full_link','id','is_reddit_media_domain','permalink','is_video','locked','num_comments','subreddit','subreddit_id','score','selftext','subreddit_subscribers','title','total_awards_received','updated_utc','removed_by','poll_data']]

In [15]:
#Change to human date
fds_sub_b4_df['created_utc'] = pd.to_datetime(fds_sub_b4_df['created_utc'], unit='s')
fds_sub_b4_df['retrieved_on'] = pd.to_datetime(fds_sub_b4_df['retrieved_on'], unit='s')
fds_sub_b4_df['updated_utc'] = pd.to_datetime(fds_sub_b4_df['updated_utc'], unit='s')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_b4_df['created_utc'] = pd.to_datetime(fds_sub_b4_df['created_utc'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_b4_df['retrieved_on'] = pd.to_datetime(fds_sub_b4_df['retrieved_on'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_b4_df['updated_utc'] = 

In [18]:
#Check if the date range is what I want
fds_sub_b4_df[(fds_sub_b4_df['created_utc'].dt.month == 7) & (fds_sub_b4_df['created_utc'].dt.day == 27)].head()

Unnamed: 0,author,author_flair_text,created_utc,retrieved_on,domain,full_link,id,is_reddit_media_domain,permalink,is_video,...,subreddit,subreddit_id,score,selftext,subreddit_subscribers,title,total_awards_received,updated_utc,removed_by,poll_data
699,thatlonghairedbitch,,2020-07-27 21:20:35,2020-07-27 21:20:45,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,hz15is,False,/r/FemaleDatingStrategy/comments/hz15is/had_my...,False,...,FemaleDatingStrategy,t5_xaiot,1,[removed],86190,Had my first threesome,0,NaT,,
700,greatflo,FDS Newbie,2020-07-27 21:06:18,2020-07-27 21:06:30,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,hz0vt5,True,/r/FemaleDatingStrategy/comments/hz0vt5/dumped...,False,...,FemaleDatingStrategy,t5_xaiot,1,,86185,"Dumped a LVM for the first time, cut off all m...",0,NaT,,
701,Cucharamama,FDS Newbie,2020-07-27 20:54:55,2020-07-27 20:55:06,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,hz0o0g,False,/r/FemaleDatingStrategy/comments/hz0o0g/is_it_...,False,...,FemaleDatingStrategy,t5_xaiot,1,[removed],86182,Is it a red flag when guys on OLD ask to hango...,0,NaT,,
702,bonenecklace,FDS Newbie,2020-07-27 20:50:52,2020-07-27 20:51:04,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,hz0lbc,True,/r/FemaleDatingStrategy/comments/hz0lbc/zero_t...,False,...,FemaleDatingStrategy,t5_xaiot,1,,86182,Zero to one hundred first thing in the morning...,0,NaT,,
703,dior-not-war,FDS Newbie,2020-07-27 20:37:51,2020-07-27 20:38:02,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,hz0cn4,True,/r/FemaleDatingStrategy/comments/hz0cn4/always...,False,...,FemaleDatingStrategy,t5_xaiot,1,,86178,Always block them so they don’t even get the c...,0,NaT,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29732,StressedMacaroon25,FDS Newbie,2020-07-27 21:48:59,2020-07-27 21:49:10,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,hz1okt,False,/r/FemaleDatingStrategy/comments/hz1okt/ladies...,False,...,FemaleDatingStrategy,t5_xaiot,1,[removed],86200,"Ladies, please help me to understand if he's h...",0,NaT,,
29733,kindthoughtsandwords,FDS Newbie,2020-07-27 21:48:53,2020-07-27 21:49:04,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,hz1oia,False,/r/FemaleDatingStrategy/comments/hz1oia/im_not...,False,...,FemaleDatingStrategy,t5_xaiot,1,[removed],86200,"""I'm not judging you, I'm just looking at you!""",0,NaT,,
29734,Myplummms,FDS Disciple,2020-07-27 21:47:27,2020-07-27 21:47:38,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,hz1nmd,True,/r/FemaleDatingStrategy/comments/hz1nmd/youre_...,False,...,FemaleDatingStrategy,t5_xaiot,1,,86200,You're all so lovely! I hope you ladies find s...,0,NaT,,
29735,Myplummms,FDS Disciple,2020-07-27 21:44:03,2020-07-27 21:44:13,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,hz1lcw,True,/r/FemaleDatingStrategy/comments/hz1lcw/the_re...,False,...,FemaleDatingStrategy,t5_xaiot,1,,86199,The reason these are statements women need to ...,0,NaT,,


In [19]:
#Get month
fds_subm_before_1mo_df = fds_sub_b4_df[fds_sub_b4_df['created_utc'].dt.month == 10]
fds_subm_before_2mo_df = fds_sub_b4_df[fds_sub_b4_df['created_utc'].dt.month == 9]
fds_subm_before_3mo_df = fds_sub_b4_df[fds_sub_b4_df['created_utc'].dt.month == 8]
fds_subm_before_4mo_df = fds_sub_b4_df[fds_sub_b4_df['created_utc'].dt.month == 7]

In [20]:
#Save datasets
fds_subm_before_1mo_df.to_csv('./data/fds_subm_before_1mo.csv')
fds_subm_before_2mo_df.to_csv('./data/fds_subm_before_2mo.csv')
fds_subm_before_3mo_df.to_csv('./data/fds_subm_before_3mo.csv')
fds_subm_before_4mo_df.to_csv('./data/fds_subm_before_4mo.csv')

### Submissions after

In [5]:
fds_sub_after = pd.read_csv('./data/fds_submissions_after.csv')

In [8]:
fds_sub_after_df = fds_sub_after[['author','author_flair_text','created_utc','retrieved_on','domain','full_link','id','is_reddit_media_domain','permalink','is_video','locked','num_comments','subreddit','subreddit_id','score','selftext','subreddit_subscribers','title','total_awards_received','updated_utc','removed_by']]

In [9]:
#Change to human date
fds_sub_after_df['created_utc'] = pd.to_datetime(fds_sub_after_df['created_utc'], unit='s')
fds_sub_after_df['retrieved_on'] = pd.to_datetime(fds_sub_after_df['retrieved_on'], unit='s')
fds_sub_after_df['updated_utc'] = pd.to_datetime(fds_sub_after_df['updated_utc'], unit='s')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_after_df['created_utc'] = pd.to_datetime(fds_sub_after_df['created_utc'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_after_df['retrieved_on'] = pd.to_datetime(fds_sub_after_df['retrieved_on'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fds_sub_after_df['u

In [14]:
fds_sub_after_df[(fds_sub_after_df['created_utc'].dt.month == 10) & (fds_sub_after_df['created_utc'].dt.day == 28)].head()

Unnamed: 0,author,author_flair_text,created_utc,retrieved_on,domain,full_link,id,is_reddit_media_domain,permalink,is_video,...,num_comments,subreddit,subreddit_id,score,selftext,subreddit_subscribers,title,total_awards_received,updated_utc,removed_by
5544,alittleinterested,FDS Newbie,2019-10-28 23:54:23,2019-10-28 23:54:26,i.redd.it,https://www.reddit.com/r/FemaleDatingStrategy/...,dohq8i,True,/r/FemaleDatingStrategy/comments/dohq8i/when_m...,False,...,2,FemaleDatingStrategy,t5_xaiot,1,,29384,When moids on twitter meme this.. YOU’RE THE T...,0,2019-10-29 23:54:06,
5545,DxMePls,FDS Newbie,2019-10-28 23:30:24,2019-10-28 23:30:25,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,dohevr,False,/r/FemaleDatingStrategy/comments/dohevr/just_r...,False,...,15,FemaleDatingStrategy,t5_xaiot,52,"THANKS to FDS, I finally realized why I went f...",29353,Just realized some guys I thought we were my f...,0,2019-10-29 23:30:39,
5546,TheOGJammies,Ruthless Strategist,2019-10-28 23:28:57,2019-10-28 23:28:58,reddit.com,https://www.reddit.com/r/FemaleDatingStrategy/...,dohe7x,False,/r/FemaleDatingStrategy/comments/dohe7x/shout_...,False,...,6,FemaleDatingStrategy,t5_xaiot,15,,29351,"Shout out to r/FemaleDatingStrategy, one of th...",0,2019-10-29 23:28:48,
5547,A_Fox_In_The_Closet,,2019-10-28 22:40:19,2019-10-28 22:40:23,self.FemaleDatingStrategy,https://www.reddit.com/r/FemaleDatingStrategy/...,dogqx6,False,/r/FemaleDatingStrategy/comments/dogqx6/i_look...,False,...,0,FemaleDatingStrategy,t5_xaiot,1,[removed],29303,I look forward to being banned,0,2019-10-29 22:40:46,
5548,modernmedusaa,Ruthless Strategist,2019-10-28 22:11:27,2019-10-28 22:11:28,youtube.com,https://www.reddit.com/r/FemaleDatingStrategy/...,dogcn0,False,/r/FemaleDatingStrategy/comments/dogcn0/evil_w...,False,...,21,FemaleDatingStrategy,t5_xaiot,0,,29277,EVIL WEEK: SEDUCTION TECHNIQUES: How To Manipu...,0,2019-10-29 22:11:12,


In [15]:
#Get month
fds_subm_after_1mo_df = fds_sub_after_df[(fds_sub_after_df['created_utc'].dt.month == 11) &(fds_sub_after_df['created_utc'].dt.month == 10)]
fds_subm_after_2mo_df = fds_sub_after_df[fds_sub_after_df['created_utc'].dt.month == 12]
fds_subm_after_3mo_df = fds_sub_after_df[fds_sub_after_df['created_utc'].dt.month == 1]

In [16]:
#Save datasets
fds_subm_after_1mo_df.to_csv('./data/fds_subm_after_1mo.csv')
fds_subm_after_2mo_df.to_csv('./data/fds_subm_after_2mo.csv')
fds_subm_after_3mo_df.to_csv('./data/fds_subm_after_3mo.csv')

## Anomaly Analysis
For Comments: Deleted comments

For Submissions: Deleted submissions, non-text content

In [17]:
#Import BEFORE data. Dataset already in this notebook
fds_b4_1mo =  pd.read_csv('./data/fds_comments_before_1mo.csv')

#Number of deleted and remove
len(fds_b4_1mo[(fds_b4_1mo['body'] == '[deleted]') | (fds_b4_1mo['body'] == '[removed]')])/len(fds_b4_1mo)*100

8.734121215951463

In [19]:
#Import AFTER data
fds_aft_1mo =  pd.read_csv('./data/fds_comments_after_1mo.csv')

#Number of deleted and remove
len(fds_aft_1mo[(fds_aft_1mo['body'] == '[deleted]') | (fds_aft_1mo['body'] == '[removed]')])/len(fds_aft_1mo)*100

20.9742686806907

### Number of deleted commments - Other months

In [20]:
#Import BEFORE data - Jul
fds_jul =  pd.read_csv('./data/fds_comments_before_4mo.csv')

#Number of deleted and remove
len(fds_jul[(fds_jul['body'] == '[deleted]') | (fds_jul['body'] == '[removed]')])/len(fds_jul)*100

0.0

In [21]:
#Import BEFORE data - Aug
fds_aug =  pd.read_csv('./data/fds_comments_before_3mo.csv')

#Number of deleted and remove
len(fds_aug[(fds_aug['body'] == '[deleted]') | (fds_aug['body'] == '[removed]')])/len(fds_aug)*100

0.42087542087542085

In [22]:
#Import BEFORE data - Sep
fds_sep =  pd.read_csv('./data/fds_comments_before_2mo.csv')

#Number of deleted and remove
len(fds_sep[(fds_sep['body'] == '[deleted]') | (fds_sep['body'] == '[removed]')])/len(fds_sep)*100

3.892270798745619

In [23]:
#Import AFTER data - Dec
fds_dec =  pd.read_csv('./data/fds_comments_after_2mo.csv')

#Number of deleted and remove
len(fds_dec[(fds_dec['body'] == '[deleted]') | (fds_dec['body'] == '[removed]')])/len(fds_dec)*100

15.543538546866333

In [24]:
#Import AFTER data - Jan
fds_jan =  pd.read_csv('./data/fds_comments_after_3mo.csv')

#Number of deleted and remove
len(fds_jan[(fds_jan['body'] == '[deleted]') | (fds_jan['body'] == '[removed]')])/len(fds_jan)*100

12.443613605884504