In [6]:
! pip install selenium



In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import os
import praw
import pandas as pd
from dotenv import load_dotenv

import time

In [49]:
# Subject line used for every message sent to the moderators
message_subject = 'Request access for academic research'
# Read the message body from the contact-letter file.
# The encoding is stated explicitly so the text is decoded the same way on
# every platform instead of depending on the locale default.
with open("private_subreddits_contact_letter.txt", "r", encoding="utf-8") as file:
    message_text = file.read()
# Display the loaded text as the cell output
message_text

"Hello, \n\nMy name is Andreea, and I am a data science master's student at Birkbeck College, University of London. My thesis research focuses on prevalent discussion themes within autism Reddit communities. In particular, I'm planning to track the frequency of specific keywords linked to Applied Behavioural Analysis and assess the overall sentiment of discussions revolving around this topic. My aim is to gain a comprehensive understanding of how the autism community perceives Applied Behavioural Analysis. I am reaching out to respectfully request permission to join and access the discussions in your private subreddit. \n\nI understand how crucial privacy and security are in your community, and thus I am committed to maintaining the anonymity and confidentiality of all members. The information gathered will be used strictly for academic purposes. \n\nIn recognition of the trust and sensitivity associated with your community's discussions, I am more than willing to adhere to any guideli

In [4]:
# Load variables through python-dotenv before reading them
load_dotenv()
# Reddit account login credentials (each is None when the variable is unset)
REDDIT_USERNAME, REDDIT_PASSWORD = (
    os.getenv(variable) for variable in ('REDDIT_USERNAME', 'REDDIT_PASSWORD')
)


In [57]:
# Load the table of private subreddits and summarise its columns
private_subs_csv = 'asd_new_null_rows_list.csv'
asd_private_subreddits = pd.read_csv(private_subs_csv)
asd_private_subreddits.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             36 non-null     int64  
 1   restrict_posting       0 non-null      float64
 2   display_name           36 non-null     object 
 3   title                  36 non-null     object 
 4   display_name_prefixed  36 non-null     object 
 5   subscribers            0 non-null      float64
 6   name                   36 non-null     object 
 7   public_description     26 non-null     object 
 8   community_reviewed     0 non-null      float64
 9   created                36 non-null     float64
 10  subreddit_type         36 non-null     object 
 11  id                     36 non-null     object 
 12  over18                 0 non-null      float64
 13  header_title           0 non-null      float64
 14  description            0 non-null      float64
 15  url     

In [58]:
# Show only the subreddit name and its relative URL
asd_private_subreddits.loc[:, ['display_name', 'url']]

Unnamed: 0,display_name,url
0,neurodivergents_ZA,/r/neurodivergents_ZA/
1,LateStageAutism,/r/LateStageAutism/
2,asd,/r/asd/
3,AutisticWomen,/r/AutisticWomen/
4,HighFunctioning,/r/HighFunctioning/
5,AutismArtistic,/r/AutismArtistic/
6,autismspectrum,/r/autismspectrum/
7,AspiePartners,/r/AspiePartners/
8,autismus_treff,/r/autismus_treff/
9,bottomlulz,/r/bottomlulz/


In [105]:
# Launch a Chrome browser session controlled by Selenium
driver = webdriver.Chrome()

In [106]:
# Navigate the browser to the Reddit login form
login_page_url = "https://www.reddit.com/login"
driver.get(login_page_url)

In [107]:
# Log in through the web form.
# Fail early with a readable message: typing a missing (None) credential into
# the form would otherwise fail with an unhelpful error.
if not REDDIT_USERNAME or not REDDIT_PASSWORD:
    raise RuntimeError("REDDIT_USERNAME / REDDIT_PASSWORD are not set - check the .env file")

# Wait (up to 10 s) for the form fields to exist instead of assuming the page
# has already finished rendering them
login_wait = WebDriverWait(driver, 10)
username_field = login_wait.until(EC.presence_of_element_located((By.ID, "loginUsername")))
password_field = login_wait.until(EC.presence_of_element_located((By.ID, "loginPassword")))

username_field.send_keys(REDDIT_USERNAME)
password_field.send_keys(REDDIT_PASSWORD)

# Submit the login form by pressing Return in the password field
password_field.send_keys(Keys.RETURN)
# Wait for a few seconds to allow the login process to complete
time.sleep(5)

In [108]:
# Reject non-essential cookies when the consent banner is shown.
# find_elements returns an empty list (instead of raising) when nothing
# matches, so a missing banner does not abort the notebook.
reject_cookies_matches = driver.find_elements(By.XPATH, "//button[contains(text(), 'Reject non-essential')]")
reject_cookies_link = reject_cookies_matches[0] if reject_cookies_matches else None
if reject_cookies_link is not None:
    reject_cookies_link.click()
    # Give the page a few seconds to settle after dismissing the banner
    time.sleep(5)
else:
    print("Cookie banner not found - nothing to reject")

In [109]:
# Fixed pauses (seconds). They give each page time to render and also space
# out the page loads, so they are deliberately kept as plain sleeps.
NEW_TAB_PAUSE = 10
PAGE_LOAD_PAUSE = 5
TYPING_PAUSE = 3
# Upper bound (seconds) for each explicit wait on a form element
ELEMENT_WAIT = 10

# Subreddits whose compose form could not be opened or filled, mapped to the
# exception that was raised
errored_subs_urls = {}

# Open the "message the moderators" page of every subreddit in its own tab and
# pre-fill the subject and the body. Nothing is submitted by this loop.
for sub in asd_private_subreddits['url']:
    try:
        # `sub` already starts with a slash (e.g. '/r/asd/'); strip it so the
        # final URL does not contain a double slash after the host name
        subreddit_url = f"https://www.reddit.com/{sub.lstrip('/')}"

        # Remember the existing tabs so the new one can be identified reliably
        handles_before = set(driver.window_handles)
        # Pass the URL as a script argument rather than concatenating it into
        # the JavaScript source
        driver.execute_script("window.open(arguments[0], '_blank');", subreddit_url)
        time.sleep(NEW_TAB_PAUSE)

        # Switch to the newly opened tab (the order of window_handles is not
        # guaranteed, so pick the handle that was not there before)
        new_handles = [handle for handle in driver.window_handles if handle not in handles_before]
        if not new_handles:
            raise RuntimeError(f"no new tab was opened for {subreddit_url}")
        driver.switch_to.window(new_handles[0])
        # Wait for a few seconds to let the new tab load
        time.sleep(PAGE_LOAD_PAUSE)

        # Find and click the "message the moderators" link
        message_mods_link = driver.find_element(By.XPATH, "//a[contains(text(), 'message the moderators')]")
        message_mods_link.click()
        # Wait for a few seconds to let the compose page load
        time.sleep(PAGE_LOAD_PAUSE)

        # The compose form lives inside an iframe, located by its class attribute.
        # NOTE(review): this class name and the absolute XPath used for the
        # textarea below look brittle - confirm they still match before re-running.
        iframe = WebDriverWait(driver, ELEMENT_WAIT).until(
            EC.presence_of_element_located((By.CLASS_NAME, "saPujbGMyXRwqISHcmJH9"))
        )
        # Switch to the iframe context
        driver.switch_to.frame(iframe)

        # Subject: find the <input> element within the iframe by its name attribute
        subject_input = WebDriverWait(driver, ELEMENT_WAIT).until(
            EC.presence_of_element_located((By.NAME, "subject"))
        )
        # Clear any existing text and type the new text
        subject_input.clear()
        subject_input.send_keys(message_subject)
        # Wait for a few seconds to allow the text to be typed
        time.sleep(TYPING_PAUSE)

        # Body: find the <textarea> element within the iframe by its absolute XPath
        message_textarea = WebDriverWait(driver, ELEMENT_WAIT).until(
            EC.presence_of_element_located((By.XPATH, "/html/body/div[3]/form[2]/div[7]/div/div/div[2]/div/div[1]/textarea"))
        )
        # Clear any existing text and type the new text
        message_textarea.clear()
        message_textarea.send_keys(message_text)

        # Switch back to the main content
        driver.switch_to.default_content()

        # Wait for a few seconds to allow the text to be typed
        time.sleep(TYPING_PAUSE)

    except Exception as e:
        # Best effort: record the failure and carry on with the next subreddit
        print(sub, e)
        errored_subs_urls[sub] = e


In [110]:
# Show the subreddits that raised an error in the loop above;
# an empty dict means every private-messaging page was opened and filled in
errored_subs_urls

{}

In [122]:
# Subreddits (relative URLs) recorded under the "error 500" label
not_able_to_message_error500 = [
    '/r/LateStageAutism/', '/r/HighFunctioning/',
    '/r/autismspectrum/', '/r/Autism_Awareness/',
    '/r/Autism_Irl/', '/r/AutisticPeopleSpeak/',
    '/r/SelfDxAutism/', '/r/Asdwomen/',
]
# Number of affected subreddits
len(not_able_to_message_error500)

8

In [123]:

# Rows of the subreddit table whose URL is in the error-500 list
error500_mask = asd_private_subreddits['url'].isin(not_able_to_message_error500)
asd_private_subreddits.loc[error500_mask]

Unnamed: 0.1,Unnamed: 0,restrict_posting,display_name,title,display_name_prefixed,subscribers,name,public_description,community_reviewed,created,subreddit_type,id,over18,header_title,description,url,created_utc,_path
1,47,,LateStageAutism,LateStageAutism: Mocking ironic forwards from ...,r/LateStageAutism,,t5_3hsn0,Created for the sole intention of mocking Late...,,1480600000.0,private,3hsn0,,,,/r/LateStageAutism/,1480600000.0,r/LateStageAutism/
4,68,,HighFunctioning,High Functioning,r/HighFunctioning,,t5_2tswf,,,1332876000.0,private,2tswf,,,,/r/HighFunctioning/,1332876000.0,r/HighFunctioning/
6,91,,autismspectrum,autismspectrum,r/autismspectrum,,t5_51px53,,,1631762000.0,private,51px53,,,,/r/autismspectrum/,1631762000.0,r/autismspectrum/
13,171,,Autism_Awareness,Autism Awareness by The Autism Dad,r/Autism_Awareness,,t5_2yu8a,This is the official subreddit for The Autism ...,,1382220000.0,private,2yu8a,,,,/r/Autism_Awareness/,1382220000.0,r/Autism_Awareness/
18,235,,Autism_Irl,Placeholder,r/Autism_Irl,,t5_3m46t,,,1498055000.0,private,3m46t,,,,/r/Autism_Irl/,1498055000.0,r/Autism_Irl/
22,299,,AutisticPeopleSpeak,AutisticPeopleSpeak,r/AutisticPeopleSpeak,,t5_40yqua,This is a very BASED subreddit about Autism Sp...,,1614375000.0,private,40yqua,,,,/r/AutisticPeopleSpeak/,1614375000.0,r/AutisticPeopleSpeak/
33,538,,SelfDxAutism,SelfDxAutism,r/SelfDxAutism,,t5_5u9zcg,"This is a community for self diagnosed, questi...",,1644676000.0,private,5u9zcg,,,,/r/SelfDxAutism/,1644676000.0,r/SelfDxAutism/
35,566,,Asdwomen,ASD Women,r/Asdwomen,,t5_70nrlq,This community is a safe space for people with...,,1662829000.0,private,70nrlq,,,,/r/Asdwomen/,1662829000.0,r/Asdwomen/


Date messages were sent: 20/08/2023

The URLs were accessed automatically using the script above. The subject and the message were also filled in automatically by the script. After all 36 pages were loaded (each in a separate tab), the CAPTCHA was solved manually and the send button was also clicked manually.

After the send button was clicked, if the message was sent, the following message appeared on the page: 'your message has been delivered'. However, 8 out of the 36 pages failed to send the message, apparently due to an internal server error - the message displayed on screen after the send button was clicked was: 'an error occurred status: 500'.

On Reddit, a 500 status is a catch-all error, so it is not informative as to why this is happening. Upon further research, it appears that this error - when messaging private subreddits - might be due to the subreddit being inactive or unmoderated (https://www.reddit.com/r/help/comments/15dbva2/getting_an_error_occurred_status_500_when_trying/). As there is no realistic way of gathering the data from these subreddits, they will be dropped from the analysis.

Deadline date for moderator replies: 27/08/2023

List of subreddits for which access was approved:
TO BE DETERMINED 



### Response from private moderators: 
Date : 03/09/2023

In [7]:
# Reddit API application credentials, read from the process environment
# (each is None when the variable is unset)
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')

In [8]:
# Create the Reddit API client from the application and account credentials
reddit_settings = {
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET,
    "username": REDDIT_USERNAME,
    "password": REDDIT_PASSWORD,
    # free-form label identifying this client
    "user_agent": "MyMsciAPI/0.0.1",
}
reddit = praw.Reddit(**reddit_settings)

In [66]:
# Iterate over every sent message of the account (no limit on the listing)
sent_messages = reddit.inbox.sent(limit=None)
# Running total of messages that carry the access-request subject
count_sent = 0
for m in sent_messages:
    # Skip anything that is not one of the access requests
    if m.subject != 'Request access for academic research':
        continue
    print((m.subreddit, m.subject, m.body))
    count_sent += 1

(Subreddit(display_name='TrulyAutism'), 'Request access for academic research', "Hello, \n\nMy name is Andreea, and I am a data science master's student at Birkbeck College, University of London. My thesis research focuses on prevalent discussion themes within autism Reddit communities. In particular, I'm planning to track the frequency of specific keywords linked to Applied Behavioural Analysis and assess the overall sentiment of discussions revolving around this topic. My aim is to gain a comprehensive understanding of how the autism community perceives Applied Behavioural Analysis. I am reaching out to respectfully request permission to join and access the discussions in your private subreddit. \n\nI understand how crucial privacy and security are in your community, and thus I am committed to maintaining the anonymity and confidentiality of all members. The information gathered will be used strictly for academic purposes. \n\nIn recognition of the trust and sensitivity associated wi

In [67]:
# 32 messages sent
# 36 total private subreddits
# 4 of the subreddits did not allow contact (error status 500) - see screenshot 'private subreddit message error.png' - this is usually a server error, but on Reddit it is also used as a catch-all when there is lag
# potential explanation for the error: the subreddit is unmoderated (= there is no mod to contact) - see https://www.reddit.com/r/help/comments/15dbva2/getting_an_error_occurred_status_500_when_trying/
# NOTE(review): the manual tally earlier in this notebook lists 8 subreddits that showed error 500,
# while 32 sent messages implies only 4 undelivered - confirm which figure is correct
count_sent

32

In [60]:
# Walk through every inbox item and show only the replies to the access request.
# Items with any other subject are skipped instead of being printed as empty
# lines, which keeps the cell output limited to the relevant replies.
private_subs_replies = reddit.inbox.all(limit=None)

for m in private_subs_replies:
    if m.subject == 're: Request access for academic research':
        print((m.subreddit, m.subject, m.body))


(Subreddit(display_name='asd'), 're: Request access for academic research', "I'm afraid we are private as part of a protest and do not allow exceptions for research purposes.")
(Subreddit(display_name='AspieGirlsDND'), 're: Request access for academic research', "Dear Andreea,\n\nif there was anything worth looking for here, I would gladly offer you access, but neither myself nor the users of the subreddit have posted anything here for ages and what we have posted is focused on our Dungeons and Dragons-roleplaying group and world only. I don't think there's anything interesting for you here, primarily because about 80% or so of the posts are information about our roleplaying world and the rest are memes and scheduling posts from several years ago.\n\nI wish you all the best with your research!")
(Subreddit(display_name='bottomlulz'), 're: Request access for academic research', 'You have been [temporarily muted](https://www.reddithelp.com/en/categories/reddit-101/moderators/modmail-mut

As can be seen above, the message was successfully sent to 32 (out of 36) moderators of private subreddits. After 10 days, only 4 had replied:

- r/asd has been set to private as a protest action and they refused access for research purposes.

- the other 3 subreddits are inactive and closed, so there are no discussions/posts to be accessed, and thus the request was refused as well.

The private subreddits will thus need to be dropped from the analysis, as it is not possible to access the data.