# Data Sourcing
### Via PRAW (The Python Reddit API Wrapper)

#### Scrape data and list

In [1]:
import praw
import pandas as pd

In [2]:
# instantiate a reddit instance
# NOTE(review): no credentials are passed here, so PRAW presumably picks them up
# from a praw.ini file or praw_* environment variables — confirm before running.
reddit = praw.Reddit()

In [3]:
# Names of the two subreddits to scrape; passed to reddit.subreddit() in the cells below
subreddit_sto = 'Stoicism'
subreddit_buh = 'Buddhism'

In [4]:
# Listing calls also take `params`: a dictionary of additional query string
# parameters to send with the request, e.g.
#   reddit.subreddit(my_subreddit).new(params={'after': 't3_196akv6'})
# Available feeds: new, controversial, gilded, hot, rising, top

# Build one listing generator per feed for the Stoicism subreddit
# (each generator can only be iterated through once)
stoicism_sub = reddit.subreddit(subreddit_sto)
sto_1_n = stoicism_sub.new(limit=None)
sto_1_t = stoicism_sub.top(limit=None)
sto_1_h = stoicism_sub.hot(limit=None)
sto_1_r = stoicism_sub.rising(limit=None)
sto_1_c = stoicism_sub.controversial(limit=None)


In [5]:
# Build one listing generator per feed for the Buddhism subreddit
# (each generator can only be iterated through once)
buddhism_sub = reddit.subreddit(subreddit_buh)
buh_1_n = buddhism_sub.new(limit=None)
buh_1_c = buddhism_sub.controversial(limit=None)
buh_1_h = buddhism_sub.hot(limit=None)
buh_1_r = buddhism_sub.rising(limit=None)
buh_1_t = buddhism_sub.top(limit=None)

In [6]:
# define helper that turns a listing of submissions into a DataFrame
def listgen_to_df(listgen):
    """Collect selected fields of every submission in ``listgen`` into a DataFrame.

    Parameters
    ----------
    listgen : iterable
        Iterable of submission-like objects exposing ``title``, ``selftext``,
        ``subreddit``, ``created_utc``, ``name``, ``upvote_ratio`` and ``score``.
        It is iterated exactly once, so a one-shot generator is exhausted
        by this call.

    Returns
    -------
    pandas.DataFrame
        One row per submission, with the columns ``title``, ``selftext``,
        ``subreddit``, ``created_utc``, ``name``, ``upvote ratio`` and
        ``num_upvotes`` (in that order). An empty ``listgen`` yields an empty
        frame that still carries these columns.
    """
    # Explicit schema: without it an empty listing would produce a frame with
    # no columns at all, which breaks later concatenation / CSV round-trips.
    columns = [
        'title',
        'selftext',
        'subreddit',
        'created_utc',
        'name',
        'upvote ratio',
        'num_upvotes',
    ]
    posts_list = [
        {
            'title': post.title,
            'selftext': post.selftext,
            'subreddit': post.subreddit,  # stored exactly as provided by the post object
            'created_utc': post.created_utc,
            'name': post.name,  # unique identifier used by reddit behind the scenes
            'upvote ratio': post.upvote_ratio,
            'num_upvotes': post.score,
        }
        for post in listgen
    ]
    return pd.DataFrame(posts_list, columns=columns)


### Stoicism 

In [7]:
# Consume the Stoicism "top" listing into a DataFrame and show its (rows, columns)
sto_1_t_df = listgen_to_df(sto_1_t)
sto_1_t_df.shape

(993, 7)

In [8]:
# Consume the Stoicism "new" listing into a DataFrame and show its (rows, columns)
sto_1_n_df = listgen_to_df(sto_1_n)
sto_1_n_df.shape

(880, 7)

In [9]:
# Consume the Stoicism "hot" listing into a DataFrame and show its (rows, columns)
sto_1_h_df = listgen_to_df(sto_1_h)
sto_1_h_df.shape

(988, 7)

In [10]:
# Consume the Stoicism "rising" listing into a DataFrame and show its (rows, columns)
sto_1_r_df = listgen_to_df(sto_1_r)
sto_1_r_df.shape

(25, 7)

In [11]:
# Consume the Stoicism "controversial" listing into a DataFrame and show its (rows, columns)
sto_1_c_df = listgen_to_df(sto_1_c)
sto_1_c_df.shape

(1000, 7)

In [12]:
# Stack the five per-feed Stoicism frames into one frame.
# Rows are simply appended: posts that appear in several feeds are kept as-is.
stoicism_master_df = pd.concat(
    [
        sto_1_n_df,
        sto_1_h_df,
        sto_1_r_df,
        sto_1_t_df,
        sto_1_c_df,
    ]
)
stoicism_master_df.shape

(3886, 7)

In [13]:
# Write this Stoicism scrape to disk (row index is not written)
stoicism_master_df.to_csv('sto_scrape_11.csv', index=False)

### Buddhism

In [14]:
# Consume the Buddhism "new" listing into a DataFrame and show its (rows, columns)
buh_1_n_df = listgen_to_df(buh_1_n)
buh_1_n_df.shape

(989, 7)

In [15]:
# Consume the Buddhism "controversial" listing into a DataFrame and show its (rows, columns)
buh_1_c_df = listgen_to_df(buh_1_c)
buh_1_c_df.shape

(998, 7)

In [16]:
# Consume the Buddhism "hot" listing into a DataFrame and show its (rows, columns)
buh_1_h_df = listgen_to_df(buh_1_h)
buh_1_h_df.shape

(997, 7)

In [17]:
# Consume the Buddhism "rising" listing into a DataFrame and show its (rows, columns)
buh_1_r_df = listgen_to_df(buh_1_r)
buh_1_r_df.shape

(25, 7)

In [18]:
# Consume the Buddhism "top" listing into a DataFrame and show its (rows, columns)
buh_1_t_df = listgen_to_df(buh_1_t)
buh_1_t_df.shape

(998, 7)

In [19]:
# Stack the five per-feed Buddhism frames into one frame.
# Rows are simply appended: posts that appear in several feeds are kept as-is.
buddhism_master_df = pd.concat(
    [
        buh_1_t_df,
        buh_1_r_df,
        buh_1_h_df,
        buh_1_c_df,
        buh_1_n_df,
    ]
)
buddhism_master_df.shape

(4007, 7)

In [20]:
# Write this Buddhism scrape to disk (row index is not written)
buddhism_master_df.to_csv('buddhism_scrape_11.csv', index=False)

## Combine Datasets

Data was scraped over the course of 9 days, from Jan 18th to Jan 26th, 2024. Below, all scraped datasets are combined and exported, ready for cleaning and EDA.

In [21]:
# Combine buddhism scrapes: buddhism_scrape_1.csv ... buddhism_scrape_11.csv
all_buddhism_scrapes = [f"buddhism_scrape_{i}.csv" for i in range(1, 12)]

# Load every scrape file, stack them under a fresh 0..n-1 index, then export
b_dfs = list(map(pd.read_csv, all_buddhism_scrapes))
combined_b_dfs = pd.concat(b_dfs, ignore_index=True)
combined_b_dfs.to_csv('./data/combined_buddhism_scrapes.csv', index=False)
combined_b_dfs

Unnamed: 0,title,selftext,subreddit,created_utc,name,upvote ratio,num_upvotes
0,Buddha’s Four Noble Truths for a four year old,,Buddhism,1.614250e+09,t3_ls3y66,0.98,3879
1,Thic Nhat Hanh has passed away,,Buddhism,1.642795e+09,t3_s9j9q9,0.98,3731
2,Drew this last night. Buddhism has enriched my...,,Buddhism,1.486483e+09,t3_5sm4r9,0.91,3667
3,Made this Buddha painting,,Buddhism,1.603192e+09,t3_jen9kf,0.98,3493
4,Found this video that compares mindfulness to ...,,Buddhism,1.631042e+09,t3_pjto21,0.98,3336
...,...,...,...,...,...,...,...
29557,"Book review - Rebirth: A Guide to Mind, Karma,...",*Book author: Roger R. Jackson* \n*Publicatio...,Buddhism,1.704115e+09,t3_18vwomu,0.89,15
29558,How do I let go of the anger? Found out that t...,Sorry for the long post ahead.\n\nI was dating...,Buddhism,1.704108e+09,t3_18vupif,0.83,21
29559,¤¤¤ Weekly /r/Buddhism General Discussion ¤¤¤ ...,"This thread is for general discussion, such as...",Buddhism,1.704103e+09,t3_18vtpno,0.88,6
29560,"How to apologize without using the word ""I""","Attempting not to use ""I"" or ""me"" in my speech...",Buddhism,1.704097e+09,t3_18vsc51,0.39,0


In [22]:
# Combine stoicism scrapes: sto_scrape_1.csv ... sto_scrape_11.csv
all_sto_scrapes = [f"sto_scrape_{i}.csv" for i in range(1, 12)]

# Load every scrape file, stack them under a fresh 0..n-1 index, then export
s_dfs = list(map(pd.read_csv, all_sto_scrapes))
combined_s_dfs = pd.concat(s_dfs, ignore_index=True)
combined_s_dfs.to_csv('./data/combined_stoicism_scrapes.csv', index=False)
combined_s_dfs

Unnamed: 0,title,selftext,subreddit,created_utc,name,upvote ratio,num_upvotes
0,Looking for Seneca's quote on why even bed fle...,I think it was Seneca who wrote something alon...,Stoicism,1.705696e+09,t3_19aswwj,0.67,1
1,READ BEFORE POSTING: r/Stoicism beginner's gui...,"Welcome to the r/Stoicism subreddit, a forum f...",Stoicism,1.705694e+09,t3_19as7c7,0.76,2
2,The New Agora: Daily WWYD and light discussion...,"Welcome to the New Agora, a place for you and ...",Stoicism,1.705694e+09,t3_19as6qt,0.76,2
3,My biggest life mistake was wanting to live an...,"2023 summons this the best, I didn’t want to e...",Stoicism,1.705691e+09,t3_19aqv6w,0.94,27
4,What’s your favorite way to practice gratitude...,You can mention some relevant quotes as well.,Stoicism,1.705691e+09,t3_19aqp1z,1.00,3
...,...,...,...,...,...,...,...
28990,Greetings from the Caribbeans,"My name is Lætitia, I just landed here, nice t...",Stoicism,1.694926e+09,t3_16krukb,0.53,1
28991,Stoic Approach to Dealing with Pedestrians Who...,I'm sure we've all been in this situation befo...,Stoicism,1.694474e+09,t3_16gavw2,0.44,0
28992,Does God have a sense of humour?,"Does God, as referred to by Marcus Aurelius, ...",Stoicism,1.693052e+09,t3_161ts4i,0.50,0
28993,Ryan Holiday Reading From Anything…,Sorry this is old news but can we talk about R...,Stoicism,1.689195e+09,t3_14y0bgp,0.43,0
