## API Data

In [1]:
import requests
import pandas as pd
import time

#### Getting the first set of data

In [2]:
url = 'https://api.pushshift.io/reddit/search/submission'

# Query parameters: which subreddit to search and how many submissions to ask for.
params = {
    'subreddit': 'AmongUsCompetitive',
    'size': 100,
}

# A timeout keeps the cell from hanging forever if the API stops responding.
res = requests.get(url, params=params, timeout=30)

# Shown as the cell output; 200 means the request succeeded.
res.status_code

200

#### Changing it to json and checking the key 'data'

In [3]:
# Parse the response body once and reuse the result (the payload is a dict).
data = res.json()

# Top-level keys of the payload.
print(data.keys())

# One sample record (index 1) to see which fields each post carries.
print(data['data'][1])

dict_keys(['data'])
{'all_awardings': [], 'allow_live_comments': False, 'author': 'ericinterno', 'author_flair_css_class': None, 'author_flair_richtext': [], 'author_flair_text': None, 'author_flair_type': 'text', 'author_fullname': 't2_ag5duns4', 'author_is_blocked': False, 'author_patreon_flair': False, 'author_premium': False, 'awarders': [], 'can_mod_post': False, 'contest_mode': False, 'created_utc': 1632857106, 'domain': 'youtu.be', 'full_link': 'https://www.reddit.com/r/AmongUsCompetitive/comments/pxdrp8/cops_vs_robbers_among_us_no_commentary_part_2/', 'gildings': {}, 'id': 'pxdrp8', 'is_created_from_ads_ui': False, 'is_crosspostable': False, 'is_meta': False, 'is_original_content': False, 'is_reddit_media_domain': False, 'is_robot_indexable': False, 'is_self': False, 'is_video': False, 'link_flair_background_color': '#d10099', 'link_flair_richtext': [{'e': 'text', 't': 'Photo | Video'}], 'link_flair_template_id': 'ddafadf8-ea77-11ea-99cb-0e37641c3e89', 'link_flair_text': 'Photo

In [4]:
# Keep the list of post records found under the 'data' key
posts = data['data']

# Sanity check: the number of posts should match the 'size' requested in params
len(posts)

100

#### Getting the created time to get more data from the subreddit

In [5]:
# created_utc of the final post in the batch; this is the cursor used to request older posts
posts[-1]['created_utc']

1628475347

#### Saving it to a DataFrame with only two columns

In [6]:
# One row per post, one column per field returned by the API
df = pd.DataFrame(posts)

In [7]:
# Column selection already yields a DataFrame, so no extra constructor call is needed;
# .copy() makes ndf independent of df.
ndf = df[['subreddit', 'title']].copy()

In [8]:
# Preview the first rows of the two-column frame
ndf.head()

Unnamed: 0,subreddit,title
0,AmongUsCompetitive,COPS vs ROBBERS *minigame* (Among us) Part 7
1,AmongUsCompetitive,Cops VS Robbers (Among Us) No Commentary Part 2
2,AmongUsCompetitive,CAUGHT THE IMPOSTOR IN MR BEAST TIKTOK VIDEO (...
3,AmongUsCompetitive,Among Us : The crew has no face to go out with...
4,AmongUsCompetitive,Among us Zombie Highlights Part 9


## Getting More Data from API
---

#### Function getting the posts

In [9]:
def get_api_data(before_time, subreddit='AmongUsCompetitive', size=100, timeout=30):
    """Fetch one batch of submissions created before `before_time`.

    Parameters
    ----------
    before_time : int
        Value sent as the API's `before` parameter (the notebook passes a
        `created_utc` timestamp taken from a previous batch).
    subreddit : str, optional
        Subreddit to search. Defaults to 'AmongUsCompetitive'.
    size : int, optional
        Number of submissions to request. Defaults to 100.
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    list
        The post records found under the 'data' key of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    url = 'https://api.pushshift.io/reddit/search/submission'
    params = {
        'subreddit': subreddit,
        'size': size,
        'before': before_time,
    }
    res_api = requests.get(url, params=params, timeout=timeout)

    # Fail loudly on HTTP errors instead of trying to parse an error body as JSON.
    res_api.raise_for_status()

    return res_api.json()['data']

#### Function getting the before time to get more data

In [10]:
def get_before_data(posts):
    """Return the `created_utc` value of the final post in `posts`.

    The notebook feeds this value back into the next request as its `before`
    cursor. Raises IndexError when `posts` is empty.
    """
    last_post = posts[-1]
    return last_post['created_utc']

#### Function to create the DataFrame

In [11]:
def get_df(to_df):
    """Build a DataFrame from `to_df` and keep only the `subreddit` and `title` columns."""
    wanted_columns = ['subreddit', 'title']
    frame = pd.DataFrame(to_df)
    return frame[wanted_columns]

#### Looping ten times to get about 1100 rows of data

In [12]:
# Page backwards through the subreddit: every request asks for posts older than
# the last post of the previous batch.
# The starting cursor is read from the first batch rather than hard-coded, so it
# cannot go stale when the notebook is re-run against newer data.
before = get_before_data(data['data'])

# Collect the per-batch frames and concatenate once at the end
# (DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop is quadratic).
frames = [ndf]
for _ in range(10):
    batch = get_api_data(before)
    if not batch:
        # Nothing older is available; stop instead of failing on an empty batch.
        break
    before = get_before_data(batch)
    frames.append(get_df(batch))
    time.sleep(5)  # pause between requests to go easy on the API

# All the data needed for this project, with a fresh 0..n-1 index
all_data_df = pd.concat(frames, ignore_index=True)
    

In [13]:
# Inspect the last rows of the combined frame
all_data_df.tail()

Unnamed: 0,subreddit,title
1095,AmongUsCompetitive,When the
1096,AmongUsCompetitive,when
1097,AmongUsCompetitive,This among us flipbook is awesome
1098,AmongUsCompetitive,How would a crewmate get out of a false self-r...
1099,AmongUsCompetitive,Pro league strats


In [14]:
# Remove the rows labelled 628 through 634 because they contained a vulgar word.
# NOTE: these labels refer to one particular download; re-check them if the data is fetched again.
rows_to_drop = list(range(628, 635))
all_data_df = all_data_df.drop(index=rows_to_drop)

In [15]:
# Row and column count after dropping the seven rows
all_data_df.shape

(1093, 2)

In [16]:
# Renumber the rows 0..n-1 so the dropped labels leave no gap; the old index is discarded
all_data_df = all_data_df.reset_index(drop=True)

In [17]:
# Resetting the index only renumbers the labels, so the shape stays the same
all_data_df.shape

(1093, 2)

In [18]:
# Spot check: after the index reset, label 628 points at a row that came after the dropped range
all_data_df.loc[[628]]

Unnamed: 0,subreddit,title
628,AmongUsCompetitive,speedrun also subsribe https://www.youtube.com...


In [19]:
# Spot check of the row now labelled 629
all_data_df.loc[[629]]

Unnamed: 0,subreddit,title
629,AmongUsCompetitive,Be like this.catch the impostors.theyll always...


In [20]:
# Spot check of the row now labelled 630
all_data_df.loc[[630]]

Unnamed: 0,subreddit,title
630,AmongUsCompetitive,Imposter partner gets caught... no problem


In [21]:
# Spot check of the row now labelled 631
all_data_df.loc[[631]]

Unnamed: 0,subreddit,title
631,AmongUsCompetitive,Craziest Desync Kill - Steve Kills DumbDog? Wh...


In [22]:
# Spot check of the row now labelled 632
all_data_df.loc[[632]]

Unnamed: 0,subreddit,title
632,AmongUsCompetitive,Among Us *BUT* I'm Lil Baby


In [23]:
# Spot check of the row now labelled 633
all_data_df.loc[[633]]

Unnamed: 0,subreddit,title
633,AmongUsCompetitive,7 Among Us Tips And Tricks To Make You A 1500 ...


In [24]:
# Spot check of the row now labelled 634 (the last label of the dropped range)
all_data_df.loc[[634]]

Unnamed: 0,subreddit,title
634,AmongUsCompetitive,Lady Dimitrescu As A Impostor (Among Us Meme)


## Saving it as a CSV File
---

In [25]:
# Write the cleaned frame to disk; index=False keeps the row index out of the file.
# The path is relative to the notebook's working directory.
all_data_df.to_csv('../datasets/AmongUs_Competitive.csv', index=False)

### Checking the CSV

In [26]:
# Load the file back in to verify the export
read = pd.read_csv('../datasets/AmongUs_Competitive.csv')

In [27]:
# First rows of the saved file
read.head()

Unnamed: 0,subreddit,title
0,AmongUsCompetitive,COPS vs ROBBERS *minigame* (Among us) Part 7
1,AmongUsCompetitive,Cops VS Robbers (Among Us) No Commentary Part 2
2,AmongUsCompetitive,CAUGHT THE IMPOSTOR IN MR BEAST TIKTOK VIDEO (...
3,AmongUsCompetitive,Among Us : The crew has no face to go out with...
4,AmongUsCompetitive,Among us Zombie Highlights Part 9


In [28]:
# Last rows of the saved file
read.tail()

Unnamed: 0,subreddit,title
1088,AmongUsCompetitive,When the
1089,AmongUsCompetitive,when
1090,AmongUsCompetitive,This among us flipbook is awesome
1091,AmongUsCompetitive,How would a crewmate get out of a false self-r...
1092,AmongUsCompetitive,Pro league strats
