    The tickers for the 49 cryptocurrencies obtained through random sampling are shown below in the code block's output. It's important to note that multiple cryptocurrencies may share the same ticker. However, CoinMarketCap's API assigns a unique ID to each currency, ensuring clarity in our analysis. Furthermore, the 30 subreddits we scraped are listed below in both the code block and its output.

In [1]:
import pandas as pd
import requests
import praw
import datetime
import time

# Load the microcap sample that was saved to Excel when the list was scraped
# from coinmarketcap, then extract the ticker symbols and show them.

df_microcaps = pd.read_excel('microcaps.xlsx')
symbol_column = df_microcaps['symbol']
microcaps = symbol_column.tolist()
print(microcaps)

# Build the PRAW client used to scrape the subreddits for the 49 randomly
# sampled microcaps. The id/secret strings below look like placeholders and
# must be replaced with real Reddit API app credentials before running.

reddit_credentials = {
    "client_id": "your client id",
    "client_secret": "your client secret",
    "user_agent": "Microcap Cryptos",
}
reddit = praw.Reddit(**reddit_credentials)

# Sample of 30 subreddits to scrape; all of the listed subreddits deal with microcaps.

subreddits = [
    'SatoshiBets',
    'Shitcoins',
    'MoonBets',
    'PancakeswapICO',
    'AllCryptoBets',
    'CryptoNews',
    'airdrops',
    'ethtrader',
    'CryptoMoonShots',
    'shitcoinmoonshots',
    'Crypto_General',
    'CryptoMars',
    'crypto_currency',
    'SatoshiStreetBets',
    'BountyICO',
    'CryptoCurrencyClassic',
    'bscbombs',
    'CryptoIDOS',
    'SHIBADULTS',
    'dogecoin',
    'BSCMoonShots',
    'BitcoinDiscussion',
    'Shibainucoin',
    'CryptoMoon',
    'Shibu_Inu',
    'ico',
    'ICOAnalysis',
    'CryptocurrencyICO',
    'CryptoMarsShots',
    'cryptostreetbets',
]

# The search keywords are the microcap ticker symbols themselves (same list object).
keywords = microcaps

# Search each of the 30 subreddits for each of the 49 microcap tickers and collect
# the title, content, upvotes, subreddit, date, and author of every matching post.
# A CSV is written per subreddit as soon as that subreddit is finished, and one
# combined CSV is written after the loop.
#
# NOTE(review): results are appended once per (keyword, submission) hit with no
# de-duplication, so a post returned for several tickers appears several times
# in the output -- confirm whether downstream analysis expects that.

# Fixed column order. Passing it to DataFrame keeps the CSV header present even
# when a subreddit yields no matches (an empty list alone produces no columns).
post_columns = ['Title', 'Content', 'Ups', 'Subreddit', 'Date', 'Author']

all_posts = []

for position, subreddit_name in enumerate(subreddits, start=1):
    posts = []
    subreddit = reddit.subreddit(subreddit_name)
    for keyword in keywords:
        for submission in subreddit.search(keyword, time_filter='all'):
            # Convert the UTC epoch field explicitly in UTC so 'Date' does not
            # depend on the timezone of the machine running the scrape; tzinfo is
            # then dropped so the CSV keeps its naive-datetime format.
            created_at = datetime.datetime.fromtimestamp(
                submission.created_utc, tz=datetime.timezone.utc
            ).replace(tzinfo=None)
            posts.append({
                'Title': submission.title,
                'Content': submission.selftext,
                'Ups': submission.ups,
                'Subreddit': str(subreddit),
                'Date': created_at,
                'Author': str(submission.author),
            })
    df = pd.DataFrame(posts, columns=post_columns)
    df.to_csv(f'{subreddit_name}_posts.csv', index=False)
    all_posts.extend(posts)
    print(f"Finished scraping subreddit: {subreddit_name}")
    # Pause between subreddits; there is nothing to wait for after the last one.
    if position < len(subreddits):
        time.sleep(60)  # delay for 60 seconds

# save the combined results to a csv file

df_all = pd.DataFrame(all_posts, columns=post_columns)
df_all.to_csv('all_posts.csv', index=False)


['XACT', 'IS', 'CHAD', 'DOGECEO', 'JADE', 'TITTER', 'ICE', 'CAH', 'RUNY', 'PRVC', 'TONI', 'BS', 'LAMBO', 'HBT', 'DOGECUBE', 'TSUBASAUT', 'LAI', '$L', 'ARCHI', 'FADER', 'HKTIGER', '$420CHAN', 'ZZZ', 'CGT', 'BLUE', 'PRNT', 'pogai', 'MAZI', 'REUNI', 'ZANGAI', 'SONIC', 'PER', '$TOAD', 'MUZZ', 'OT', 'GDX', 'pepecoin', 'CAWCEO', 'EVY', 'MILO', 'JIM', 'BLU', 'DATADOGE', 'NEMS', 'APED', 'SPC', 'MONKEYS', 'YOKEN', 'SWITCH']
Finished scraping subreddit: SatoshiBets
Finished scraping subreddit: Shitcoins
Finished scraping subreddit: MoonBets
Finished scraping subreddit: PancakeswapICO
Finished scraping subreddit: AllCryptoBets
Finished scraping subreddit: CryptoNews
Finished scraping subreddit: airdrops
Finished scraping subreddit: ethtrader
Finished scraping subreddit: CryptoMoonShots
Finished scraping subreddit: shitcoinmoonshots
Finished scraping subreddit: Crypto_General
Finished scraping subreddit: CryptoMars
Finished scraping subreddit: crypto_currency
Finished scraping subreddit: SatoshiSt

In [10]:
# Sanity check that the scrape worked: print the first ten rows of the
# in-memory df_all DataFrame (the CSV file itself is not re-read here).
first_rows = df_all.iloc[:10]
print(first_rows)

                                               Title  \
0  The future of gaming is in the vibrant Neon Li...   
1  $CUMINU is ready to compete with OnlyFans. Ond...   
2  $CUMINU 5m mcap is ready to compete with OnlyF...   
3  $CUMINU at $8.4M is about to launch a web3 18+...   
4  📣 Exciting News: Seneca Testnet and Devnet Pub...   
5  Spodmoon is the First "REAL" Memecoin of the I...   
6                               $SHENT IS SO EARLY🐲🔥   
7  $POOH is best recommended! | Launched stealth ...   
8                        $NOOD is the next #100xgem!   
9                        $NOOD is the next #100xgem!   

                                             Content  Ups    Subreddit  \
0  https://preview.redd.it/tudc6doie88b1.png?widt...    2  SatoshiBets   
1  The first milestone is having 500 creators and...   15  SatoshiBets   
2  The first milestone is having 500 creators and...   10  SatoshiBets   
3  $CUMINU has new and verified creators either f...   27  SatoshiBets   
4  [Seneca Te

In [9]:
# check the number of posts scraped: count() reports the non-null entries per
# column, so each value equals the row count only when that column has no
# missing values
df_all.count()

Title        24697
Content      24697
Ups          24697
Subreddit    24697
Date         24697
Author       24697
dtype: int64