# Analysis of Reddit Data from Ethereum, Bitcoin, and Ethereum Classic Subreddits

#### The analysis below examines the public-facing information contained in the Ethereum, Bitcoin, and Ethereum Classic subreddits. However, the code can be adapted to analyze information in any subreddit.
#### The code still needs a lot of cleaning up, which I will do once I've drafted my results and analysis, so stay tuned for changes; any suggestions are greatly appreciated. I will eventually turn this into an analysis library for the LibreQDA Project. :)

## Load Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud
from string import punctuation
from collections import Counter
import re
import os

In [None]:
%matplotlib inline

## Load the CSV file output from Scrapy into DataFrames

In [None]:
# Open CSV files from Scrapy output
# your schema from the Scrapy output should be: author; tagline; time_rel; comments; title; time_all(this is where dow, mon, day, time, year, tz, and dtg come from)
# your final schema should be: author; tagline; time_rel(relative time of post from date of viewing); comments(' comments' removed); title; dow(day of the week 3-letter); mon(month 3-letter); day(1-31); time(24 hour clock); year; tz(timezone); dtg(date/time group).

# To parse 'comments' to int
# threads_eth['comments'] = threads_eth['comments'].map(lambda x: x.rstrip(' comments'))
# parser for the 'time_all' will be posted when I have it functional

In [None]:
# Load the three cleaned CSV exports (final schema) into one DataFrame per subreddit

threads_eth, threads_btc, threads_etc = (
    pd.read_csv(csv_path)
    for csv_path in (
        'redditData/threads_eth_fix.csv',
        'redditData/threads_btc_fix.csv',
        'redditData/threads_etc_fix.csv',
    )
)


## Analysis of Reddit Authors

In [None]:
# Bar chart of the 25 authors with the most rows in the Ethereum threads

eth_author = threads_eth['author']
eth_author_bar = eth_author.value_counts().iloc[:25].plot.bar()
eth_author_bar


In [None]:
# Bar chart of the 25 authors with the most rows in the Bitcoin threads

btc_author = threads_btc['author']
btc_author_bar = btc_author.value_counts().iloc[:25].plot.bar()
btc_author_bar


In [None]:
# Bar chart of the 25 authors with the most rows in the Ethereum Classic threads

etc_author = threads_etc['author']
etc_author_bar = etc_author.value_counts().iloc[:25].plot.bar()
etc_author_bar


### Comments per author in ETH, BTC, or ETC

In [None]:
# Independent copy of the author and comments columns of the Ethereum threads
eth_comments = threads_eth.loc[:, ['author', 'comments']].copy()

# The 30 rows with the largest 'comments' value, largest first
eth_comments.sort_values('comments', ascending=False).iloc[:30]


In [None]:
# Sum 'comments' over all rows of each author, then chart the 30 largest totals

eth_comment_tot = eth_comments.groupby('author').sum()
eth_comment_tot_sort = (
    eth_comment_tot
    .sort_values('comments', ascending=False)
    .iloc[:30]
    .plot.bar()
)
eth_comment_tot_sort


In [None]:
# Independent copy of the author and comments columns of the Bitcoin threads
btc_comments = threads_btc.loc[:, ['author', 'comments']].copy()

# The 30 rows with the largest 'comments' value, largest first
btc_comments.sort_values('comments', ascending=False).iloc[:30]


In [None]:
# Sum 'comments' over all rows of each author, then chart the 30 largest totals

btc_comment_tot = btc_comments.groupby('author').sum()
btc_comment_tot_sort = (
    btc_comment_tot
    .sort_values('comments', ascending=False)
    .iloc[:30]
    .plot.bar()
)
btc_comment_tot_sort


In [None]:
# Independent copy of the author and comments columns of the Ethereum Classic threads
etc_comments = threads_etc.loc[:, ['author', 'comments']].copy()

# The 30 rows with the largest 'comments' value, largest first
etc_comments.sort_values('comments', ascending=False).iloc[:30]


In [None]:
# Sum 'comments' over all rows of each author, then chart the 30 largest totals

etc_comment_tot = etc_comments.groupby('author').sum()
etc_comment_tot_sort = (
    etc_comment_tot
    .sort_values('comments', ascending=False)
    .iloc[:30]
    .plot.bar()
)
etc_comment_tot_sort


### Count of authors that are found in ETH, BTC, and ETC forums

In [None]:
# One-column DataFrames of each forum's distinct authors (first occurrence kept)

df_eth = eth_author.drop_duplicates(keep='first').to_frame()
df_btc = btc_author.drop_duplicates(keep='first').to_frame()
df_etc = etc_author.drop_duplicates(keep='first').to_frame()


In [None]:
# Number of distinct authors per forum: one bar each, plus a printed summary

unique_author = pd.DataFrame(
    [[len(df_eth), len(df_btc), len(df_etc)]],
    columns=['ETH', 'BTC', 'ETC'],
).plot.bar()
print('BTC:', len(df_btc), 'ETC:', len(df_etc), 'ETH:', len(df_eth))

In [None]:
# Create a dataframe with all authors: one column of distinct names per forum.
#
# Each column is renumbered 0..n-1 before the frame is built. df_btc, df_eth and
# df_etc still carry the row labels of their source CSVs, and pandas aligns
# columns on those labels; assigning them one by one to an empty frame kept only
# the labels present in the BTC column and silently dropped every ETH/ETC author
# whose first post sat on a different row. Shorter columns are padded with NaN.

df_authors = pd.DataFrame({
    'btc': df_btc.author.reset_index(drop=True),
    'eth': df_eth.author.reset_index(drop=True),
    'etc': df_etc.author.reset_index(drop=True),
})
# df_authors
    

In [None]:
# For each ordered pair of forums, count the distinct authors of the first forum
# that also appear among the authors of the second forum.
# Keep in mind that this is from the top 1000 posts; an author may have a top post in one but not another

# Counter tallies the True/False membership flags; the [True] entry is the overlap size
btc_in_eth = Counter(df_authors['btc'].dropna().isin(eth_author))
btc_in_etc = Counter(df_authors['btc'].dropna().isin(etc_author))
eth_in_etc = Counter(df_authors['eth'].dropna().isin(etc_author))
eth_in_btc = Counter(df_authors['eth'].dropna().isin(btc_author))
etc_in_btc = Counter(df_authors['etc'].dropna().isin(btc_author))
etc_in_eth = Counter(df_authors['etc'].dropna().isin(eth_author))

multiple_author = pd.DataFrame({
    'BTC in ETH': [btc_in_eth[True]],
    'BTC in ETC': [btc_in_etc[True]],
    'ETH in BTC': [eth_in_btc[True]],
    'ETH in ETC': [eth_in_etc[True]],
    'ETC in BTC': [etc_in_btc[True]],
    'ETC in ETH': [etc_in_eth[True]],
})
ma_graph = multiple_author.plot.bar()
# Anchor the legend just outside the right edge of the axes
ma_graph.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


In [None]:
# Table of the cross-forum author counts plotted in the bar chart above
# (a single row, one column per ordered pair of forums)

multiple_author

### Table of author names that are found between ETH, BTC, and ETC forums

In [None]:
# Each series below keeps the names of one df_authors column that also occur in
# another column; NaN padding is dropped at the end.

# Bitcoin authors who also appear in the Ethereum column
authors_btc_in_eth = df_authors.loc[df_authors['btc'].isin(df_authors['eth']), 'btc'].dropna()

# Ethereum authors who also appear in the Bitcoin column
authors_eth_in_btc = df_authors.loc[df_authors['eth'].isin(df_authors['btc']), 'eth'].dropna()

# Bitcoin authors who also appear in the Ethereum Classic column
authors_btc_in_etc = df_authors.loc[df_authors['btc'].isin(df_authors['etc']), 'btc'].dropna()

# Ethereum Classic authors who also appear in the Bitcoin column
authors_etc_in_btc = df_authors.loc[df_authors['etc'].isin(df_authors['btc']), 'etc'].dropna()

# Ethereum authors who also appear in the Ethereum Classic column
authors_eth_in_etc = df_authors.loc[df_authors['eth'].isin(df_authors['etc']), 'eth'].dropna()

# Ethereum Classic authors who also appear in the Ethereum column
authors_etc_in_eth = df_authors.loc[df_authors['etc'].isin(df_authors['eth']), 'etc'].dropna()


In [None]:
# authors_btc_in_eth
# authors_eth_in_btc
# authors_btc_in_etc
# authors_etc_in_btc
# authors_eth_in_etc
# authors_etc_in_eth

In [None]:
# Count, for every author, how many of the six overlap series above contain that name

all_authors_names = [
    name
    for overlap in (
        authors_btc_in_etc,
        authors_btc_in_eth,
        authors_etc_in_btc,
        authors_etc_in_eth,
        authors_eth_in_btc,
        authors_eth_in_etc,
    )
    for name in overlap
]
all_authors_names_count = pd.DataFrame.from_dict(Counter(all_authors_names), orient='index')
# all_authors_names_count


In [None]:
# Table of authors found in one subreddit who have published in another subreddit

# Overlap series paired with the table column each one fills; the list order
# fixes the column order of the table.
cross_forum_authors = [
    ('btc_in_eth', authors_btc_in_eth),
    ('eth_in_btc', authors_eth_in_btc),
    ('btc_in_etc', authors_btc_in_etc),
    ('etc_in_btc', authors_etc_in_btc),
    ('eth_in_etc', authors_eth_in_etc),
    ('etc_in_eth', authors_etc_in_eth),
]

# One row per distinct author name, every overlap flag starting at 0
all_authors_names_pivot = pd.DataFrame({'names': all_authors_names_count.index.unique()})
for column, _ in cross_forum_authors:
    all_authors_names_pivot[column] = 0
all_authors_names_pivot2 = all_authors_names_pivot.set_index(['names'])

# all_authors_names_pivot
# all_authors_names_pivot2

# Fill pivot2 with '1' for each instance a unique author from one subreddit was
# found in another. Index.isin does this in one pass per column; it replaces the
# per-name loops that wrote through `.ix`, an indexer removed in pandas 1.0.
for column, authors in cross_forum_authors:
    all_authors_names_pivot2[column] = (
        all_authors_names_pivot2.index.isin(set(authors)).astype(int)
    )

# Highlight each column's maximum in green (the '1' flags whenever a column has any)

all_authors_names_pivot2.style.highlight_max(color='green')


## Time series analysis of forums

### Number of Posts within General Reddit Timeframes - Date of Information (DOI): 26 December 2016

In [None]:
# input for this function is a DataFrame with column 'time_rel' from the Reddit scrape

def posttime_rel(threads):
    """Draw a bar chart of how many rows share each ``time_rel`` value."""
    threads['time_rel'].value_counts().plot.bar()


In [None]:
# Bar chart of row counts per 'time_rel' value for the Ethereum threads
posttime_rel(threads_eth)

In [None]:
# Bar chart of row counts per 'time_rel' value for the Bitcoin threads
posttime_rel(threads_btc)

In [None]:
# Bar chart of row counts per 'time_rel' value for the Ethereum Classic threads
posttime_rel(threads_etc)

### Posts per month over all records (as of DOI)

In [None]:
# input for this function is a DataFrame with column 'mon' from the Reddit scrape

def time_month_all(threads):
    """Draw a bar chart of row counts per ``mon`` label, in calendar order.

    Labels are the 3-letter month abbreviations Jan..Dec; a month with no
    rows keeps its place on the axis with no bar.
    """
    calendar_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    per_month = dict(threads['mon'].value_counts())
    (
        pd.DataFrame.from_dict(data=per_month, orient='index')
        .rename(columns={'': 'mon', 0: 'count'})
        .reindex(calendar_order)
        .plot(kind='bar')
    )

In [None]:
# Posts per month (Jan..Dec) for the Ethereum threads
time_month_all(threads_eth)

In [None]:
# Posts per month (Jan..Dec) for the Bitcoin threads
time_month_all(threads_btc)

In [None]:
# Posts per month (Jan..Dec) for the Ethereum Classic threads
time_month_all(threads_etc)

### Posts per year over all records (as of DOI)

In [None]:
# input for this function is a DataFrame with column 'year' from the Reddit scrape

def time_year_all(threads, years=(2013, 2014, 2015, 2016)):
    """Draw a bar chart of how many rows fall in each year.

    Parameters
    ----------
    threads : DataFrame with a 'year' column.
    years : iterable of year labels to chart, in bar order. Defaults to
        2013-2016, the range this function was originally fixed to. A listed
        year with no rows keeps its tick but gets no bar; rows whose year is
        not listed are left out of the chart, so pass a wider range for newer
        scrapes.
    """
    year_dict = dict(threads.year.value_counts())
    year_count = pd.DataFrame.from_dict(data=year_dict, orient='index')
    year_count = year_count.rename(columns={0: 'count'})
    # reindex both orders the bars and restricts them to the requested years
    year_count = year_count.reindex(list(years))
    year_count.plot(kind='bar')

In [None]:
# Posts per year for the Ethereum threads
time_year_all(threads_eth)

In [None]:
# Posts per year for the Bitcoin threads
time_year_all(threads_btc)

In [None]:
# Posts per year for the Ethereum Classic threads
time_year_all(threads_etc)

### Posts per day of the week over all records (as of DOI)

In [None]:
# input for this function is a DataFrame with column 'dow' from the Reddit scrape

def time_dow_all(threads):
    """Draw a bar chart of row counts per ``dow`` label, ordered Sun through Sat."""
    week_order = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
    per_dow = dict(threads['dow'].value_counts())
    chart_data = pd.DataFrame.from_dict(data=per_dow, orient='index')
    # Name the single count column, then put the rows in weekday order
    chart_data = chart_data.rename(columns={'': 'dow', 0: 'count'}).reindex(week_order)
    chart_data.plot.bar()

In [None]:
# Posts per day of the week (Sun..Sat) for the Ethereum threads
time_dow_all(threads_eth)

In [None]:
# Posts per day of the week (Sun..Sat) for the Bitcoin threads
time_dow_all(threads_btc)

In [None]:
# Posts per day of the week (Sun..Sat) for the Ethereum Classic threads
time_dow_all(threads_etc)

### Posts per day of the month over all records (as of DOI)

In [None]:
# input for this function is a DataFrame with column 'day' from the Reddit scrape

def time_day_all(threads):
    """Draw a bar chart of row counts per ``day`` value for days 1 through 31."""
    month_days = list(range(1, 32))
    per_day = dict(threads['day'].value_counts())
    chart_data = pd.DataFrame.from_dict(data=per_day, orient='index')
    chart_data = chart_data.rename(columns={'': 'day', 0: 'count'})
    # Rows come out in day order; a day with no posts keeps its tick with no bar
    chart_data.reindex(month_days).plot.bar()


In [None]:
# Posts per day of the month (1..31) for the Ethereum threads
time_day_all(threads_eth)

In [None]:
# Posts per day of the month (1..31) for the Bitcoin threads
time_day_all(threads_btc)

In [None]:
# Posts per day of the month (1..31) for the Ethereum Classic threads
time_day_all(threads_etc)

## Analysis of Title Content

### Wordclouds for all posts in ETH, BTC, or ETC subreddit

In [None]:
# Create text files for wordclouds

# character class matching any single punctuation character
exclude = re.compile('[%s]' % re.escape(punctuation))

# One text file per subreddit: every title with punctuation replaced by spaces,
# each title followed by a single space
with open('redditData/threads_eth.txt', 'w') as eth_f:
    eth_f.writelines(exclude.sub(' ', title) + ' ' for title in threads_eth['title'])

with open('redditData/threads_btc.txt', 'w') as btc_f:
    btc_f.writelines(exclude.sub(' ', title) + ' ' for title in threads_btc['title'])

with open('redditData/threads_etc.txt', 'w') as etc_f:
    etc_f.writelines(exclude.sub(' ', title) + ' ' for title in threads_etc['title'])

In [None]:
# wordcloud generator function for raw files

def wcg(file):
    """Render a word cloud of a text file and save it next to the file.

    The text is lower-cased before the cloud is generated, and the image is
    written to ``<file>.jpg`` (the suffix is appended to the full file name).

    Parameters
    ----------
    file : path of the text file to read.
    """
    # Context manager so the handle is closed as soon as the text is read
    with open(file) as text_file:
        text = text_file.read().lower()
    #reddit_mask = np.array(Image.open('sil.jpg'))
    wc = WordCloud(width=1440, height=900, background_color='white') # and mask=reddit_mask for your desired JPG
    wc.generate(text)
    wc.to_file('%s.jpg' % file)

In [None]:
# Render a JPG word cloud from all title words of each subreddit

for titles_file in (
    'redditData/threads_eth.txt',
    'redditData/threads_btc.txt',
    'redditData/threads_etc.txt',
):
    wcg(titles_file)

Click to view output
[ETH](redditData/threads_eth.txt.jpg)
[BTC](redditData/threads_btc.txt.jpg)
[ETC](redditData/threads_etc.txt.jpg)

### Word frequency tables for ETH, BTC, and ETC

In [None]:
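# Stop word list: punctuation tokens and common English words that are filtered out of the word-frequency tables and stop-word word clouds below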

stopwords = ['-', '&', ',', ':', ';', '.', ',', 'a', 'about', 'above', 'across', 'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst', 'an', 'and', 'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around', 'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'below', 'beside', 'besides', 'between', 'beyond', 'both', 'bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant', 'co', 'con', 'could', 'couldnt', 'cry', 'de', 'describe', 'detail', 'did', 'do', 'done', 'down', 'due', 'during', 'each', 'eg', 'either', 'else', 'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything', 'everywhere', 'except', 'few', 'fill', 'find', 'for', 'former', 'formerly', 'found', 'from', 'front', 'full', 'further', 'get', 'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'however', 'i', 'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it', 'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless', 'next', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on','once', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 'part', 'per', 'perhaps', 'please', 'put', 'rather', 're', 's', 'same', 'see', 'seem', 'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should', 'show', 'side', 'since', 'sincere', 'so', 'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhere', 'still', 'such', 
'take', 'than', 'that', 'the', 'their', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these', 'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'top', 'toward', 'towards', 'un', 'under', 'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well', 'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without', 'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves']

In [None]:
# Word frequency table for Ethereum

with open('redditData/threads_eth.txt', 'r') as eth_f:
    words_eth = eth_f.read()

# Lower-case, split on whitespace, and count each distinct token
wordlist_eth = words_eth.lower().split()
wordcount_eth = Counter(wordlist_eth)
wordcount_eth2 = (
    pd.DataFrame.from_dict(wordcount_eth, orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# Rows whose word is not in the stop word list, most frequent first
summary_wordcount_eth2 = wordcount_eth2[
    wordcount_eth2['word'].map(lambda word: word not in stopwords)
].sort_values('count', ascending=False)

# Top 30 remaining words (by default a notebook only renders a cell's last expression)
summary_wordcount_eth2.head(30)

# Summary statistics of the filtered counts
summary_wordcount_eth2.describe()

In [None]:
# Word frequency table for Bitcoin

with open('redditData/threads_btc.txt', 'r') as btc_f:
    words_btc = btc_f.read()

# Lower-case, split on whitespace, and count each distinct token
wordlist_btc = words_btc.lower().split()
wordcount_btc = Counter(wordlist_btc)
wordcount_btc2 = (
    pd.DataFrame.from_dict(wordcount_btc, orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# Rows whose word is not in the stop word list, most frequent first
summary_wordcount_btc2 = wordcount_btc2[
    wordcount_btc2['word'].map(lambda word: word not in stopwords)
].sort_values('count', ascending=False)

# Top 30 remaining words (by default a notebook only renders a cell's last expression)
summary_wordcount_btc2.head(30)

# Summary statistics of the filtered counts
summary_wordcount_btc2.describe()

In [None]:
# Word frequency table for Ethereum Classic

with open('redditData/threads_etc.txt', 'r') as etc_f:
    words_etc = etc_f.read()

# Lower-case, split on whitespace, and count each distinct token
wordlist_etc = words_etc.lower().split()
wordcount_etc = Counter(wordlist_etc)
wordcount_etc2 = (
    pd.DataFrame.from_dict(wordcount_etc, orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# Rows whose word is not in the stop word list, most frequent first
summary_wordcount_etc2 = wordcount_etc2[
    wordcount_etc2['word'].map(lambda word: word not in stopwords)
].sort_values('count', ascending=False)

# Top 30 remaining words (by default a notebook only renders a cell's last expression)
summary_wordcount_etc2.head(30)

# Summary statistics of the filtered counts
summary_wordcount_etc2.describe()

### Word clouds for all posts in ETH, BTC, or ETC with stopwords

In [None]:
# Stop-word helpers for ETH, BTC, and ETC built from the word frequency tables

def stopword_eth(top_terms_remove):
    """Return the ``top_terms_remove`` most frequent Ethereum title words.

    Words already in ``stopwords`` are skipped; the result is a list ordered
    from most to least frequent.
    """
    not_stopword = wordcount_eth2['word'].map(lambda word: word not in stopwords)
    ranked = wordcount_eth2[not_stopword].sort_values('count', ascending=False)
    return ranked.head(top_terms_remove)['word'].tolist()

# stopword_eth(5)

def stopword_btc(top_terms_remove):
    """Return the ``top_terms_remove`` most frequent Bitcoin title words.

    Words already in ``stopwords`` are skipped; the result is a list ordered
    from most to least frequent.
    """
    not_stopword = wordcount_btc2['word'].map(lambda word: word not in stopwords)
    ranked = wordcount_btc2[not_stopword].sort_values('count', ascending=False)
    return ranked.head(top_terms_remove)['word'].tolist()

# stopword_btc(5)

def stopword_etc(top_terms_remove):
    """Return the ``top_terms_remove`` most frequent Ethereum Classic title words.

    Words already in ``stopwords`` are skipped; the result is a list ordered
    from most to least frequent.
    """
    not_stopword = wordcount_etc2['word'].map(lambda word: word not in stopwords)
    ranked = wordcount_etc2[not_stopword].sort_values('count', ascending=False)
    return ranked.head(top_terms_remove)['word'].tolist()

# stopword_etc(5)

In [None]:
# wordcloud generator function for raw files with stopwords

def wcg_stop(file, stop, number_top_terms):
    """Render a word cloud of a text file with stop words removed.

    The cloud excludes the shared ``stopwords`` list plus the
    ``number_top_terms`` most frequent words of the chosen forum. The image is
    written to ``<file>_notop_<number_top_terms>.jpg``.

    Parameters
    ----------
    file : path of the text file to read.
    stop : which forum's top terms to exclude: 'eth', 'btc' or 'etc'.
    number_top_terms : how many of that forum's top terms to exclude.

    Raises
    ------
    ValueError : if ``stop`` is not one of the three forum codes.
    """
    if stop == 'eth':
        stop_wc = stopword_eth(number_top_terms)
    elif stop == 'btc':
        stop_wc = stopword_btc(number_top_terms)
    elif stop == 'etc':
        stop_wc = stopword_etc(number_top_terms)
    else:
        # Without this branch an unknown code left stop_wc unbound and failed
        # further down with an unrelated-looking UnboundLocalError
        raise ValueError("stop must be 'eth', 'btc' or 'etc', got %r" % (stop,))
    # Context manager so the handle is closed as soon as the text is read
    with open(file) as text_file:
        text = text_file.read().lower()
    wc = WordCloud(width=1440, height=900, background_color='white', stopwords=stop_wc + stopwords)
    wc.generate(text)
    wc.to_file('%s.jpg' % (file+('_notop_')+str(number_top_terms)))

In [None]:
# Ethereum word cloud without the stop word list and the 10 most frequent remaining Ethereum words
wcg_stop('redditData/threads_eth.txt', 'eth', 10)

In [None]:
# Bitcoin word cloud without the stop word list and the 10 most frequent remaining Bitcoin words
wcg_stop('redditData/threads_btc.txt', 'btc', 10)

In [None]:
# Ethereum Classic word cloud without the stop word list and the 10 most frequent remaining Ethereum Classic words
wcg_stop('redditData/threads_etc.txt', 'etc', 10)

### Word clouds per author

In [None]:
# Create text files for wordclouds

# regex for punctuation
exclude = re.compile('[%s]' % re.escape(punctuation))


def _auth_wordcloud(threads, forum, auth):
    """Write one author's titles to a text file and render its word cloud.

    Shared implementation of the three per-forum entry points below.

    Parameters
    ----------
    threads : DataFrame with 'author' and 'title' columns.
    forum : forum code used in the file name ('eth', 'btc' or 'etc').
    auth : author name to select; rows are matched on exact equality.
    """
    auth_file = 'redditData/threads_%s_%s.txt' % (forum, auth)
    titles = threads.loc[threads['author'] == auth, 'title']
    # Write straight to the per-author file. The earlier version wrote a shared
    # temporary file and os.rename'd it, which raises FileExistsError on Windows
    # whenever the cloud for the same author is regenerated.
    with open(auth_file, 'w') as auth_f:
        # punctuation replaced by spaces, one space after every title
        auth_f.writelines(exclude.sub(' ', title) + ' ' for title in titles)
    wcg(auth_file)


# write titles for specific Ethereum subreddit author to txt file
# wordcloud of all Ethereum author posts
def auth_wordcloud_eth(auth):
    """Write ``redditData/threads_eth_<auth>.txt`` and its word cloud for one Ethereum author."""
    _auth_wordcloud(threads_eth, 'eth', auth)


# write titles for specific Bitcoin subreddit author to txt file
# wordcloud of all Bitcoin author posts
def auth_wordcloud_btc(auth):
    """Write ``redditData/threads_btc_<auth>.txt`` and its word cloud for one Bitcoin author."""
    _auth_wordcloud(threads_btc, 'btc', auth)


# write titles for specific Ethereum Classic subreddit author to txt file
# wordcloud of all Ethereum Classic author posts
def auth_wordcloud_etc(auth):
    """Write ``redditData/threads_etc_<auth>.txt`` and its word cloud for one Ethereum Classic author."""
    _auth_wordcloud(threads_etc, 'etc', auth)


In [None]:
# auth_wordcloud_eth('vbuterin')

# generate Ethereum author word cloud of all author posts with stop words
# wcg_stop('redditData/threads_eth_vbuterin.txt', 'eth', 5)

In [None]:
# auth_wordcloud_btc('Egon_1')

# generate Bitcoin author word cloud of all author posts with stop words
# wcg_stop('redditData/threads_btc_Egon_1.txt', 'btc', 5)

In [None]:
# auth_wordcloud_etc('bit_novosti')

# generate Ethereum Classic author word cloud of all author posts with stop words
# wcg_stop('redditData/threads_etc_bit_novosti.txt', 'etc', 5)

### Word clouds per time frames

In [None]:
# Working on it!

## Comparison of Reddit Content to Interviews