diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 3337b5d..125ead4 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_Unique_users_subreddit/__pycache__/__init__.cpython-36.pyc b/q01_Unique_users_subreddit/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a7878e8 Binary files /dev/null and b/q01_Unique_users_subreddit/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_Unique_users_subreddit/__pycache__/build.cpython-36.pyc b/q01_Unique_users_subreddit/__pycache__/build.cpython-36.pyc index ca2efed..5c318c8 100644 Binary files a/q01_Unique_users_subreddit/__pycache__/build.cpython-36.pyc and b/q01_Unique_users_subreddit/__pycache__/build.cpython-36.pyc differ diff --git a/q01_Unique_users_subreddit/build.py b/q01_Unique_users_subreddit/build.py index 82fe7e5..a8c52da 100644 --- a/q01_Unique_users_subreddit/build.py +++ b/q01_Unique_users_subreddit/build.py @@ -1,7 +1,14 @@ +# %load q01_Unique_users_subreddit/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split +from greyatomlib.recommendor_system_project.q01_Unique_users_subreddit.build import q01_Unique_users_subreddit as original + +path = 'data/subreddit-interactions-for-25000-users.zip' + +def q01_Unique_users_subreddit(path): + data = pd.read_csv(path, compression = 'zip') + return data, np.unique(data.username).shape[0], np.unique(data.subreddit).shape[0] -def q01_Unique_users_subreddit(): diff --git a/q01_Unique_users_subreddit/tests/__pycache__/__init__.cpython-36.pyc b/q01_Unique_users_subreddit/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..9a0de26 Binary files /dev/null and b/q01_Unique_users_subreddit/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_Unique_users_subreddit/tests/__pycache__/test.cpython-36.pyc b/q01_Unique_users_subreddit/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..b8870d5 Binary files /dev/null and b/q01_Unique_users_subreddit/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_top_subreddits_wordcloud/__pycache__/__init__.cpython-36.pyc b/q02_top_subreddits_wordcloud/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2b657f0 Binary files /dev/null and b/q02_top_subreddits_wordcloud/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_top_subreddits_wordcloud/__pycache__/build.cpython-36.pyc b/q02_top_subreddits_wordcloud/__pycache__/build.cpython-36.pyc index 98c1cbb..1759299 100644 Binary files a/q02_top_subreddits_wordcloud/__pycache__/build.cpython-36.pyc and b/q02_top_subreddits_wordcloud/__pycache__/build.cpython-36.pyc differ diff --git a/q02_top_subreddits_wordcloud/build.py b/q02_top_subreddits_wordcloud/build.py index ead5d42..f9f52da 100644 --- a/q02_top_subreddits_wordcloud/build.py +++ b/q02_top_subreddits_wordcloud/build.py @@ -1,10 +1,20 @@ +# %load q02_top_subreddits_wordcloud/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt from wordcloud import WordCloud -from sklearn.model_selection import train_test_split from greyatomlib.recommendor_system_project.q01_Unique_users_subreddit.build import q01_Unique_users_subreddit -def q02_top_subreddits_wordcloud(): +path = 'data/subreddit-interactions-for-25000-users.zip' + +def q02_top_subreddits_wordcloud(path): + data, users, subred = q01_Unique_users_subreddit(path) + cloud = WordCloud(background_color='white',width=1600,height=800,relative_scaling=0.5,normalize_plurals=True) + wordcloud = cloud.generate_from_frequencies(data['subreddit'].value_counts()) + plt.imshow(wordcloud, interpolation='bilinear') + plt.axis('off') + plt.show() + + diff --git a/q02_top_subreddits_wordcloud/tests/__pycache__/__init__.cpython-36.pyc b/q02_top_subreddits_wordcloud/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..37b3f61 Binary files /dev/null and b/q02_top_subreddits_wordcloud/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_top_subreddits_wordcloud/tests/__pycache__/test.cpython-36.pyc b/q02_top_subreddits_wordcloud/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..7754482 Binary files /dev/null and b/q02_top_subreddits_wordcloud/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_plot_topK_subreddit_of_a_user/__pycache__/__init__.cpython-36.pyc b/q03_plot_topK_subreddit_of_a_user/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..d79a413 Binary files /dev/null and b/q03_plot_topK_subreddit_of_a_user/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_plot_topK_subreddit_of_a_user/__pycache__/build.cpython-36.pyc b/q03_plot_topK_subreddit_of_a_user/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..fe64e54 Binary files /dev/null and b/q03_plot_topK_subreddit_of_a_user/__pycache__/build.cpython-36.pyc differ diff --git a/q03_plot_topK_subreddit_of_a_user/build.py b/q03_plot_topK_subreddit_of_a_user/build.py index 59a1799..e156fc7 100644 --- a/q03_plot_topK_subreddit_of_a_user/build.py +++ b/q03_plot_topK_subreddit_of_a_user/build.py @@ -1,8 +1,14 @@ +# %load q03_plot_topK_subreddit_of_a_user/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from greyatomlib.recommendor_system_project.q01_Unique_users_subreddit.build import q01_Unique_users_subreddit -def q03_plot_topK_subreddit_of_a_user(): - +def q03_plot_topK_subreddit_of_a_user(path, user='kabanossi', k=14): + data, users, subred = q01_Unique_users_subreddit(path) + new_df = pd.DataFrame(data.groupby('username')['subreddit'].value_counts()[user][:k]) + new_df.plot.bar() + plt.show(); + + diff --git a/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/__init__.cpython-36.pyc b/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..050ae22 Binary files /dev/null and b/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/test.cpython-36.pyc b/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..0f3cc00 Binary files /dev/null and b/q03_plot_topK_subreddit_of_a_user/tests/__pycache__/test.cpython-36.pyc differ