In [None]:
#Taking V14 as final

# **Import Libraries**

In [1]:
import tkinter
from tkinter import *
import wordcloud
from wordcloud import WordCloud, STOPWORDS
from PIL import Image, ImageTk
from tkinter import messagebox
from tkinter import ttk
import praw #Python Reddit API Wrapper
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import datetime as dt
import numpy as np
import seaborn as sns
import nltk #Natural Language Toolkit
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer  # Added for sentiment analysis
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Download NLTK stopwords and Vader Lexicon if not already downloaded
# nltk.download('stopwords')
# nltk.download('vader_lexicon')

# **Sentiment Analyzer**

In [2]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer # Added for sentiment analysis

# Module-level VADER analyzer, created once and shared by
# perform_sentiment_analysis() below.
# NOTE: constructing it needs NLTK's 'vader_lexicon' resource; see the
# commented-out nltk.download(...) calls in the import cell if it is missing.
analyzer = SentimentIntensityAnalyzer()

def perform_sentiment_analysis(data, threshold=0.3):
    """Label every post in ``data`` as Positive, Negative or Neutral.

    For each row the title and body are joined with a single space and scored
    with the module-level VADER ``analyzer``. The compound score is mapped to a
    label using a symmetric cut-off.

    Args:
        data: DataFrame whose rows expose ``title`` and ``body`` attributes
            (accessed through ``itertuples``).
        threshold: Cut-off applied to the compound score. Scores at or above
            ``threshold`` are 'Positive', scores at or below ``-threshold`` are
            'Negative', everything in between is 'Neutral'.

    Returns:
        A list with one dict per row, in row order, holding the keys
        ``"text"``, ``"sentiment"`` and ``"score"``.
    """

    def _label_for(score):
        # The positive test runs first, so it wins if both comparisons hold.
        if score >= threshold:
            return 'Positive'
        if score <= -threshold:
            return 'Negative'
        return 'Neutral'

    results = []
    for row in data.itertuples():
        combined = row.title + " " + row.body
        compound = analyzer.polarity_scores(combined)['compound']
        results.append({
            "text": combined,
            "sentiment": _label_for(compound),
            "score": compound,
        })
    return results


# **Post Classification Function**

In [3]:
def classify_post_media(post):
    """Classify a post as 'Image', 'Video' or 'Text' from its URL.

    The match is case-insensitive and also looks at the URL with any query
    string or fragment removed, so links such as ``.../photo.JPG`` or
    ``.../photo.png?width=640`` count as images.

    Args:
        post: Any object with a ``url`` attribute (string or ``None``).

    Returns:
        'Image' when the URL ends with a known image extension, 'Video' when it
        contains ``v.redd.it``, otherwise 'Text'.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')

    # A missing URL is treated as an empty string and falls through to 'Text'.
    url = (post.url or '').lower()
    # Drop "?query" and "#fragment" so the extension test sees only the path part.
    path_only = url.split('?', 1)[0].split('#', 1)[0]

    # The raw-URL check is kept so every URL that matched before still matches.
    if url.endswith(image_suffixes) or path_only.endswith(image_suffixes):
        return 'Image'
    if 'v.redd.it' in url:
        return 'Video'
    return 'Text'

# Reddit Search Function

In [None]:
from collections import Counter
from nltk.corpus import stopwords
import spacy
import string
from PIL import Image as PILImage
from IPython.display import Image, display


def _content_tokens(doc, punctuations, skip_curly_quotes=False):
    """Return the lower-cased informative tokens of a spaCy document.

    Stop words, single punctuation characters, purely numeric tokens and
    whitespace-only tokens are dropped. With ``skip_curly_quotes`` set, tokens
    containing a typographic single quote are dropped as well.
    """
    tokens = []
    for token in doc:
        text = token.text
        if token.is_stop or text in punctuations or text.isnumeric() or not text.strip():
            continue
        if skip_curly_quotes and ("‘" in text or "’" in text):
            continue
        tokens.append(text.lower())
    return tokens


def reddit_search():
    """Fetch a subreddit's hot posts and render the analysis dashboard.

    Button callback. It reads the subreddit name from ``E1``, writes textual
    results into ``tb`` and grids five matplotlib canvases onto ``top``: line
    plot, correlation heatmap, media-type bars, score/comment bars and a
    sentiment pie. It also writes ``wordCloud.png`` / ``wordCloud_resized.png``
    and shows the resized word cloud in the notebook output.

    The hot listing is downloaded once and reused for every analysis step.
    Problems such as an empty name, a failed fetch or no posts are reported in
    the text box, and the function then returns early.

    Credentials are read from ``REDDIT_CLIENT_ID`` / ``REDDIT_CLIENT_SECRET``
    when those environment variables are set.
    """
    import os  # function-scope import: keeps this cell independent of the import cell

    def show(content):
        # Best-effort insert: a TclError raised by Tk for one piece of text
        # must not abort the rest of the analysis.
        try:
            tb.insert(INSERT, str(content))
        except TclError:
            pass

    tb.delete('1.0', END)

    # Getting title from input
    Sub = E1.get().strip()
    if not Sub:
        show('Please enter a subreddit name.\n')
        return

    # SECURITY: the literal fallbacks keep the notebook working as before, but
    # they are secrets committed to source. Prefer the environment variables
    # and rotate/remove the literals.
    reddit = praw.Reddit(
        client_id=os.environ.get('REDDIT_CLIENT_ID', 'DP78tG9HeZiMQg'),
        client_secret=os.environ.get('REDDIT_CLIENT_SECRET', 'xF80XIHboP51Lq63viNLTzxJrmE'),
        user_agent='RedditWebScraping',
    )

    show('\n------------------------------Subreddit-------------------------------------\n')

    # Upper bound on the number of hot posts requested from the subreddit.
    limit_posts = 10000

    # Download the listing exactly once. Every later step works on this
    # snapshot, so the charts and counts all describe the same posts.
    try:
        submissions = list(reddit.subreddit(Sub).hot(limit=limit_posts))
    except Exception as exc:  # GUI boundary: report the failure instead of a silent traceback
        show(f"Could not fetch posts for '{Sub}': {exc}\n")
        return

    for post in submissions:
        # Guarded per title so one failing title does not hide the remaining ones.
        show(post.title)
        show("\n")

    # Store the data from the posts
    posts = pd.DataFrame(
        [
            [p.title, p.score, p.id, p.subreddit, p.url, p.num_comments, p.selftext, p.created]
            for p in submissions
        ],
        columns=['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created'],
    )
    if posts.empty:
        show(f"No posts found for '{Sub}'.\n")
        return

    show(posts)

    # Derived columns are computed outside any display guard: the charts below
    # depend on them even when showing the tables fails.
    posts = posts.assign(timestamp=posts['created'].apply(dt.datetime.fromtimestamp))
    # Score per comment. Posts without comments use a denominator of 1 so the
    # ratio stays finite.
    posts['interaction'] = posts['score'] / posts['num_comments'].clip(lower=1)

    show(posts[['title', 'score', 'timestamp']])
    show("\n")
    show(posts[['title', 'score', 'interaction']])
    show("\n")

    # ---- Word frequencies of the titles --------------------------------
    nlp = spacy.load("en_core_web_sm")
    punctuations = set(string.punctuation)

    word_counts = Counter()
    for title in posts['title']:
        word_counts.update(_content_tokens(nlp(title), punctuations, skip_curly_quotes=True))

    # ---- Media types ---------------------------------------------------
    media_counts = Counter(classify_post_media(post) for post in submissions)

    # ---- Most important topics -----------------------------------------
    finalstr = "\n---------------Most Important Topics----------------------\n"
    for word, count in word_counts.most_common(20):
        finalstr += f"{word}: {count}\n"
    show(finalstr)

    show('\n--------------------------------Graphs----------------------------------->\n')

    # ---- Line plot -----------------------------------------------------
    figure1 = plt.Figure(figsize=(4, 3), dpi=100)
    ax1 = figure1.add_subplot(111)
    line1 = FigureCanvasTkAgg(figure1, top)
    line1.get_tk_widget().grid(row=1, column=3, columnspan=1, padx=5)
    posts.plot(kind="line", x='title', y='num_comments', color='red', ax=ax1)
    posts.plot(kind="line", x='title', y='interaction', color='blue', ax=ax1)
    ax1.axes.get_xaxis().set_visible(False)
    ax1.set_title('Timewise Presence Of Subreddit \'' + Sub + '\'', fontsize=10)

    # ---- Correlation heatmap -------------------------------------------
    # The numeric epoch value ('created') stands in for the datetime column:
    # DataFrame.corr() needs numeric input, and recent pandas versions no
    # longer drop non-numeric columns silently.
    hm_selected_variables = posts[['created', 'interaction', 'score', 'num_comments']].rename(
        columns={'created': 'timestamp'}
    )

    figure2 = plt.Figure(figsize=(4, 3), dpi=100)
    ax2 = figure2.add_subplot(111)
    hm = FigureCanvasTkAgg(figure2, top)
    hm.get_tk_widget().grid(row=1, column=4, columnspan=1, padx=5)

    correlation_matrix = hm_selected_variables.corr()
    sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", ax=ax2, annot_kws={'fontsize': 6})
    ax2.set_title('Correlation Heatmap Of Subreddit \'' + Sub + '\'', fontsize=10)

    # ---- Media type distribution ---------------------------------------
    figure3 = plt.Figure(figsize=(4, 3), dpi=100)
    ax3 = figure3.add_subplot(111)
    ax3_bar = FigureCanvasTkAgg(figure3, top)
    ax3_bar.get_tk_widget().grid(row=2, column=3, columnspan=1, padx=5)
    ax3.bar(list(media_counts.keys()), list(media_counts.values()))
    ax3.set_xlabel('Media Type')
    ax3.set_ylabel('No. of Posts')
    ax3.set_title('Distribution of Media Types')

    # ---- Grouped bar chart: score vs. number of comments per title -----
    grouped_data = posts[['title', 'score', 'num_comments']].groupby('title').sum()
    bar_width = 0.5
    index = range(len(grouped_data))

    figure4 = plt.Figure(figsize=(4, 3), dpi=100)
    ax4 = figure4.add_subplot(111)
    ax4.bar(index, grouped_data['score'], bar_width, label='Score')
    ax4.bar([i + bar_width for i in index], grouped_data['num_comments'], bar_width, label='Num Comments')
    ax4.set_xlabel('Title')
    ax4.set_ylabel('Value')
    ax4.set_title('Comparison of Title,Score & No. of Comments')
    ax4.legend()
    chart = FigureCanvasTkAgg(figure4, top)
    chart.get_tk_widget().grid(row=2, column=4, columnspan=1, padx=5)

    # ---- Sentiment analysis --------------------------------------------
    show('\n------------------ Sentiment Analysis ------------------↓\n\n')

    # Scored once; the result feeds both the word lists and the pie chart.
    sentiments = perform_sentiment_analysis(posts)
    sentiment_counts = Counter(entry['sentiment'] for entry in sentiments)

    # Word frequencies per sentiment label; unknown labels count as Neutral.
    words_by_label = {'Positive': Counter(), 'Negative': Counter(), 'Neutral': Counter()}
    for entry in sentiments:
        bucket = words_by_label.get(entry['sentiment'], words_by_label['Neutral'])
        bucket.update(_content_tokens(nlp(entry['text']), punctuations))

    # Display the 20 most common positive, negative, and neutral words
    for label in ('Positive', 'Negative', 'Neutral'):
        top_words = [word for word, _ in words_by_label[label].most_common(20)]
        show(f'------ {label} Words ------\n')
        show(', '.join(top_words) + '\n\n')

    # A plain Figure (not plt.subplots) keeps this chart out of pyplot's
    # global figure registry, like the other embedded charts.
    figure5 = plt.Figure()
    ax5 = figure5.add_subplot(111)
    ax5.pie(
        list(sentiment_counts.values()),
        labels=list(sentiment_counts.keys()),
        autopct='%1.1f%%',
        startangle=90,
    )
    ax5.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

    canvas = FigureCanvasTkAgg(figure5, master=top)
    canvas.get_tk_widget().grid(row=2, column=0, columnspan=3)

    # ---- Word cloud ----------------------------------------------------
    # Lower-cased, whitespace-normalised titles, each followed by one space.
    comment_words = "".join(" ".join(str(title).lower().split()) + " " for title in posts['title'])

    cloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        stopwords=set(STOPWORDS),
        min_font_size=10,
    )
    try:
        cloud.generate(comment_words)
    except ValueError:
        # Raised when no drawable word is left, e.g. every title word is a stop word.
        show('Not enough words in the titles to build a word cloud.\n')
        return
    cloud.to_file("wordCloud.png")

    # Resize with Pillow; the context manager closes the source file handle.
    with PILImage.open("wordCloud.png") as full_size:
        full_size.resize((350, 300), PILImage.BICUBIC).save("wordCloud_resized.png")

    # Display the resized WordCloud in the Jupyter Notebook
    display(Image(filename="wordCloud_resized.png"))

    
# Build the search window. Every widget gets `top` as its explicit master, so
# none of them can attach to a stale default root left over from an earlier
# run of this cell in the same kernel.
top = tkinter.Tk()
top.tk.call('encoding', 'system', 'utf-8')
top.wm_title("Reddit Analysis")

# Create and place the elements in the grid
L1 = Label(top, text="Subreddit Name")
L1.grid(row=0, column=0, sticky="nsew")

# Entry read by reddit_search() to get the subreddit name.
E1 = Entry(top, bd=5)
E1.grid(row=0, column=1)

B = tkinter.Button(top, text="Search", command=reddit_search)
B.grid(row=0, column=2)

# Text box that reddit_search() clears and fills with its textual output.
tb = Text(top)
tb.grid(row=1, column=0, columnspan=3)

# Blocks this cell until the window is closed.
top.mainloop()

# Network Graph

In [None]:
import networkx as nx
from PIL import Image as PILImage
from IPython.display import Image, display
from tkinter import Label, Entry, Text, Button

def reddit_graph():
    """Draw a user-interaction network for a subreddit's hot posts.

    Button callback. It reads the subreddit name from ``E1``, lists the titles
    of up to 50 hot posts in ``tb`` and loads every comment of each post. Each
    post author is linked to each commenter with a directed edge
    (post author -> commenter), and the graph is drawn with matplotlib.

    Posts or comments whose author is missing (``None``) are skipped.
    Problems such as an empty name or a failed fetch are reported in the text
    box, and the function then returns early.

    Credentials are read from ``REDDIT_CLIENT_ID`` / ``REDDIT_CLIENT_SECRET``
    when those environment variables are set.
    """
    import os  # function-scope import: keeps this cell independent of the import cell

    def show(content):
        # Best-effort insert: a TclError raised by Tk for one piece of text
        # must not abort the whole callback.
        try:
            tb.insert(tkinter.INSERT, str(content))
        except tkinter.TclError:
            pass

    tb.delete('1.0', tkinter.END)

    # Getting title from input
    Sub = E1.get().strip()
    if not Sub:
        show('Please enter a subreddit name.\n')
        return

    # SECURITY: the literal fallbacks keep the notebook working as before, but
    # they are secrets committed to source. Prefer the environment variables
    # and rotate/remove the literals.
    reddit = praw.Reddit(
        client_id=os.environ.get('REDDIT_CLIENT_ID', 'DP78tG9HeZiMQg'),
        client_secret=os.environ.get('REDDIT_CLIENT_SECRET', 'xF80XIHboP51Lq63viNLTzxJrmE'),
        user_agent='RedditWebScraping',
    )

    show('\n------------------------------Subreddit-------------------------------------\n')

    # Get 50 hot posts from the given subreddit
    try:
        submissions = list(reddit.subreddit(Sub).hot(limit=50))
    except Exception as exc:  # GUI boundary: report the failure instead of a silent traceback
        show(f"Could not fetch posts for '{Sub}': {exc}\n")
        return

    rows = []
    for post in submissions:
        show(post.title)
        show("\n")
        rows.append([post.title, post.score, post.id, post.subreddit, post.url,
                     post.num_comments, post.selftext, post.created, post.author])

    posts = pd.DataFrame(rows, columns=['title', 'score', 'id', 'subreddit', 'url',
                                        'num_comments', 'body', 'created', 'author'])
    if posts.empty:
        show(f"No posts found for '{Sub}'.\n")
        return

    # Score per comment. Posts without comments use a denominator of 1 so the
    # ratio stays finite.
    posts['interaction'] = posts['score'] / posts['num_comments'].fillna(1).clip(lower=1)
    show(posts[['title', 'score', 'interaction']])
    show("\n")

    # Create a directed graph for user interactions
    G = nx.DiGraph()

    for row in posts.itertuples(index=False):
        # The author can be None (e.g. a removed account); such posts cannot
        # contribute an edge, so their comments are not fetched.
        parent_user = getattr(row.author, 'name', None)
        if not parent_user:
            continue

        submission = reddit.submission(id=row.id)
        # limit=None resolves every "load more comments" placeholder.
        submission.comments.replace_more(limit=None)
        for comment in submission.comments.list():
            reply_user = getattr(getattr(comment, 'author', None), 'name', None)
            if reply_user:
                G.add_edge(parent_user, reply_user)

    show('\n--------------------------------Network Graph-----------------------------------\n')

    if G.number_of_nodes() == 0:
        show('No user interactions found to plot.\n')
        return

    # Visualize the network graph
    pos = nx.kamada_kawai_layout(G)
    nx.draw(G, pos, with_labels=False, node_size=100, node_color="orange", edge_color="gray", width=0.5)
    plt.title('User Interaction Network for \'' + Sub + '\'')
    plt.show()
    

# Build the network-analysis window. Every widget gets `top` as its explicit
# master, so none of them can attach to a stale default root left over from an
# earlier window in the same kernel.
top = tkinter.Tk()
top.tk.call('encoding', 'system', 'utf-8')
top.wm_title("Reddit Network Analysis")

# Create and place the elements in the grid
L1 = Label(top, text="Subreddit Name")
L1.grid(row=0, column=0, sticky="nsew")

# Entry read by reddit_graph() to get the subreddit name.
E1 = Entry(top, bd=5)
E1.grid(row=0, column=1)

B = tkinter.Button(top, text="Search", command=reddit_graph)
B.grid(row=0, column=2)

# Text box that reddit_graph() clears and fills with its textual output.
tb = Text(top)
tb.grid(row=1, column=0, columnspan=3)

# Blocks this cell until the window is closed.
top.mainloop()