# Reddit Flair Predictor

---

### 1) Import required modules

In [1]:
import pickle
import os
import warnings
import praw
import pandas as pd
import string
import nltk
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
warnings.filterwarnings('ignore')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### 2) Create a PRAW API instance and load model with tokenizer

In [2]:
reddit = praw.Reddit(client_id='#', client_secret='#', user_agent='#')
model = load_model("../models/model.h5")

with open('../models/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


### 3) Define helper functions to clean text and predict flair

In [3]:
def nltk_clean(field):
    # remove punctuation from each word
    words = field.split(" ")
    table = str.maketrans('', '', string.punctuation)
    stripped = [w.translate(table) for w in words]
    field = ' '.join(stripped)

    # filter out stop words
    words = field.split(" ")
    stop_words = set(stopwords.words('english'))
    words = [w for w in words if not w in stop_words]
    field = ' '.join(words)
    return field

def predict_title(title):
    sequences = tokenizer.texts_to_sequences([title])
    word_index = tokenizer.word_index
    #print('Found %s unique tokens.' % len(word_index))
    #print(':',sequences) #list
    X = pad_sequences(sequences, maxlen=35)
    result = model.predict([X,X,X])
    return np.argmax(result,axis=-1)

### 4) Fetch top 10 posts from ```r/india``` and predict their flair

In [4]:
dict = {8:"Scheduled", 7:"Politics",5:"Photography",6:"Policy/Economy",3:"Food", 2:"Coronavirus",1:"Business/Finance",4:"Non-Political",9:"Science/Technology",10:"Sports",0:"AskIndia"}
# [R]eddiquette has been discontinued

subreddit = reddit.subreddit('india')
for submission in subreddit.top(limit=10):
    print("Title is:" , submission.title)
    print("Original Flair is:", submission.link_flair_text)
    title = nltk_clean(submission.title)
    print("Predicted Flair is:", dict.get(predict_title(title)[0]))
    print()

Title is: Will donate thrice the number of upvotes (amount in Rs.) i get for this thread in next 24 hours
Original Flair is: [R]eddiquette
Predicted Flair is: Business/Finance

Title is: Indian reply to NYtimes cartoon on Paris climate accord by Satish Acharya.
Original Flair is: /r/all
Predicted Flair is: Policy/Economy

Title is: The essence of the Indian soap opera, distilled into one GIF.
Original Flair is: r/all
Predicted Flair is: Policy/Economy

Title is: Fuck all Religion
Original Flair is: Politics
Predicted Flair is: Business/Finance

Title is: German exchange Student at IIT Madras is being sent back home by the Indian immigration department because he joined the protest.
Original Flair is: Politics
Predicted Flair is: Politics

Title is: Tragedy of India
Original Flair is: r/all
Predicted Flair is: Policy/Economy

Title is: Today's The Hindu
Original Flair is: Coronavirus
Predicted Flair is: Policy/Economy

Title is: If you are not moved by this picture, I wish I had your he