In [1]:
# Import basic packages
import os
from urllib.request import urlretrieve, urlopen
import time

# Import data science packages
import pandas as pd

# Import reddit related packages
import praw
import pdb
import re

# Import CNN
#from neuralnetwork import food_CNN

## Config parameters

In [193]:
# Set download destinations 
pic_dest = "C:/project/imgbot/pics/"
thumbnail_dest = "C:/project/imgbot/thumbnails/"

# Set up reddit instance
reddit = praw.Reddit('bot1')

# Check or create csv file to save data
csv_file = 'replies.csv'
col_list = ['Post_ID', 'Post_title', 'Post_URL', 'Subreddit', 'Post_Date', 'Post_Score', 'Bot_Reply', 'Reply_Date', 'Classification', 'Calories', 'Reply_Score']

if os.path.isfile('replies.csv'):
   # if there is a log, load it as a pandas dataframe    
    reply_log = pd.read_csv(csv_file)
else:
    reply_log = pd.DataFrame(columns=col_list)

reply_log.set_index('Post_ID', inplace=True)

# Set up list of subreddits to monitor
subreddit_list = 'food+HealthyFood'
subreddit = reddit.subreddit('pythonforengineers')

## Helper functions

In [22]:
# Download helper that checks if file already exists
def download(url, filename, destination):
    """
    Download file from <url>
    :param url: URL to file
    :param filename: name that file is saved as (also checks for duplications)
    :param destination: Local file save path
    """
    full_destination = destination + filename
    
    if not url:
        return print("invalid URL. Skipping...")
    
    if not os.path.isfile(full_destination):
                urlretrieve(url, full_destination) 
    
    return print("Downloaded ", url)


# Helper to check if submission is an image
def submission_is_image(url):
    """
    Check if the submission is an image. Returns boolean
    :param url: URL of Reddit submission
    """   
    response = urlopen(url)
    type = response.headers.get_content_maintype()    
    if type == 'image':   
        return True    
    else:    
        return False 


# Helper to save a new reply entry or update existing entry in dataframe and csv    
def save_reply(data, save_dataframe, csv_file):
    """
    Save submission and reply data to reply_log dataframe and csv file
    :param data: list of data in same format/structure as reply_log
    :param save_dataframe: the target dataframe to update with new data
    :param csv_file: filename of the csv_file for saving
    """
    col_list = list(save_dataframe.columns.values)        
    new_entry = pd.DataFrame([new_row], columns=col_list)
    new_entry.set_index('Post_ID', inplace=True)
    
    if new_entry.index[0] in save_dataframe.index:
        save_dataframe.update(new_entry)
    else:
        save_dataframe = save_dataframe.append(new_entry)
    
    save_dataframe.to_csv(csv_file, encoding='utf-8', index=False) # Save dataframe as .csv file   
    return print("Reply added to dataframe and saved to {}".format(csv_file)) 


# Helper to reply to a submission with classification data, and return new entry to log        
def send_reply(text, submission, classification=None, calories=None):
    """
    Save submission and reply data to reply_log dataframe and csv file
    :param text: list of data in same format/structure as reply_log
    :param submission: the target dataframe to update with new data
    :param classification: food type / classification to save in record
    :param calories: calories score to save in record. Should be same as posted in reply text
    """       
    reply_date = time.time()      
    reply_id = submission.reply(text)       
    reply = reddit.comment(reply_id)
    reply_score = None
    new_row = [submission.id,
               submission.title,
               submission.url,
               submission.subreddit.name,
               submission.created_utc,
               submission.score,
               reply.id,
               reply_date,
               classification,
               calories,
               reply_score]
    return new_row

## Subreddit loop and reply function
#### Scans through subreddits and classifies food images 

In [20]:
for submission in subreddit.hot(limit=5):
    
    if submission.id not in reply_log.Post_ID.values:  # Check that the bot hasn't already replied to this post
        if submission.stickied: # Ignore stickied posts e.g. rules
            continue
        
        # Download and save picture and thumbnail 
        if submission_is_image:
            filename = submission.id + ".jpg"
            download(submission.url, filename, pic_dest)
            download(submission.thumbnail, filename, thumbnail_dest)
            
            # Call CNN to classify and estimate calories
            #classification, calories = food_CNN(pic_dest, filename)
            classification, calories = (None, None)                
                
            # Reply to post and generate new log entry
            text = "Hi" # TODO: learn to format 
            new_reply = send_reply(text, submission, classification, calories)
 
            # Save reply in dataframe and csv file
            save_reply(new_reply, reply_log, csv_file)
            
        else:
            print("Already replied to post")
    

Downloaded  https://www.reddit.com/r/pythonforengineers/comments/7fxac4/testing80/
invalid URL. Skipping...


UnboundLocalError: local variable 'reply_log' referenced before assignment

## Reply score loop function
#### Scans previous replies and records score for any reply made more than 24 hours ago

In [243]:
for i in reply_log.index:   
    
    elapsed_hours = (time.time() - reply_log['Reply_Date'][i]) / 3600 # 3600 seconds per hour 
    saved_reply_score = reply_log['Reply_Score'][i]
    
    if saved_reply_score == None and elapsed_hours >= 24:
        reply = reddit.comment(reply_log['Bot_Reply'][i])
        score_update = reply.score
        reply_log['Reply_Score'][i] = score_update
        reply_log.to_csv(csv_file, encoding='utf-8', index=False) # Save dataframe as .csv file 
    