# Scraping Orangetheory Reddit Posts

### Importing relevant packages

In [109]:
import praw
import time
import json
from datetime import datetime, timedelta
import codecs
import os
from dotenv import load_dotenv

### Initializing a Reddit object and defining the subreddit, keywords, and time frame of interest

In [114]:
load_dotenv()

reddit = praw.Reddit(
   client_id=os.getenv("REDDIT_CLIENT_ID"),
   client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
   user_agent=os.getenv("REDDIT_USER_AGENT"),
   username=os.getenv("REDDIT_USERNAME"),
   password=os.getenv("REDDIT_PASSWORD")
)

In [116]:
subreddit = reddit.subreddit("orangetheory")
keywords = ['lift45', 'lift 45']
one_month_ago = int(time.time()) - 7 * 24 * 60 * 60

### Fetching all posts, comments, and replies about Lift45 from a specified time frame

In [117]:
def fetch_comments(comment, posts):
    # Add comment to posts
    decoded_comment = codecs.unicode_escape_decode(comment.body)[0]
    clean_comment = decoded_comment.replace('’', "'")
    clean_comment = clean_comment.encode('ascii', 'ignore').decode()
    
    posts.append({
        'date': datetime.utcfromtimestamp(comment.created_utc).strftime('%Y-%m-%d'),
        'author': comment.author.name if comment.author else 'Deleted',
        'title': f'Re: {post.title}',
        'body': clean_comment.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ')
    })

    # Process replies to the comment
    for reply in comment.replies:
        fetch_comments(reply, posts)

posts = []

for post in subreddit.new(limit = 1000):
    if any(keyword in post.title.lower() for keyword in keywords):
        if post.created_utc >= one_month_ago:
            decoded_text = codecs.unicode_escape_decode(post.selftext)[0]
            clean_text = decoded_text.replace('’', "'")
            clean_text = clean_text.encode('ascii', 'ignore').decode()
            
            posts.append({
                'date': datetime.utcfromtimestamp(post.created_utc).strftime('%Y-%m-%d'),
                'author': post.author.name if post.author else 'Deleted',
                'title': post.title,
                'body': clean_text.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ')
            })
            
            for comment in post.comments:
                fetch_comments(comment, posts)

In [118]:
posts[0]

{'date': '2023-07-19',
 'author': 'SailBCC',
 'title': '7/17/23 Lift 45 Total Body 3',
 'body': "I didn't see it posted yet. I think I remembered everything. Good day to choose some challenging weights!   Warmup: the usual   Block 1: 6:30.  Bench low row (explosive) - 8 reps each side  TRX Pull Ups - 8 reps  Block 2: 6:30 Clean to front squat - 8 reps   Split stance pulse squats - 8 reps each side   Block 3: 6:30 S/A chest press (explosive) - 8 reps each side  TRX tricep extensions -8 reps  Block 4: 6:30 Alternating 90 degree pop squats - 8 total Split stance deadlifts- 8 reps each side   Finisher - 2 rounds  30 second low plank knee drives 30 second bicycle crunch 15 second rest"}

## ChatGPT API Setup
#### Load the API key and relevant Python libaries.
In this course, we've provided some code that loads the OpenAI API key for you.

In [13]:
import os
import openai
import sys
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

openai.api_key  = os.environ['OPENAI_API_KEY']

In [14]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

### Summarizing each Reddit post from the last month, focusing on customer sentiment about Lift45

In [61]:
post_bodies = [post['post_body'] for post in posts]

In [62]:
post_bodies

["I didn't see it posted yet. I think I remembered everything. Good day to choose some challenging weights!   Warmup: the usual   Block 1: 6:30.  Bench low row (explosive) - 8 reps each side  TRX Pull Ups - 8 reps  Block 2: 6:30 Clean to front squat - 8 reps   Split stance pulse squats - 8 reps each side   Block 3: 6:30 S/A chest press (explosive) - 8 reps each side  TRX tricep extensions -8 reps  Block 4: 6:30 Alternating 90 degree pop squats - 8 total Split stance deadlifts- 8 reps each side   Finisher - 2 rounds  30 second low plank knee drives 30 second bicycle crunch 15 second rest",
 'Havent seen this one posted yet and Im newer to OT so Im still not sure exactly how the Lift45 templates cycle through. Also apologies as I may screw up some of these exercise names lol  30 Min Block:  Each exercise was 6/8/10/12 reps, 4 rounds total. Also working unilaterally so 6 reps R, 6 reps L each exercise then up the reps.  DB Squat R/L DB Shoulder press R/L DB Lateral lunge R/L DB Standing o

In [65]:
for i in range(len(post_bodies)):
    prompt = f"""
    Your role is to provide relevant information from a social media post about the course Lift45, which is \
    offered by the fitness company Orangetheory. Orangetheory is sometimes referred to as OTF. 

    Summarize the review below, delimited by triple backticks, in at most 50 words. Focus on the customer \
    sentiment about the course Lift45 and any compliments, criticisms, and feedback members have.

    Social Media Post: ```{post_bodies[i]}```
    """

    response = get_completion(prompt)
    print(i, response, "\n")
    
    time.sleep(20)

0 The social media post mentions a challenging Lift45 class at Orangetheory, with various exercises including bench low row, TRX pull-ups, clean to front squat, split stance pulse squats, S/A chest press, TRX tricep extensions, alternating 90 degree pop squats, and split stance deadlifts. The post also includes a finisher with low plank knee drives and bicycle crunches. The sentiment of the post is positive, with the author expressing excitement about choosing challenging weights. 

1 The reviewer is new to Orangetheory and is unsure about the Lift45 templates. They mention the exercises and reps for a 30-minute block, followed by three core exercises and a 3-minute finisher. The reviewer enjoyed the workout, particularly the 30-minute block for lifting heavy weights at a slower pace. 

2 The social media post provides a detailed breakdown of the Lift45 course offered by Orangetheory. It includes exercises for both the floor and tread sides, with slow and controlled movements. The post

In [103]:
test_posts = posts[:5]

In [108]:
json_objects = []

for i in range(len(test_posts)):
    prompt = f"""
    Your role is to summarize relevant customer sentiment information from a social media post or comment about \
    the course Lift45, which is offered by the fitness company Orangetheory. Orangetheory is sometimes referred \
    to as OTF. 

    The social media post or comment of interest is delimited by 3 backticks. 


    date: #{posts[i]['date']}#

    title: ##{posts[i]['title']}##

    source: 'Reddit'

    author: ###{posts[i]['author']}###

    sentiment: Determine whether the post/comment has a positive, neutural, or negative customer sentiment. \
    Use 1 word only.

    category: Determine the area that the author is providing feedback about, including 'Template', \
    'Intensity', 'Time Allocation', 'Instruction', 'Class Size', or 'Other'. Categorize as one of the 6 categories \
    only.

    summary: Provide a 1 sentence summary on customer sentiment and relevant compliments, criticisms, \
    and feedback about Lift45. 

    text: Only extract text that is about customer sentiment and/or compliments, criticisms, \
    and feedback about Lift45. Do not extract any other texts.

    Step 10: If the post DOES NOT include any specific customer sentiment, compliments, criticisms, or \
    feedback about Lift45, simply respond with "Irrelevant feedback". Otherwise, provide the following \
    information in a JSON format: date, title, source, author, sentiment, category, summary, and text.

    Social Media Post: ```{posts[i]['body']}```
    """

    response = get_completion(prompt)
    print(i, response, "\n")
    
    if response.strip() != 'Irrelevant feedback.':
        # Load the response string as a dictionary and add to the list
        json_objects.append(json.loads(response))

    time.sleep(20)

0 Irrelevant feedback. 

1 {
  "date": "2023-07-19",
  "title": "Re: 7/17/23 Lift 45 Total Body 3",
  "source": "Reddit",
  "author": "Otherwise_Nature_506",
  "sentiment": "Positive",
  "category": "Other",
  "summary": "The author expresses excitement and gratitude for the Lift45 class, with a minor mention of pop squats.",
  "text": "Except for those pop squats, this looks like a great class. Thank you for sharing. Im looking forward to this tomorrow."
} 

2 {
  "date": "2023-07-19",
  "title": "Re: 7/17/23 Lift 45 Total Body 3",
  "source": "Reddit",
  "author": "mother_of_rags",
  "sentiment": "positive",
  "category": "Other",
  "summary": "The author had a positive sentiment about Lift45, mentioning that it was great except for the pop squats.",
  "text": "Did this one earlier today, and yes can attest, aside from the pop squats it was great!"
} 

3 Irrelevant feedback. 

4 {
  "date": "2023-07-19",
  "title": "Re: 7/17/23 Lift 45 Total Body 3",
  "source": "Reddit",
  "author":