In [2]:
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests

all_reviews_df = pd.DataFrame()

for i in range(1, 21):
    url = f'https://www.imdb.com/title/tt1877830/reviews?spoiler=hide&sort=curated&dir=desc&ratingFilter={i}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        soup = bs(response.content, 'html.parser')
        
        reviews = soup.find_all('div', class_='text show-more__control')
        
        review_texts = []
        for review in reviews:
            review_text = review.get_text(strip=True)
            review_texts.append(review_text)
        
        review_data = {
            'Rating': [i] * len(review_texts),
            'Review Text': review_texts
        }
        df = pd.DataFrame(review_data)
        
        all_reviews_df = pd.concat([all_reviews_df, df], ignore_index=True)

print(all_reviews_df)


     Rating                                        Review Text
0       1.0  Dull & Drab at Best. Insipid Writing, Outdated...
1       1.0  Between an insipid Robert Pattinson and a woef...
2       1.0  Why does everything have to be anti-white, wok...
3       1.0  This movie was seriously overhyped. After the ...
4       1.0  Pattinson is no batman, he will never be Batma...
..      ...                                                ...
170    20.0  What's the one thought you had when Warner Bro...
171    20.0  For better or worse (mostly better), The Batma...
172    20.0  I really don't understand all the love this mo...
173    20.0  The Batman (2022) is a movie my wife and I cau...
174    20.0  Dull & Drab at Best. Insipid Writing, Outdated...

[175 rows x 2 columns]


In [3]:
from textblob import TextBlob
import re

def preprocess_text(text):
    #lowercase
    text = text.lower()
    #special chracters
    text = re.sub(r'[^a-z\s]', '', text)
    return text

def get_sentiment(text):
    analysis = TextBlob(text)
    polarity = analysis.sentiment.polarity
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

def get_polarity(text):
    analysis = TextBlob(text)
    return analysis.sentiment.polarity

In [4]:
all_reviews_df['Cleaned Reviews'] = all_reviews_df['Review Text'].apply(preprocess_text)

all_reviews_df['Sentiment'] = all_reviews_df['Cleaned Reviews'].apply(get_sentiment)
all_reviews_df['Polarity'] = all_reviews_df['Cleaned Reviews'].apply(get_polarity)

In [5]:
all_reviews_df=all_reviews_df.sample(frac=1)

all_reviews_df

Unnamed: 0,Rating,Review Text,Cleaned Reviews,Sentiment,Polarity
115,9.0,This was dark and gritty. On several occasions...,this was dark and gritty on several occasions ...,positive,0.117014
64,5.0,This movie is insulting to the audience's inte...,this movie is insulting to the audiences intel...,positive,0.000758
156,20.0,"Robert Pattinson is great, I loved this film b...",robert pattinson is great i loved this film bu...,positive,0.250000
32,2.0,Unfortunately the movie was an absolute disapp...,unfortunately the movie was an absolute disapp...,negative,-0.223810
127,15.0,"A serial killer strikes in Gotham City, killin...",a serial killer strikes in gotham city killing...,positive,0.153841
...,...,...,...,...,...
129,15.0,Approaching this review holistically as my fav...,approaching this review holistically as my fav...,positive,0.301850
18,1.0,Like some movies recently this one is super ov...,like some movies recently this one is super ov...,positive,0.098611
77,8.0,"THE BATMAN (2022) *** Robert Pattinson, Zoe Kr...",the batman robert pattinson zoe kravitz jeff...,positive,0.002041
39,2.0,The cinematography was a constant reddish-blac...,the cinematography was a constant reddishblack...,negative,-0.090909


In [6]:
all_reviews_df['Sentiment'].value_counts()

Sentiment
positive    123
negative     51
neutral       1
Name: count, dtype: int64