### <div dir="rtl"> ایمپورت پکیج‌های مورد نیاز </div>

In [1]:
import os
import datetime

import requests
from bs4 import BeautifulSoup
import pandas as pd

### <div dir="rtl"> تعریف کلاس‌ها </div>

<div dir="rtl"> کلاس Zoomg شامل دو attribute اصلی به نام‌های main_url و review_urls می‌باشد. </div>
<div dir="rtl"> آدرس اصلی سایت (قسمت نقد و بررسی فیلم‌ها و سریال‌ها) در main_url هنگام ساخت آبجکت ذخیره می‌شود. </div>
<div dir="rtl"> سپس از طریق تابع find_review_urls آدرس صفحات ۲۰۰ مقاله از سایت در review_urls ذخیره می‌گردد. </div>

In [2]:
class Zoomg:
    """Collects review-article URLs from the Zoomg movie/TV review listing.

    Attributes:
        name: Fixed site label, ``"Zoomg"``.
        main_url: URL of the first listing page.
        n_pages: Number of listing pages walked by ``find_review_urls``.
        review_urls: Article URLs gathered by the last ``find_review_urls``
            call (empty until that method has run).
    """

    # Seconds to wait on the server; requests has no default timeout and
    # would otherwise block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, n_pages=10):
        """Store the crawl settings; no network access happens here.

        Args:
            n_pages: How many listing pages to visit, starting at page 1.
        """
        self.name = "Zoomg"
        self.main_url = "https://www.zoomg.ir/movie-tv-show-review/"
        self.n_pages = n_pages
        self.review_urls = []

    def _page_url(self, page_number):
        """Return the listing URL for a 1-based page number."""
        # Page 1 is the bare listing URL; later pages get a "page/<n>/" suffix.
        if page_number == 1:
            return self.main_url
        return self.main_url + f"page/{page_number}/"

    def _extract_review_urls(self, soup):
        """Return the article links found in one parsed listing page.

        Returns an empty list when the expected main container is absent,
        and skips any card that lacks an ``h3 > a[href]`` link.
        """
        main_section = soup.find("div", {"class": "col-md-7 col-sm-12 col-xs-12 centerLayout"})
        if main_section is None:
            return []

        urls = []
        # Two card variants are matched: with and without "fullwithImage".
        for section in main_section.find_all("div", {"class": ["boxWrapper latestArticles fullwithImage js-topic-row",
                                                              "boxWrapper latestArticles js-topic-row"]}):
            heading = section.find("h3")
            link = heading.find("a") if heading is not None else None
            if link is not None and link.get("href"):
                urls.append(link["href"])
        return urls

    def find_review_urls(self):
        """Fetch every listing page and store the article URLs in ``review_urls``.

        The list is rebuilt from scratch on each call, so calling this method
        again does not duplicate entries.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
        """
        collected = []
        for page_number in range(1, self.n_pages + 1):
            web_page = requests.get(self._page_url(page_number), timeout=self.REQUEST_TIMEOUT)
            # Fail loudly on an error page rather than parsing it as an empty listing.
            web_page.raise_for_status()
            soup = BeautifulSoup(web_page.content, "html.parser")
            collected.extend(self._extract_review_urls(soup))
        self.review_urls = collected
        return

<div dir="rtl"> کلاس ZoomgReview دارای سه attribute اصلی با نام‌های url و description و body می‌باشد. </div>
<div dir="rtl"> آدرس یک مقاله از سایت در url ذخیره می‌شود. </div>
<div dir="rtl"> سپس به کمک ماژول BeautifulSoup محتوای مقاله شامل description و body به دست می‌آیند. </div>

In [3]:
class ZoomgReview:
    """A single Zoomg review article, downloaded and parsed on construction.

    Attributes:
        url: Address of the article page.
        web_page: The ``requests`` response for ``url``.
        soup: ``BeautifulSoup`` tree built from the response content.
        description: Text of the first paragraph in the summary box, or ``""``.
        body: Text of the article section, or ``""``.
        full_text: ``description`` immediately followed by ``body``.
    """

    # Seconds to wait on the server; requests has no default timeout and
    # would otherwise block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """Download ``url`` and extract its description and body text.

        Args:
            url: Address of the review article.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
        """
        self.url = url
        self.web_page = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on an error page rather than storing it as an empty review.
        self.web_page.raise_for_status()
        self.soup = BeautifulSoup(self.web_page.content, "html.parser")
        self.description = self.get_description()
        self.body = self.get_body()
        self.full_text = self.description + self.body

    def get_description(self):
        """Return the summary paragraph text, or ``""`` if the page has none."""
        # NOTE(review): "article-summery" is kept exactly as written; it is
        # presumably the site's own class spelling - confirm before "fixing".
        summary = self.soup.find("div", {"class": "article-summery"})
        paragraph = summary.find("p") if summary is not None else None
        if paragraph is None:
            return ""
        return paragraph.getText()

    def get_body(self):
        """Return the article section text, or ``""`` if the page has none."""
        section = self.soup.find("div", {"class": "article-section"})
        if section is None:
            return ""
        return section.getText()

In [4]:
# Number of listing pages to crawl; the recorded run below yielded 200 URLs.
N_LISTING_PAGES = 10

zoomg = Zoomg(n_pages=N_LISTING_PAGES)
zoomg.find_review_urls()

In [5]:
# Download every collected article, printing a 1-based counter and the URL
# before each fetch so progress is visible.
review_list = []
for position, review_url in enumerate(zoomg.review_urls, start=1):
    print(position, review_url)
    review_list.append(ZoomgReview(url=review_url))

1 https://www.zoomg.ir/movie-tv-show-review/347394-cyberpunk-edgerunners-review/
2 https://www.zoomg.ir/movie-tv-show-review/347393-gods-creatures-movie-review/
3 https://www.zoomg.ir/movie-tv-show-review/347353-house-of-the-dragon-season-1-episode-10-review/
4 https://www.zoomg.ir/movie-tv-show-review/347330-a-love-song-film-review/
5 https://www.zoomg.ir/movie-tv-show-review/347221-the-paloni-show-halloween-special-review/
6 https://www.zoomg.ir/movie-tv-show-review/347227-straw-dogs-film-review/
7 https://www.zoomg.ir/movie-tv-show-review/347089-she-hulk-attorney-at-law-season-1-reveiw/
8 https://www.zoomg.ir/movie-tv-show-review/347100-lotr-rings-of-power-alloyed-episode-review/
9 https://www.zoomg.ir/movie-tv-show-review/347199-house-of-the-dragon-season-1-episode-9-review/
10 https://www.zoomg.ir/movie-tv-show-review/347134-bodies-bodies-bodies-movie-review/
11 https://www.zoomg.ir/movie-tv-show-review/347141-speak-no-evil-movie-review/
12 https://www.zoomg.ir/movie-tv-show-revie

98 https://www.zoomg.ir/movie-tv-show-review/344624-doctor-strange-in-the-multiverse-of-madness-review/
99 https://www.zoomg.ir/movie-tv-show-review/344538-spiderhead-movie-review/
100 https://www.zoomg.ir/movie-tv-show-review/344405-hustle-movie-review/
101 https://www.zoomg.ir/movie-tv-show-review/344489-hatching-movie-review/
102 https://www.zoomg.ir/movie-tv-show-review/344320-station-eleven-tv-show-review/
103 https://www.zoomg.ir/movie-tv-show-review/344303-last-seen-alive-movie-review/
104 https://www.zoomg.ir/movie-tv-show-review/344377-under-the-banner-of-heaven-review/
105 https://www.zoomg.ir/movie-tv-show-review/344211-the-unbearable-weight-of-massive-talent-movie-review/
106 https://www.zoomg.ir/movie-tv-show-review/344134-father-stu-movie-review/
107 https://www.zoomg.ir/movie-tv-show-review/344106-the-loser-man-film-review/
108 https://www.zoomg.ir/movie-tv-show-review/344085-memory-movie-review/
109 https://www.zoomg.ir/movie-tv-show-review/343955-outer-range-serie-revi

197 https://www.zoomg.ir/movie-tv-show-review/341649-a-private-meeting-movie-review/
198 https://www.zoomg.ir/movie-tv-show-review/341558-bi-roya-movie-review/
199 https://www.zoomg.ir/movie-tv-show-review/341559-ice-age-6-animated-review/
200 https://www.zoomg.ir/movie-tv-show-review/341498-the-tenderbar-movie-review/


In [6]:
# Build the corpus in one pass: each review's text followed by a newline.
# str.join avoids growing a multi-megabyte string with repeated += in a loop.
all_reviews = "".join(review.full_text + "\n" for review in review_list)
print(len(all_reviews))

2636579


In [7]:
# Write the whole corpus to disk. The context manager closes the file even if
# write() raises. UTF-8 is set explicitly because the scraped text is
# presumably non-ASCII (Persian) and the platform default encoding varies.
with open("reviews.txt", "w", encoding="utf-8") as reviews_file:
    reviews_file.write(all_reviews)

In [8]:
# One row per review: the article URL and its concatenated text.
csv_columns = {
    "url": [item.url for item in review_list],
    "text": [item.full_text for item in review_list],
}
review_df = pd.DataFrame(data=csv_columns)
review_df.to_csv("reviews.csv", index=False)