<a href="https://colab.research.google.com/github/Yhola/Movie_Review_Analyzer/blob/main/MovieReviewModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Steps 1 & 2: Every review element paired with the detail it is meant to cover.
# Keeping each pair on one line stops the two lists below from drifting apart.
_element_specs = [
    ("Movie Year", "Movie Release Year"),
    ("Opening", "Actor, Director, Producer, Name, Company, Crew, Ads, Sponsors"),
    ("Graphics", "Screenshots of Movie Scenes"),
    ("Summary", "Genre and Reviewers"),
    ("Plot Synopsis", "Context for the evaluation"),
    ("Aspect Evaluation", "Quality of acting, scripting, screenwriting, cinematography, audio, tracks, pacing"),
    ("Conclusion", "Who will like or dislike the movie and strengths and weaknesses"),
]
review_elements = [name for name, _ in _element_specs]
element_details = [detail for _, detail in _element_specs]

# Keywords whose appearance in a review marks the matching element as covered
element_keywords = {
    "Movie Year": [
        "release year",
        "released in",
        "year of release",
    ],
    "Opening": [
        "actor",
        "director",
        "producer",
        "company",
        "crew",
        "ads",
        "sponsors",
    ],
    "Graphics": [
        "screenshots",
        "visuals",
        "graphics",
        "scenes",
    ],
    "Summary": [
        "genre",
        "review",
        "summary",
        "overview",
    ],
    "Plot Synopsis": [
        "plot",
        "storyline",
        "synopsis",
        "context",
    ],
    "Aspect Evaluation": [
        "acting",
        "scripting",
        "screenwriting",
        "cinematography",
        "audio",
        "pacing",
    ],
    "Conclusion": [
        "like or dislike",
        "strengths",
        "weaknesses",
        "recommendation",
    ],
}

# Step 3: Results table - one row per review element, one (still empty) column per movie
movies = [f"Movie {number}" for number in range(1, 6)]
_row_count = len(review_elements)
df = pd.DataFrame(
    index=range(_row_count),
    columns=["Index", "Review Items", "Detail of Items", *movies],
)

# Fill in the descriptive columns; the movie columns stay blank until analysis
df["Index"] = list(range(1, _row_count + 1))
df["Review Items"] = review_elements
df["Detail of Items"] = element_details

# Step 4: Function to analyze reviews and check for the presence of elements
def analyze_review(url, timeout=10):
    """Fetch a review page and report which review elements it mentions.

    The page title, every ``h1``-``h6`` heading and every ``<p>`` paragraph
    are joined into one lower-cased string. An element is marked "Present"
    when any of its keywords from ``element_keywords`` occurs in that string
    as a substring; otherwise it stays "Absent".

    Args:
        url: Address of the review page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping every name in ``review_elements`` to either
        "Present" or "Absent".

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests can wait forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # An error page (403, 404, ...) is not a review; fail rather than scan it.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # get_text() always yields a str, whereas .string is None for an empty
    # <title> or one with child tags, which would inject the text "none".
    title = soup.title.get_text() if soup.title else ""
    headers = " ".join(
        header.get_text()
        for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    )
    paragraphs = " ".join(p.get_text() for p in soup.find_all('p'))
    content = f"{title} {headers} {paragraphs}".lower()

    # Start from "Absent" and upgrade each element on its first keyword hit.
    analysis = {element: "Absent" for element in review_elements}
    for element, keywords in element_keywords.items():
        if any(keyword.lower() in content for keyword in keywords):
            analysis[element] = "Present"

    return analysis

# Step 5: Review pages to analyze; the i-th link fills the i-th column in `movies`
review_links = [
    # Add your 5 review links here
    "https://www.rogerebert.com/reviews/daddys-head-shudder",
    "https://www.commonsensemedia.org/movie-reviews/lonely-planet",
    "https://www.commonsensemedia.org/movie-reviews/girl-haunts-boy",
    "https://ew.com/queer-review-daniel-craig-luca-guadagnino-pretentious-visually-appealing-slog-8710060",
    "https://ew.com/the-order-review-jude-law-nicholas-hoult-face-off-solid-crime-thriller-8709204"
]

# Every link needs a movie column to write into. Check this before any
# download so a surplus link fails with a clear message instead of an
# IndexError part-way through the run.
if len(review_links) > len(movies):
    raise ValueError(
        f"Got {len(review_links)} review links but only "
        f"{len(movies)} movie columns are defined"
    )

# Step 6: Analyze each review and record the result in its movie column
for movie, link in zip(movies, review_links):
    try:
        review_analysis = analyze_review(link)
    except requests.RequestException as exc:
        # One unreachable site should not throw away the results already
        # gathered for the others; this movie's column is left empty.
        print(f"Skipping {link}: {exc}")
        continue
    for element in review_elements:
        df.loc[df["Review Items"] == element, movie] = review_analysis[element]

# Step 7: Display DataFrame
from IPython.display import display

display(df)

# Step 8: Save DataFrame to CSV (optional)
df.to_csv("movie_review_analysis.csv", index=False)

Unnamed: 0,Index,Review Items,Detail of Items,Movie 1,Movie 2,Movie 3,Movie 4,Movie 5
0,1,Movie Year,Movie Release Year,Absent,Absent,Absent,Absent,Absent
1,2,Opening,"Actor, Director, Producer, Name, Company, Crew...",Present,Present,Absent,Present,Present
2,3,Graphics,Screenshots of Movie Scenes,Absent,Present,Present,Present,Present
3,4,Summary,Genre and Reviewers,Present,Present,Present,Present,Present
4,5,Plot Synopsis,Context for the evaluation,Absent,Absent,Present,Absent,Present
5,6,Aspect Evaluation,"Quality of acting, scripting, screenwriting, c...",Present,Absent,Present,Absent,Present
6,7,Conclusion,Who will like or dislike the movie and strengt...,Absent,Present,Present,Absent,Absent
