In [1]:
# This dataset is used as a reference list for selecting the branches located in Selangor that are rated as WORST (Rating < 2.5).
# It was collected from Google Reviews using an extension tool and then exported to an Excel file.

In [3]:
import pandas as pd

# Path to the raw Excel export of the Google Reviews listing
file_path = "Dataset/McD Selangor List.xlsx"

# Open the workbook inside a `with` block so its file handle is released as
# soon as the sheet has been parsed, rather than staying open for as long as
# the kernel keeps `xls` alive.
with pd.ExcelFile(file_path) as xls:
    # Load the specific sheet
    df = xls.parse('Recovered_Sheet1')

# Display the first few rows to inspect the data
df.head()

Unnamed: 0,hfpxzc href,qBF1Pd,MW4etd,UY7F9,AJB7ye,AJB7ye 2,W4Efsd,doJOZc,W4Efsd 3,W4Efsd 4,...,UsdlK,lcr4fd href,Cw1rxd,R8c4Qb,Cw1rxd 2,R8c4Qb 2,ah5Ghc,ah5Ghc 2,M4A5Cf 2,ah5Ghc 3
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,https://www.google.com/maps/place/McDonald%27s...,McDonald's Caltex Bandar Puteri Klang,3.8,"(2,155)",·,RM 1–20,Fast Food,,·,"Lot 133826, Lebuh Sanggul 1/KS7, Bandar Puteri...",...,03-5162 3985,http://www.mcdonalds.com.my/,,Website,,Directions,Dine-in,Drive-through,·,No-contact delivery
3,https://www.google.com/maps/place/McDonald%27s...,McDonald's Kuala Selangor DT,4.0,"(3,362)",·,RM 1–20,Fast Food,,·,"Lot 640, Teluk Piah, Mukim",...,03-3281 1285,https://www.mcdonalds.com.my/,,Website,,Directions,Dine-in,Drive-through,·,No-contact delivery
4,https://www.google.com/maps/place/McDonald%27s...,McDonald's Amverton DT,3.4,(369),·,RM 1–20,Restaurant,,·,"Lot 85631, Jalan U17/16",...,03-6039 8253,https://www.mcdonalds.com.my/,,Website,,Directions,Dine-in,Drive-through,·,No-contact delivery


In [5]:
# Scraped column name -> readable column name, for the columns we keep
column_names = {
    'hfpxzc href': 'Branch Link',
    'qBF1Pd': 'Branch',
    'MW4etd': 'Ratings',
    'UY7F9': 'Number of Reviews',
}


def _review_counts_as_int(frame):
    """Return the 'Number of Reviews' column of `frame` as nullable integers.

    Brackets and thousands separators are stripped, so "(2,155)" becomes 2155.
    Blank or missing entries become <NA>.
    """
    return (
        frame['Number of Reviews']
        # Cast to the nullable string dtype first: the .str accessor then stays
        # valid even if some counts arrive as plain numbers or the column is
        # entirely missing, instead of silently yielding NA or raising.
        .astype('string')
        .str.replace(r"[(),]", "", regex=True)
        .str.strip()
        .replace("", pd.NA)
        .astype('Int64')  # integer type that supports NA
    )


# Build the cleaned frame in one chain from `df`, without in-place mutation:
# select and rename the columns, drop rows where every kept value is NaN,
# then convert the review counts to integers.
df_cleaned = (
    df[list(column_names)]
    .rename(columns=column_names)
    .dropna(how='all')
    .assign(**{'Number of Reviews': _review_counts_as_int})
)

# Show the cleaned dataset
df_cleaned.head()

Unnamed: 0,Branch Link,Branch,Ratings,Number of Reviews
2,https://www.google.com/maps/place/McDonald%27s...,McDonald's Caltex Bandar Puteri Klang,3.8,2155
3,https://www.google.com/maps/place/McDonald%27s...,McDonald's Kuala Selangor DT,4.0,3362
4,https://www.google.com/maps/place/McDonald%27s...,McDonald's Amverton DT,3.4,369
5,https://www.google.com/maps/place/McDonald%27s...,McDonald's Bandar Utama DT,3.9,2828
6,https://www.google.com/maps/place/McDonald%27s...,McDonald's PKNS PJ DT,2.7,417


In [7]:
# Write the cleaned branch list to a new Excel workbook, leaving out the
# DataFrame index so only the four cleaned columns are stored.
cleaned_output_path = "Dataset/Cleaned_McDonald_Selangor_List.xlsx"
df_cleaned.to_excel(cleaned_output_path, index=False)