## Import the Library

In [1]:
from bs4 import BeautifulSoup
import requests
import time
import re

## Scraping Macbook's Review Data

In [20]:
macbook_texts = []
rates = []
origin_url = 'https://review.kakaku.com/review/newreview/CategoryCD=0029/PageNo='

In [3]:
for page_num in range(1, 30):
    url = origin_url + str(page_num)
    response = requests.get(url)  
    response.encoding = 'utf-8'


    soup = BeautifulSoup(response.text, 'html.parser')

   
    elements = soup.find_all('a')
    for element in elements:
        text = element.get_text().strip()
        match = re.search(r'MacBook (?:Pro|Air)', text)
        if match:
            matched_text = match.group()
            macbook_texts.append(matched_text)

  
    rate_elements = soup.find_all('td', class_=re.compile(r'rate'))
    for rate in rate_elements:
        rate_text = rate.get_text().strip()
        rates.append(rate_text)


    time.sleep(0.1)  


grouped_rates = [rates[i:i + 10] for i in range(0, len(rates), 10)]

In [4]:
print('30 Page Macbook Comment:')
for matched_text, rate in zip(macbook_texts, grouped_rates):
    print(f'Computer : {matched_text}, Rate: {rate}')

30 Page Macbook Comment:
Computer : MacBook Pro, Rate: ['5', '4', '5', '���]��', '5', '3', '2', '4', '5', '5']
Computer : MacBook Pro, Rate: ['4', '5', '5', '5', '3', '4', '5', '5', '5', '3']
Computer : MacBook Pro, Rate: ['5', '5', '5', '5', '5', '5', '4', '5', '5', '5']
Computer : MacBook Pro, Rate: ['4', '5', '4', '4', '1', '3', '4', '4', '5', '4']
Computer : MacBook Pro, Rate: ['5', '5', '5', '4', '1', '5', '3', '5', '4', '5']
Computer : MacBook Pro, Rate: ['4', '4', '5', '5', '2', '4', '4', '���]��', '5', '3']
Computer : MacBook Pro, Rate: ['4', '5', '5', '5', '3', '5', '4', '5', '5', '4']
Computer : MacBook Pro, Rate: ['5', '5', '5', '5', '3', '5', '5', '5', '5', '5']
Computer : MacBook Air, Rate: ['4', '5', '5', '5', '3', '5', '5', '5', '5', '4']
Computer : MacBook Air, Rate: ['5', '5', '5', '5', '4', '5', '3', '4', '5', '4']
Computer : MacBook Pro, Rate: ['4', '5', '5', '4', '1', '3', '3', '5', '5', '2']
Computer : MacBook Pro, Rate: ['5', '5', '5', '4', '2', '5', '4', '5', '5'

In [11]:
macbook_data = list(zip(macbook_texts, grouped_rates))

## Scraping Window's Computer Review Data

In [1]:
origin_url_window = 'https://review.kakaku.com/review/K0000800909/?Page='
win_rates = []

In [3]:
for page_num in range(1,30):
    url_window = origin_url_window + str(page_num)
    response_window = requests.get(url_window)
    soup = BeautifulSoup(response_window.text, 'html.parser')

    win_elements = soup.find_all('td', class_=re.compile(r'rate'))

    for win_rate in win_elements:
        win_rate_text = win_rate.get_text().strip()
        win_rates.append(win_rate_text)

time.sleep(0.1)

win_group_rates = [win_rates[i:i + 7] for i in range(0,len(win_rates), 7)]

In [4]:
print('30 Pages Window Computer Rates')
for window in win_group_rates:
    print(f'Window Rates: {window}')

30 Pages Window Computer Rates
Window Rates: ['3', '3', '2', '4', '1', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿']
Window Rates: ['5', '5', '4', '5', '3', '3', '3']
Window Rates: ['1', '1', '1', '1', '1', '1', '1']
Window Rates: ['2', '3', '2', '1', '2', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿']
Window Rates: ['1', '1', '1', '1', '1', '1', '\x96³\x95]\x89¿']
Window Rates: ['1', '1', '3', '1', '1', '1', '\x96³\x95]\x89¿']
Window Rates: ['4', '4', '2', '4', '4', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿']
Window Rates: ['4', '2', '3', '3', '\x96³\x95]\x89¿', '2', '\x96³\x95]\x89¿']
Window Rates: ['1', '3', '2', '1', '1', '2', '1']
Window Rates: ['4', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿', '\x96³\x95]\x89¿']
Window Rates: ['1', '1', '1', '1', '1', '1', '1']
Window Rates: ['1', '1', '1', '1', '1', '1', '1']
Window Rates: ['2', '3', '1', '3', '1', '2', '\x96³\x95]\x89¿']
Window Rates: ['3', '3', '2', '3', '3', '2', '\x96³\x95]\x89¿']
Window Rates: ['1

In [5]:
window_data  = list(zip(win_group_rates))

## Save The Macbook's Review Data To Database

In [6]:
import sqlite3

In [12]:
conn = sqlite3.connect('macbooks.db')
cursor = conn.cursor()

cursor.execute('''
CREATE TABLE IF NOT EXISTS macbook_ratings (
    id INTEGER PRIMARY KEY,
    text TEXT,
    rate TEXT
)
''')

for text, rate_group in macbook_data:
    rate_str = ', '.join(rate_group)  
    cursor.execute('INSERT INTO macbook_ratings (text, rate) VALUES (?, ?)', (text, rate_str))

conn.commit()


cursor.execute('SELECT * FROM macbook_ratings')
macbook_ratings_data = cursor.fetchall()
with open('macbook_ratings.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['ID', 'Text', 'Rate'])
    writer.writerows(macbook_ratings_data)

conn.close()

## Create The Macbook's Review Data CSV File

In [15]:
import pandas as pd
macbook_path = "D:\github\DS_Pro\macbook_ratings.csv"
df = pd.read_csv(macbook_path)

df 

Unnamed: 0,ID,Text,Rate
0,1,MacBook Pro,"5, 4, 5, ���]��, 5, 3, 2, 4, 5, 5"
1,2,MacBook Pro,"4, 5, 5, 5, 3, 4, 5, 5, 5, 3"
2,3,MacBook Pro,"5, 5, 5, 5, 5, 5, 4, 5, 5, 5"
3,4,MacBook Pro,"4, 5, 4, 4, 1, 3, 4, 4, 5, 4"
4,5,MacBook Pro,"5, 5, 5, 4, 1, 5, 3, 5, 4, 5"
...,...,...,...
430,431,MacBook Air,"5, 5, 5, 5, 1, 4, 4, ���]��, 5, ���]��"
431,432,MacBook Pro,"5, 5, 5, 4, 1, 5, 3, 5, 5, ���]��"
432,433,MacBook Pro,"5, 5, 5, 5, 2, 4, 5, 5, 5, ���]��"
433,434,MacBook Air,"5, 5, 5, ���]��, 2, 5, 5, 5, 5, ���]��"


In [16]:
columns = ['Satisfaction', 'Design', 'Processing Speed', 'Graphic Performance', 
           'Expandability', 'Ease of Use', 'Portability', 'Battery', 'Screen', 'Cost Performance']

df_rates = df['Rate'].str.split(',', expand=True)
df_rates.columns = columns

df_rates = df_rates.applymap(lambda x: pd.to_numeric(x, errors='coerce'))

df_combined = pd.concat([df['ID'], df['Text'], df_rates], axis=1)

new_file_path = 'D:\github\DS_Pro\macbook_ratings_new.csv'
df_combined.to_csv(new_file_path, index=False)

print(new_file_path)

D:\github\DS_Pro\macbook_ratings_new.csv


## Save The Window's Computer Data To Database

In [8]:
import csv
conn = sqlite3.connect('windows_ratings.db')
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS window_ratings (
    id INTEGER PRIMARY KEY,
    rates TEXT
)
''')

for rates_tuple in window_data:
    
    rates_str = ', '.join(rates_tuple[0])
    cursor.execute('INSERT INTO window_ratings (rates) VALUES (?)', (rates_str,))
conn.commit()


cursor.execute('SELECT * FROM window_ratings')
window_ratings_data = cursor.fetchall()


with open('window_ratings.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['ID', 'Rates'])
    writer.writerows(window_ratings_data)


conn.close()

## Create The Window's Computer Review CSV File

In [14]:
import pandas as pd

window_path = "D:\github\DS_Pro\window_ratings.csv"
data = pd.read_csv(window_path)

data

Unnamed: 0,ID,Rates
0,1,3
1,2,3
2,3,2
3,4,4
4,5,1
...,...,...
870,871,"1, 1, 1, 1, 1, 1, 1"
871,872,"1, 1, 1, 1, 1, 1, 1"
872,873,"2, 3, 1, 3, 1, 2, ³]¿"
873,874,"3, 3, 2, 3, 3, 2, ³]¿"


In [15]:
new_columns = ["Satisfaction", "Functionality", "Ease of Use", "Stability", "Agility", "Support", "Manual"]

for i, col in enumerate(new_columns):
    data[col] = data['Rates'].astype(str).str[i]

data[new_columns] = data[new_columns].apply(pd.to_numeric, errors='coerce')

data = data.drop(columns=['Rates'])

new_file_path = 'window_ratings_new.csv'  
data.to_csv(new_file_path, index=False)

In [16]:
data

Unnamed: 0,ID,Satisfaction,Functionality,Ease of Use,Stability,Agility,Support,Manual
0,1,3.0,,,,,,
1,2,3.0,,,,,,
2,3,2.0,,,,,,
3,4,4.0,,,,,,
4,5,1.0,,,,,,
...,...,...,...,...,...,...,...,...
870,871,1.0,,,1.0,,,1.0
871,872,1.0,,,1.0,,,1.0
872,873,2.0,,,3.0,,,1.0
873,874,3.0,,,3.0,,,2.0
