In [39]:
# Load the required libraries
library(rvest)
library(tidyverse)

In [40]:
# URL of the webpage
url <- "https://elbilista.se/"

# Read the HTML content from the webpage
webpage <- read_html(url)

# Select one node per car card on the page
car_items <- html_nodes(webpage, "div.ev_cars__list-item")

# Return the text of the first node matching `css` inside each element of
# `items`.
#
# html_node() (singular) yields exactly one result per item and a missing
# node (whose text is NA) when the selector does not match. Every extracted
# column therefore has the same length as `items`, so a car card lacking one
# field produces an NA cell instead of a "differing number of rows" error.
extract_field <- function(items, css) {
  html_text(html_node(items, css))
}

# Build the whole table in one vectorised step rather than growing a list of
# one-row data frames and rbind-ing them. With zero matching car cards this
# gives a 0-row data frame with the expected columns (the rbind approach
# returned NULL).
scraped_data <- data.frame(
  Brand = extract_field(car_items, "span.ev_cars__car-brand"),
  CarName = extract_field(car_items, "h3.ev_cars__car-name"),
  Range = extract_field(car_items, "span.range"),
  Acceleration = extract_field(car_items, "span.acceleration"),
  BatteryCapacity = extract_field(car_items, "span.battery-capacity"),
  TopSpeed = extract_field(car_items, "span.top-speed"),
  # Keep the scraped text as character columns regardless of R version
  stringsAsFactors = FALSE
)


In [None]:
# Load the car data from the CSV file (path is relative to the working
# directory). NOTE(review): presumably a saved copy of the scraped data - confirm.
car_data <- read.csv(file = "car_data.csv")

In [42]:
# Preview the loaded data.
# Widen the console output first so that all columns fit on one line per row.
options(width = 180)

# Show the first six rows
print(head(car_data, n = 6L))

       Brand                                                               CarName Range Acceleration BatteryCapacity TopSpeed
1 Volkswagen             \n                  Volkswagen ID.7 Pro S                   700            7              91      180
2      Volvo \n                  Volvo EX30 Twin Motor Performance                   460          3.6              69      180
3   Maserati      \n                  Maserati GranTurismo Folgore                   425          2.7            92.5      320
4   Polestar  \n                  Polestar 4 Long Range Dual Motor                   560          3.8             102      200
5       Jeep             \n                  Jeep Avenger Electric                   404            9              54      150
6      Smart                    \n                  Smart #1 Pulse                   400          3.9              66      180


In [43]:
# Data quality checks

# Total number of missing (NA) cells across the whole data frame
car_data %>%
  is.na() %>%
  sum()

# Number of rows that are exact duplicates of an earlier row
car_data %>%
  duplicated() %>%
  sum()

In [44]:
# Tidy the raw text columns:
#   - CarName: strip leading/trailing whitespace (spaces, tabs, newlines)
#   - Range, Acceleration, BatteryCapacity, TopSpeed: remove the unit text
#     ("km", "s", "kWh", "km/h"); the results are character vectors

clean_car_data <- mutate(
  car_data,
  CarName = trimws(CarName),
  Range = gsub("km", "", Range),
  Acceleration = gsub("s", "", Acceleration),
  BatteryCapacity = gsub("kWh", "", BatteryCapacity),
  TopSpeed = gsub("km/h", "", TopSpeed)
)

head(clean_car_data)

Unnamed: 0_level_0,Brand,CarName,Range,Acceleration,BatteryCapacity,TopSpeed
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Volkswagen,Volkswagen ID.7 Pro S,700,7.0,91.0,180
2,Volvo,Volvo EX30 Twin Motor Performance,460,3.6,69.0,180
3,Maserati,Maserati GranTurismo Folgore,425,2.7,92.5,320
4,Polestar,Polestar 4 Long Range Dual Motor,560,3.8,102.0,200
5,Jeep,Jeep Avenger Electric,404,9.0,54.0,150
6,Smart,Smart #1 Pulse,400,3.9,66.0,180


In [None]:
# Clean the CarName column by removing the Brand from it.
#
# fixed() makes each row's Brand match as a literal string; without it the
# brand is interpreted as a regular expression, so characters such as ".",
# "(" or "+" in a brand name would mis-match or raise an error.
# str_trim() then drops the space that separated the brand from the model
# name, which would otherwise be left at the start of CarName.

clean_car_data <- clean_car_data %>%
  mutate(CarName = str_trim(str_replace(CarName, fixed(Brand), "")))

head(clean_car_data)

In [None]:
# Change the columns Range, Acceleration, BatteryCapacity and TopSpeed to
# numeric.
#
# across() is the current dplyr replacement for the superseded mutate_at().
# Values that cannot be parsed as numbers become NA (as.numeric() emits a
# coercion warning in that case).

clean_car_data <- clean_car_data %>%
  mutate(
    across(c(Range, Acceleration, BatteryCapacity, TopSpeed), as.numeric)
  )

head(clean_car_data)