In [47]:
import bs4
import regex as re
import itertools
import requests
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import pmdarima 
from datetime import datetime
import json
import os

## EV Database

In [48]:
url = "https://ev-database.org/"
response = requests.get(url)
response.ok

True

In [49]:
soup = bs4.BeautifulSoup(response.text, "html.parser")
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <!-- head -->
 <head>
  <meta charset="utf-8"/>
  <link href="https://ev-database.org/" rel="canonical"/>
  <link href="https://ev-database.org/uk/" hreflang="en-gb" rel="alternate"/>
  <link href="https://ev-database.org/de/" hreflang="de-de" rel="alternate"/>
  <link href="https://ev-database.org/nl/" hreflang="nl-nl" rel="alternate"/>
  <link href="https://ev-database.org/" hreflang="x-default" rel="alternate"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   Compare electric vehicles - EV Database
  </title>
  <meta content="A complete overview of all electric vehicles in Europe. Search and compare by range, make, model and price." name="description"/>
  <meta content="EV Database" property="og:title">
   <meta content="A complete overview of all electric vehicles in Europe. Search and compare by range, make, model and price." property="og:description"/>
   <meta content="website" property="og:type"/>
   <meta c

In [50]:
spans = ['model', 'acceleration', 'topspeed', 'battery', 'erange_real', 'efficiency', 'fastcharge_speed', 'country_uk']
columns = ['make', 'model', 'acceleration', 'topspeed', 'battery', 'range', 'efficiency', 'fastcharge_speed', 'country_uk']

In [51]:
ev_df = pd.DataFrame()
for sp in spans:
    s = soup.find_all('span', attrs={'class': sp})
    ss = pd.DataFrame([i.text for i in s], columns = [sp])
    ev_df = pd.concat([ev_df, pd.DataFrame(ss)], axis=1)
ev_df

Unnamed: 0,model,acceleration,topspeed,battery,erange_real,efficiency,fastcharge_speed,country_uk
0,Model Y Long Range Dual Motor,5.0 sec,217 km/h,75,435 km,172 Wh/km,670,"£52,990"
1,Model 3,6.1 sec,225 km/h,57.5,405 km,142 Wh/km,680,"£42,990"
2,Model Y,6.9 sec,217 km/h,57.5,350 km,164 Wh/km,580,"£44,990"
3,Model 3 Long Range Dual Motor,4.4 sec,233 km/h,75,485 km,155 Wh/km,750,"£50,990"
4,ATTO 3,7.3 sec,160 km/h,60.5,325 km,186 Wh/km,360,"£36,490"
...,...,...,...,...,...,...,...,...
338,eVito Tourer Extra-Long 60 kWh,12.0 sec,160 km/h,60,205 km,293 Wh/km,300,
339,e-Expert Combi Long 50 kWh,13.1 sec,130 km/h,46.3,185 km,250 Wh/km,290,
340,VF 8 Plus Extended Range,5.5 sec,200 km/h,87.7,425 km,206 Wh/km,550,
341,EQV 250 Extra-Long,12.0 sec,160 km/h,60,210 km,286 Wh/km,200,


In [52]:
df = pd.DataFrame([i.text.strip().split(" ")[0] for i in soup.find_all('a', attrs={'class': 'title'})], columns = ['make'])
df

Unnamed: 0,make
0,Tesla
1,Tesla
2,Tesla
3,Tesla
4,BYD
...,...
338,Mercedes
339,Peugeot
340,VinFast
341,Mercedes


In [53]:
ev_df1 = pd.concat([df, ev_df], axis = 1)
ev_df1

Unnamed: 0,make,model,acceleration,topspeed,battery,erange_real,efficiency,fastcharge_speed,country_uk
0,Tesla,Model Y Long Range Dual Motor,5.0 sec,217 km/h,75,435 km,172 Wh/km,670,"£52,990"
1,Tesla,Model 3,6.1 sec,225 km/h,57.5,405 km,142 Wh/km,680,"£42,990"
2,Tesla,Model Y,6.9 sec,217 km/h,57.5,350 km,164 Wh/km,580,"£44,990"
3,Tesla,Model 3 Long Range Dual Motor,4.4 sec,233 km/h,75,485 km,155 Wh/km,750,"£50,990"
4,BYD,ATTO 3,7.3 sec,160 km/h,60.5,325 km,186 Wh/km,360,"£36,490"
...,...,...,...,...,...,...,...,...,...
338,Mercedes,eVito Tourer Extra-Long 60 kWh,12.0 sec,160 km/h,60,205 km,293 Wh/km,300,
339,Peugeot,e-Expert Combi Long 50 kWh,13.1 sec,130 km/h,46.3,185 km,250 Wh/km,290,
340,VinFast,VF 8 Plus Extended Range,5.5 sec,200 km/h,87.7,425 km,206 Wh/km,550,
341,Mercedes,EQV 250 Extra-Long,12.0 sec,160 km/h,60,210 km,286 Wh/km,200,


In [54]:
img_tags  = soup.find_all('img') #Find all
img_tags = img_tags[1:] # the first one is the website logo, so throwing it away
img_tags

[<img alt="Tesla Model Y Long Range Dual Motor" data-src="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/>,
 <img alt="Tesla Model 3 " data-src="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/>,
 <img alt="Tesla Model Y " data-src="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/>,
 <img alt="Tesla Model 3 Long Range Dual Motor" data-src="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/>,
 <img alt="BYD ATTO 3 " data-src="/img/auto/BYD_ATTO_3/BYD_ATTO_3-01-thumb.jpg" data-src-retina="/img/auto/BYD_ATTO_3/BYD_ATTO

In [55]:
# urls of vehicle images
img1_url = []
img2_url = []
for tag in img_tags:
    #img1 = url[:-1] + tag.attrs['data-src']
    img1 = url[:-1] + tag.attrs['data-src-retina']
    print(img1)
    img1_url.append(img1)
    #img2_url.append(img2)
    
#print(img1_url)
#print(img2_url)    

https://ev-database.org/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg
https://ev-database.org/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb@2x.jpg
https://ev-database.org/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg
https://ev-database.org/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb@2x.jpg
https://ev-database.org/img/auto/BYD_ATTO_3/BYD_ATTO_3-01-thumb@2x.jpg
https://ev-database.org/img/auto/MG_MG4_Electric_2022/MG_MG4_Electric_2022-01-thumb@2x.jpg
https://ev-database.org/img/auto/Tesla_Model_S_2021/Tesla_Model_S_2021-01-thumb@2x.jpg
https://ev-database.org/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg
https://ev-database.org/img/auto/Kia-NiroEV-2022/Kia-NiroEV-2022-01-thumb@2x.jpg
https://ev-database.org/img/auto/BMW_i4_eDrive40/BMW_i4_eDrive40-01-thumb@2x.jpg
https://ev-database.org/img/auto/BMW_iX_2022/BMW_iX_2022-01-thumb@2x.jpg
https://ev-database.org/img/auto/BMW_iX1_2022/BMW_iX1_2022-01-thumb@2x.jpg
https://ev-database.org/img/auto/BMW

In [56]:
len(img1_url)

343

In [57]:
div_tags = soup.find_all('div', attrs={'class': 'img'})
div_tags

[<div class="img">
 <a href="/car/1619/Tesla-Model-Y-Long-Range-Dual-Motor"><img alt="Tesla Model Y Long Range Dual Motor" data-src="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/></a> </div>,
 <div class="img">
 <a href="/car/1555/Tesla-Model-3"><img alt="Tesla Model 3 " data-src="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_3_2021/Tesla_Model_3_2021-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/></a> </div>,
 <div class="img">
 <a href="/car/1743/Tesla-Model-Y"><img alt="Tesla Model Y " data-src="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb.jpg" data-src-retina="/img/auto/Tesla_Model_Y/Tesla_Model_Y-01-thumb@2x.jpg" src="/img/common/ajax-loader.gif"/></a> </div>,
 <div class="img">
 <a href="/car/1591/Tesla-Model-3-Long-Range-Dual-Motor"><img alt="Tesla Model 3 Long Range Dual Motor" data-src="/img/auto/Tesla_Mo

In [58]:
# hrefs are links with detailed vehicle info
hrefs = []
for div_tag in div_tags:
    a_tags = div_tag.find_all('a')
    #print(a_tags)
    for a_tag in a_tags:
        href = url[:-1] + a_tag['href']
        print(href)
        hrefs.append(href)
print(hrefs)

https://ev-database.org/car/1619/Tesla-Model-Y-Long-Range-Dual-Motor
https://ev-database.org/car/1555/Tesla-Model-3
https://ev-database.org/car/1743/Tesla-Model-Y
https://ev-database.org/car/1591/Tesla-Model-3-Long-Range-Dual-Motor
https://ev-database.org/car/1782/BYD-ATTO-3
https://ev-database.org/car/1708/MG-MG4-Electric-64-kWh
https://ev-database.org/car/1405/Tesla-Model-S-Plaid
https://ev-database.org/car/1183/Tesla-Model-Y-Performance
https://ev-database.org/car/1666/Kia-Niro-EV
https://ev-database.org/car/1252/BMW-i4-eDrive40
https://ev-database.org/car/1472/BMW-iX-xDrive40
https://ev-database.org/car/1701/BMW-iX1-xDrive30
https://ev-database.org/car/1535/BMW-iX3
https://ev-database.org/car/1910/Volvo-EX30-Single-Motor-ER
https://ev-database.org/car/1747/Hongqi-E-HS9-99-kWh
https://ev-database.org/car/1519/BMW-i4-M50
https://ev-database.org/car/1153/Audi-e-tron-GT-RS
https://ev-database.org/car/1620/Tesla-Model-3-Performance
https://ev-database.org/car/1285/Fiat-500e-Hatchback-42

In [59]:
len(hrefs)

343

In [60]:
ev_df1['img1_url'] = img1_url
#ev_df1['img2_url'] = img2_url
ev_df1['hrefs'] = hrefs
ev_df1

Unnamed: 0,make,model,acceleration,topspeed,battery,erange_real,efficiency,fastcharge_speed,country_uk,img1_url,hrefs
0,Tesla,Model Y Long Range Dual Motor,5.0 sec,217 km/h,75,435 km,172 Wh/km,670,"£52,990",https://ev-database.org/img/auto/Tesla_Model_Y...,https://ev-database.org/car/1619/Tesla-Model-Y...
1,Tesla,Model 3,6.1 sec,225 km/h,57.5,405 km,142 Wh/km,680,"£42,990",https://ev-database.org/img/auto/Tesla_Model_3...,https://ev-database.org/car/1555/Tesla-Model-3
2,Tesla,Model Y,6.9 sec,217 km/h,57.5,350 km,164 Wh/km,580,"£44,990",https://ev-database.org/img/auto/Tesla_Model_Y...,https://ev-database.org/car/1743/Tesla-Model-Y
3,Tesla,Model 3 Long Range Dual Motor,4.4 sec,233 km/h,75,485 km,155 Wh/km,750,"£50,990",https://ev-database.org/img/auto/Tesla_Model_3...,https://ev-database.org/car/1591/Tesla-Model-3...
4,BYD,ATTO 3,7.3 sec,160 km/h,60.5,325 km,186 Wh/km,360,"£36,490",https://ev-database.org/img/auto/BYD_ATTO_3/BY...,https://ev-database.org/car/1782/BYD-ATTO-3
...,...,...,...,...,...,...,...,...,...,...,...
338,Mercedes,eVito Tourer Extra-Long 60 kWh,12.0 sec,160 km/h,60,205 km,293 Wh/km,300,,https://ev-database.org/img/auto/Mercedes_eVit...,https://ev-database.org/car/1618/Mercedes-eVit...
339,Peugeot,e-Expert Combi Long 50 kWh,13.1 sec,130 km/h,46.3,185 km,250 Wh/km,290,,https://ev-database.org/img/auto/Peugeot_e-Tra...,https://ev-database.org/car/1606/Peugeot-e-Exp...
340,VinFast,VF 8 Plus Extended Range,5.5 sec,200 km/h,87.7,425 km,206 Wh/km,550,,https://ev-database.org/img/auto/VinFast_VF_8/...,https://ev-database.org/car/1809/VinFast-VF-8-...
341,Mercedes,EQV 250 Extra-Long,12.0 sec,160 km/h,60,210 km,286 Wh/km,200,,https://ev-database.org/img/auto/Mercedes_EQV/...,https://ev-database.org/car/1543/Mercedes-EQV-...


In [61]:
current_directory = os.getcwd()
print(current_directory)
parent_directory = os.path.abspath(os.path.join(current_directory, ".."))
parent_directory

C:\Users\blusi\OneDrive\Documents\Learning\TDI\Capstone\codes


'C:\\Users\\blusi\\OneDrive\\Documents\\Learning\\TDI\\Capstone'

In [62]:
ev_df1.to_csv(parent_directory + '/data/electric_vehicles.csv', index = False)

In [63]:
json_data = ev_df1.to_json(orient='records')
json_data

'[{"make":"Tesla","model":"Model Y Long Range Dual Motor","acceleration":"5.0 sec","topspeed":"217 km\\/h","battery":"75","erange_real":"435 km","efficiency":"172 Wh\\/km","fastcharge_speed":"670","country_uk":"\\u00a352,990","img1_url":"https:\\/\\/ev-database.org\\/img\\/auto\\/Tesla_Model_Y\\/Tesla_Model_Y-01-thumb@2x.jpg","hrefs":"https:\\/\\/ev-database.org\\/car\\/1619\\/Tesla-Model-Y-Long-Range-Dual-Motor"},{"make":"Tesla","model":"Model 3 ","acceleration":"6.1 sec","topspeed":"225 km\\/h","battery":"57.5","erange_real":"405 km","efficiency":"142 Wh\\/km","fastcharge_speed":"680","country_uk":"\\u00a342,990","img1_url":"https:\\/\\/ev-database.org\\/img\\/auto\\/Tesla_Model_3_2021\\/Tesla_Model_3_2021-01-thumb@2x.jpg","hrefs":"https:\\/\\/ev-database.org\\/car\\/1555\\/Tesla-Model-3"},{"make":"Tesla","model":"Model Y ","acceleration":"6.9 sec","topspeed":"217 km\\/h","battery":"57.5","erange_real":"350 km","efficiency":"164 Wh\\/km","fastcharge_speed":"580","country_uk":"\\u00a3

In [64]:
with open(parent_directory + '/data/electric_vehicles.json', 'w') as file:
    # Write the JSON data to the file
    json.dump(json_data, file)

In [65]:
with open('aws_keys.json', 'r') as file:
    # Write the JSON data to the file
    keys = json.load(file)

In [66]:
import boto3
session = boto3.session.Session( 
    aws_access_key_id = keys["aws_access_key_id"], 
    aws_secret_access_key = keys["aws_secret_access_key"],
    region_name='us-east-1'
)

In [67]:
try:
    #Connect to S3
    s3 = session.client('s3',
        region_name='us-east-1'
    )
    bucket_name = "tdi-capstone-lb"      
    csv_data = ev_df1.to_csv(index=False)
    bytes_data = csv_data.encode()
    response = s3.put_object(Body=bytes_data, Bucket=bucket_name, Key="data/electric_vehicles.csv")

except (KeyError) as e:
    print(f"Unable to upload data to S3, got error {e}")
    raise e