In [1]:
#Seba
import pandas as pd
from ckanapi import RemoteCKAN

def scrape_ckan_data(resource_id, limit, fields=None, ckan_url='https://open.canada.ca/data/en/'):
    """Fetch records of a CKAN DataStore resource into a pandas DataFrame.

    Parameters
    ----------
    resource_id : str
        Identifier of the DataStore resource to query.
    limit : int
        Maximum number of records to request from the API.
    fields : list of str, optional
        Names of the fields to retrieve. When omitted, the vehicle
        columns listed below are requested.
    ckan_url : str, optional
        Base address of the CKAN portal to query.

    Returns
    -------
    pandas.DataFrame
        One row per returned record, with one column per requested field.
        If the API returns no records the frame is empty but still carries
        the requested columns.
    """
    if fields is None:
        # Default selection of fields from the resource's data dictionary.
        fields = [
            "Model year",
            "Make",
            "Model",
            "Vehicle class",
            "Motor (kW)",
            "Transmission",
            "Fuel type",
            "City (kWh/100 km)",
            "Highway (kWh/100 km)",
            "Combined (kWh/100 km)",
            "City (Le/100 km)",
            "Highway (Le/100 km)",
            "Combined (Le/100 km)",
            "Range (km)",
            "CO2 emissions (g/km)",
            "CO2 rating",
            "Smog rating",
            "Recharge time (h)"
        ]
    fields = list(fields)

    # Build the datastore_search query.
    query = {
        "resource_id": resource_id,
        "fields": fields,
        "limit": limit
    }

    # Use the client as a context manager so its HTTP session is closed
    # once the request has completed (the original never released it).
    with RemoteCKAN(ckan_url) as rc:
        result = rc.action.datastore_search(**query)

    # Extract the records from the API response.
    records = result['records']

    # Passing `columns` keeps the requested columns (and their order) even
    # when `records` is empty.
    df = pd.DataFrame(records, columns=fields)

    return df

# Fetch the data for the resource of interest.
# Resource: Battery-electric vehicles (2012-2024)
df_electric = scrape_ckan_data(
    resource_id="026e45b4-eb63-451f-b34f-d9308ea3a3d9",
    limit=668,
)

In [2]:
# Display the fetched frame to inspect its columns and values.
df_electric

Unnamed: 0,Model year,Make,Model,Vehicle class,Motor (kW),Transmission,Fuel type,City (kWh/100 km),Highway (kWh/100 km),Combined (kWh/100 km),City (Le/100 km),Highway (Le/100 km),Combined (Le/100 km),Range (km),CO2 emissions (g/km),CO2 rating,Smog rating,Recharge time (h)
0,2012,Mitsubishi,i-MiEV,Subcompact,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,100,0,,,7
1,2012,Nissan,LEAF,Mid-size,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,117,0,,,7
2,2013,Ford,Focus Electric,Compact,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,122,0,,,4
3,2013,Mitsubishi,i-MiEV,Subcompact,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,100,0,,,7
4,2013,Nissan,LEAF,Mid-size,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,117,0,,,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,2024,Volkswagen,ID.4 AWD Pro S,Sport utility vehicle: Small,250,A1,B,19.4,21.9,20.5,2.2,2.5,2.3,423,0,10,10,8
663,2024,Volvo,C40 Recharge,Sport utility vehicle: Small,185,A1,B,17.8,21.8,19.6,2.0,2.4,2.2,478,0,10,10,8
664,2024,Volvo,C40 Recharge Twin,Sport utility vehicle: Small,300,A1,B,19.8,23.0,21.2,2.2,2.6,2.4,414,0,10,10,8
665,2024,Volvo,XC40 Recharge,Sport utility vehicle: Small,185,A1,B,17.8,22.0,19.7,2.0,2.5,2.2,472,0,10,10,8


In [3]:
# List the distinct values of the range column before filtering on it.
df_electric.loc[:, 'Range (km)'].unique()

array(['100', '117', '122', '109', '224', '335', '426', '131', '135',
       '130', '149', '377', '386', '435', '407', '172', '338', '401',
       '351', '417', '473', '507', '322', '383', '414', '402', '465',
       '183', '185', '200', '150', '539', '475', '201', '179', '242',
       '92', '93', '418', '499', '329', '246', '415', '385', '243', '363',
       '349', '151', '425', '459', '595', '555', '523', '410', '491',
       '274', '248', '177', '240', '327', '323', '309', '531', '518',
       '489', '481', '462', '600', '629', '647', '560', '525', '528',
       '565', '438', '509', '468', '198', '357', '370', '340', '483',
       '375', '346', '320', '365', '341', '328', '325', '423', '568',
       '652', '623', '538', '637', '597', '549', '393', '488', '400',
       '298', '373', '388', '484', '454', '521', '397', '515', '360',
       '446', '505', '502', '354', '441', '758', '726', '837', '774',
       '830', '755', '161', '547', '396', '146', '576', '536', '500',
       '449', '

In [4]:
# Remove the columns that are not needed by the filtering steps below.
df_electric = df_electric.drop(
    columns=[
        'Transmission',
        'Fuel type',
        'Smog rating',
        'Highway (kWh/100 km)',
        'CO2 rating',
        'Combined (kWh/100 km)',
        'Highway (Le/100 km)',
        'Combined (Le/100 km)',
    ]
)

In [5]:
# Cast the recharge time to float. `assign` returns a new frame, so the cast
# never writes into a slice of another frame.
df_electric = df_electric.assign(
    **{'Recharge time (h)': df_electric['Recharge time (h)'].astype('float')}
)
# Keep only vehicles that recharge in under 10 hours. The explicit copy makes
# the result an independent frame, so later column assignments on it do not
# raise SettingWithCopyWarning.
df_electric = df_electric.loc[df_electric['Recharge time (h)'] < 10].copy()

In [6]:
# Cast the range to int32. `assign` builds a new frame instead of setting a
# column on the filtered slice produced by the previous cell, which would be
# a chained assignment (SettingWithCopyWarning).
df_electric = df_electric.assign(
    **{'Range (km)': df_electric['Range (km)'].astype('int32')}
)
# Keep only vehicles with a range of at least 250 km; copy so that the
# filtered result is an independent frame for the cells that follow.
df_electric = df_electric.loc[df_electric['Range (km)'] >= 250].copy()

In [7]:
# List the distinct values of the city consumption column before filtering on it.
df_electric.loc[:, 'City (kWh/100 km)'].unique()

array(['16.4', '16.2', '17.0', '14.9', '18.5', '20.0', '14.5', '16.8',
       '14.1', '19.9', '21.0', '16.9', '21.9', '26.6', '13.9', '15.0',
       '20.2', '21.5', '24.5', '16.0', '28.7', '19.0', '21.1', '15.5',
       '18.0', '22.4', '15.2', '16.3', '18.7', '22.2', '22.8', '18.2',
       '19.6', '25.1', '24.7', '24.8', '21.6', '20.3', '21.7', '21.3',
       '18.9', '13.7', '18.8', '17.5', '28.1', '21.4', '24.2', '25.4',
       '25.0', '23.0', '25.5', '16.6', '20.1', '17.2', '19.1', '20.9',
       '19.5', '26.4', '23.3', '23.5', '24.0', '24.3', '22.1', '21.8',
       '17.9', '20.7', '19.7', '22.0', '20.6', '23.6', '27.3', '20.5',
       '23.1', '17.4', '19.3', '14.7', '17.1', '19.4', '17.8', '19.8'],
      dtype=object)

In [8]:
# Cast the city consumption to float. `assign` builds a new frame instead of
# setting a column on the filtered slice produced by the previous cell, which
# would be a chained assignment (SettingWithCopyWarning).
df_electric = df_electric.assign(
    **{'City (kWh/100 km)': df_electric['City (kWh/100 km)'].astype('float')}
)
# Keep only vehicles that consume at most 20.0 kWh/100 km in the city; copy so
# that the filtered result is an independent frame.
df_electric = df_electric.loc[df_electric['City (kWh/100 km)'] <= 20.0].copy()

In [9]:
# NOTE(review): the label list is empty, so this call removes nothing;
# the cell can be deleted.
df_electric.drop(labels=[], axis=0, inplace=True)

In [10]:
# NOTE(review): duplicate of the preceding cell; dropping an empty list of
# labels leaves the frame unchanged, so this cell can be deleted.
df_electric.drop(labels=[], axis=0, inplace=True)

In [11]:
# Display the frame that remains after the column drop and the filters above.
df_electric

Unnamed: 0,Model year,Make,Model,Vehicle class,Motor (kW),City (kWh/100 km),City (Le/100 km),Range (km),CO2 emissions (g/km),Recharge time (h)
63,2017,Chevrolet,Bolt EV,Station wagon: Small,150,16.4,1.8,383,0,9.3
86,2018,Chevrolet,Bolt EV,Station wagon: Small,150,16.4,1.8,383,0,9.3
108,2019,Chevrolet,Bolt EV,Station wagon: Small,150,16.4,1.8,383,0,9.3
110,2019,Hyundai,Kona Electric,Sport utility vehicle: Small,150,16.2,1.8,415,0,9.0
112,2019,Kia,Niro EV,Station wagon: Small,150,17.0,1.9,385,0,9.5
...,...,...,...,...,...,...,...,...,...,...
662,2024,Volkswagen,ID.4 AWD Pro S,Sport utility vehicle: Small,250,19.4,2.2,423,0,8.0
663,2024,Volvo,C40 Recharge,Sport utility vehicle: Small,185,17.8,2.0,478,0,8.0
664,2024,Volvo,C40 Recharge Twin,Sport utility vehicle: Small,300,19.8,2.2,414,0,8.0
665,2024,Volvo,XC40 Recharge,Sport utility vehicle: Small,185,17.8,2.0,472,0,8.0
