In [1]:
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
import pandas as pd

In [19]:
# Column order of the final DataFrame. The names match the keys of the
# per-degree dict assembled in get_degree().
DEGREE_COLUMNS = [
    'url', 'name',
    'university', 'university_url',
    'modality', 'price',
    'address', 'lat', 'lon',
    'credits', 'years',
]

In [3]:
# Listing page that the scrape starts from (passed to get_degree_pages()).
LISTA_GRADOS_URL = (
    'https://www.gradomania.com/'
    'grados-ingenieria-en-informatica-5048_xgrd1_q01.html'
)

In [4]:
# Site root; the hrefs found in the paginator are appended to / resolved
# against this prefix.
PATH = 'https://www.gradomania.com/'

In [5]:
# Module-level accumulators filled while scraping:
#   DEGREE_LIST   -> one dict per scraped degree page
#   DEGREE_ERRORS -> [value, info, url] triples for fields that failed to parse
DEGREE_LIST, DEGREE_ERRORS = [], []

In [6]:
def get_location(address):
    """Geocode a free-text address with the HERE Geocoder API (v6.2 endpoint).

    The API key is read from the ``HERE_API_KEY`` environment variable so the
    credential does not have to be written into the notebook. When the
    variable is unset an empty key is sent, as the original code did.

    Args:
        address: Free-text address to look up.

    Returns:
        list: ``[lat, lon]`` taken from the first result's display position,
        or ``[0, 0]`` when the address is empty, the request fails, the
        service does not answer with HTTP 200, or the payload does not
        contain a usable result.
    """
    lat = 0
    lon = 0

    # Nothing to geocode: avoid a pointless network round trip.
    if not address:
        return [lat, lon]

    api_key = os.environ.get('HERE_API_KEY', '')

    try:
        location = requests.get(
            'https://geocoder.ls.hereapi.com/6.2/geocode.json',
            # Passing the values through `params` lets requests percent-encode
            # them; interpolating the raw address into the URL breaks on
            # characters such as '&', '#' or '/'.
            params={'apiKey': api_key, 'searchtext': address},
            # Without a timeout one stalled lookup blocks the whole scrape.
            timeout=10,
        )
    except requests.RequestException:
        # Geocoding is best effort: fall back to the "unknown" coordinates.
        return [lat, lon]

    if location.status_code == 200:
        try:
            position = location.json()['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
            # Both values are read before either is assigned, so a partial
            # result can never leak out.
            lat, lon = position['Latitude'], position['Longitude']
        except (KeyError, IndexError, TypeError, ValueError):
            # Malformed/empty payload or invalid JSON: keep the defaults.
            lat = 0
            lon = 0

    return [lat, lon]

In [7]:
def get_duration(value):
    """Extract the length of a degree, in credits and years, from scraped nodes.

    The text of the first node is lower-cased and searched for an integer
    followed by ``ects`` / ``créditos`` (credits) or ``año`` / ``años``
    (years). Credits take precedence when both are present. The missing
    quantity is derived assuming 60 credits per year.

    Unlike a plain ``int()`` conversion of the whole string, text that carries
    extra words (e.g. "240 créditos ECTS") is still parsed, and text with no
    recognisable quantity yields zeros instead of raising ``ValueError``.

    Args:
        value: Sequence of nodes exposing ``getText(strip=True)`` (as returned
            by ``soup.select``), or ``None``.

    Returns:
        dict: ``{'years': ..., 'credits': ...}``; both are ``0`` when nothing
        could be parsed. ``years`` is a float when derived from credits.
    """
    years = 0
    credits = 0

    if value is not None and len(value) != 0 and value[0]:
        text = value[0].getText(strip=True).lower()

        # The look-behind rejects digits that are the tail of a longer or
        # decimal number ("1.5 años" must not be read as 5 years).
        credits_match = re.search(r'(?<![\d.,])(\d+)\s*(?:ects|créditos)', text)
        years_match = re.search(r'(?<![\d.,])(\d+)\s*años?', text)

        if credits_match:
            credits = int(credits_match.group(1))
            years = credits / 60
        elif years_match:
            years = int(years_match.group(1))
            credits = 60 * years

    return {
        'years': years,
        'credits': credits
    }

In [8]:
def get_text(value, info, url):
    """Return the stripped text of the first node in ``value``.

    Args:
        value: Sequence of nodes exposing ``getText(strip=True)`` (as returned
            by ``soup.select``).
        info: Name of the field being extracted; only used for error reports.
        url: Page the nodes came from; only used for error reports.

    Returns:
        str: The text of ``value[0]``, or ``''`` when ``value`` is empty or
        extraction fails. Failures are recorded in ``DEGREE_ERRORS`` as
        ``[value, info, url]`` rather than raised.
    """
    text = ''

    try:
        text = value[0].getText(strip=True) if len(value) else ''
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop a
        # long-running scrape instead of being logged as a parse error.
        DEGREE_ERRORS.append([value, info, url])

    return text

In [9]:
def get_degree(url):
    """Scrape one degree page and append its record to ``DEGREE_LIST``.

    Args:
        url: Absolute URL of the degree detail page.

    Side effects:
        Appends a dict with the keys ``url``, ``name``, ``university``,
        ``university_url``, ``modality``, ``price``, ``address``, ``lat``,
        ``lon``, ``credits`` and ``years`` to ``DEGREE_LIST``. Fields that
        cannot be extracted are reported through ``DEGREE_ERRORS``.
    """
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request — confirm it is still required for this site.
    page = requests.get(url, verify=False)
    soup = BeautifulSoup(page.content, 'html.parser')

    name = get_text(soup.select('.valor.titulacion'), 'name', url)
    university = get_text(soup.select('.valor.imparte'), 'university', url)

    # A provider without a link must not abort the whole scrape with an
    # IndexError; record it like any other unparsable field and carry on.
    university_links = soup.select('.valor.imparte a')
    if university_links:
        university_url = university_links[0].get('href')
    else:
        university_url = ''
        DEGREE_ERRORS.append([university_links, 'university_url', url])

    modality = get_text(soup.select('.valor.modalidad'), 'modality', url)
    price = get_text(soup.select('.valor.precio'), 'price', url)
    address = get_text(soup.select('.valor.lugar'), 'address', url)

    location = get_location(address)

    duration = get_duration(soup.select('.valor.duracion'))

    degree_data = {
        'url': url,
        'name': name,
        'university': university,
        'university_url': university_url,
        'modality': modality,
        'price': price,
        'address': address,
        'lat': location[0],
        'lon': location[1],
        'credits': duration['credits'],
        'years': duration['years']
    }

    DEGREE_LIST.append(degree_data)

In [10]:
def get_degrees(url):
    """Scrape every degree linked from one listing page.

    Args:
        url: Absolute URL of a listing (results) page.

    Side effects:
        Calls ``get_degree`` for each degree link found, which appends to
        ``DEGREE_LIST``.
    """
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request — confirm it is still required for this site.
    page = requests.get(url, verify=False)
    soup = BeautifulSoup(page.content, 'html.parser')

    degrees = soup.select('.ficha_evento_texto > article > a:last-of-type')

    for degree in degrees:
        href = degree.get('href')
        if not href:
            # An anchor without href would hand None to requests.get().
            continue
        # Absolute hrefs pass through unchanged; relative ones are resolved
        # against the listing page they were found on.
        get_degree(urljoin(url, href))

In [11]:
def get_degree_pages(url):
    """Walk the paginator of a listing page and scrape every linked page.

    Args:
        url: Absolute URL of the listing page whose paginator is read.

    Side effects:
        Calls ``get_degrees`` once per distinct paginator link.
    """
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request — confirm it is still required for this site.
    page = requests.get(url, verify=False)
    soup = BeautifulSoup(page.content, 'html.parser')

    lista = soup.select('.paginador_lista ul > li > a')

    # NOTE(review): only pages linked from the paginator are visited; if the
    # paginator does not link the current page, `url` itself is never
    # scraped — confirm against the site's markup.
    seen = set()

    for elem in lista:
        href = elem.get('href')
        if not href:
            # PATH + None would raise TypeError.
            continue

        # urljoin gives the same result as PATH + href for plain relative
        # hrefs, and also copes with root-relative ('/x') and absolute ones.
        page_url = urljoin(PATH, href)

        # The same page can be linked more than once; visiting it again
        # would duplicate its rows.
        if page_url in seen:
            continue
        seen.add(page_url)

        get_degrees(page_url)

In [12]:
# Empty the accumulators first so that re-running this cell starts from a
# clean slate instead of appending a second copy of every row.
DEGREE_LIST.clear()
DEGREE_ERRORS.clear()

get_degree_pages(LISTA_GRADOS_URL)

















In [20]:
# One row per scraped degree, with columns in the DEGREE_COLUMNS order.
degrees_df = pd.DataFrame(
    data=DEGREE_LIST,
    columns=DEGREE_COLUMNS,
)

In [22]:
# Preview of (at most) the first 30 scraped rows.
degrees_df.head(30)

Unnamed: 0,url,name,university,university_url,modality,price,address,lat,lon,credits,years
0,https://www.gradomania.com/grado-en-ingenieria...,Grado en Ingeniería de Sistemas,Estudios Oficiales - Universidad Internacional...,https://www.gradomania.com/estudios-oficiales-...,Online,4.200 €/año,Se imparte Online,0.0,0.0,240,4.0
1,https://www.gradomania.com/grado-en-ingenieria...,Título Oficial de Grado en Ingeniería Informática,Escuela Superior de Tecnología y Ciencias Expe...,https://www.gradomania.com/escuela-superior-de...,Presencial,Consultar rellenando el formulario,Castellón / Castelló 12071España,39.9619,-0.05131,240,4.0
2,https://www.gradomania.com/grado-en-ingenieria...,Grado en Ingeniería Informática,Escuela de Ingeniería - UNIR - Universidad Int...,https://www.gradomania.com/escuela-de-ingenier...,Online,€105 / ECTS,Se imparte Online,0.0,0.0,240,4.0
3,https://www.gradomania.com/grado-en-ingenieria...,Graduado o Graduada en Ingeniería Informática,Facultad de Ciencias y Tecnología - Universida...,https://www.gradomania.com/facultad-de-ciencia...,Online,"Descuentos especiales de hasta un 40% dto, Bec...",Se imparte Online,0.0,0.0,240,4.0
4,https://www.gradomania.com/grado-en-ingenieria...,Grado en Ingeniería Informática. Título oficia...,Escuela Universitaria Politécnica - Universida...,https://www.gradomania.com/escuela-universitar...,Presencial,4200 € / curso,MurciaEspaña,0.0,0.0,0,0.0
5,https://www.gradomania.com/grado-en-ingenieria...,Grado en Ingeniería Informática\r\n* Titulació...,Escuela Politécnica Superior y Escuela de Arqu...,https://www.gradomania.com/escuela-politecnica...,Presencial,Información no disponible,Campus de la Dehesa de la VillaMadridEspaña,0.0,0.0,240,4.0
6,https://www.gradomania.com/grado-en-ingenieria...,Título Oficial de Grado en Ingeniería Informát...,UDIMA Estudios Oficiales - Universidad a Dista...,https://www.gradomania.com/udima-estudios-ofic...,Distancia,Precio crédito = 77 €,Se imparte a Distancia,0.0,0.0,240,4.0
7,https://www.gradomania.com/doble-titulacion-in...,Grado Universitario de La Salle - Universidad ...,La Salle - Ramón Llull - La Salle Campus Barce...,https://www.gradomania.com/la-salle-ramon-llul...,Presencial,Consultar rellenando el formulario,Edificio LluçanésBarcelona 08022España,0.0,0.0,329,5.483333
8,https://www.gradomania.com/doble-grado-en-inge...,Doble Grado en Ingeniería Informática + Diseño...,Escuela de Arquitectura y Tecnología - Univers...,https://www.gradomania.com/escuela-de-arquitec...,Presencial,Consultar rellenando el formulario,Autovía A-23 Zaragoza-Huesca Km. 299Villanueva...,41.77112,-0.82425,333,5.55
9,https://www.gradomania.com/doble-titulacion-de...,Grado Universitario de La Salle - Universidad ...,La Salle - Ramón Llull - La Salle Campus Barce...,https://www.gradomania.com/la-salle-ramon-llul...,Presencial,Consultar rellenando el formulario,Edificio LluçanésBarcelona 08022España,0.0,0.0,391,6.516667


In [15]:
# Persist the scraped table without the DataFrame index column.
degrees_df.to_csv(
    path_or_buf='{}.csv'.format('grados_informatica'),
    index=False,
)

In [16]:
# Show the fields that could not be parsed; an empty list means get_text()
# recorded no failures.
DEGREE_ERRORS

[]