In [8]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [9]:
# Load every row of the `offers` table into a DataFrame. The connection is
# closed in a `finally` clause so it is released even when the query fails.
offers_connection = sqlite3.connect('../../datasets/offers_dataset.db')
try:
    offers_frame = pd.read_sql_query('''SELECT * FROM offers''', offers_connection)
finally:
    offers_connection.close()

In [10]:
# Discard offers without a name, then exact duplicate rows. The original index
# labels are kept (no reset), so the index may contain gaps afterwards.
offers_frame = (
    offers_frame
    .dropna(subset=['Name'])
    .drop_duplicates()
    .copy()
)

In [11]:
# Labels written to the LocationType column.
LOCATION_TYPES = {"Remote": "Remote", "On Site": "OnSite"}

# An offer that carries a Location value is labelled on-site; an offer whose
# Location is missing is labelled remote.
locations_present = offers_frame["Location"].notna()
types = [
    LOCATION_TYPES["On Site"] if is_present else LOCATION_TYPES["Remote"]
    for is_present in locations_present
]

offers_frame.loc[:, "LocationType"] = types
offers_frame

Unnamed: 0,Name,Description,Location,LocationType
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite
...,...,...,...,...
294,Machine Learning / Java Developer,Descripción¡En Qindel Group estamos creciendo!...,"A Coruña, A Coruña provincia",OnSite
295,Investigador predoctoral en Machine Learning,Función: - Entrenamiento de modelos de Deep Le...,"Madrid, Madrid provincia",OnSite
296,"Manager, Machine Learning Engineering",Affirm is reinventing credit to make it more h...,"Barcelona, Barcelona provincia",OnSite
297,Machine Learning Ops/Engineer,Attendance to the office in a hybrid work mode...,"28033 Madrid, Madrid provincia",OnSite


### Location Latitude and Longitude

In [12]:
from functools import lru_cache
from math import radians, cos, sin, asin, sqrt
import requests 
from requests import Response

# The Google Places API key is read from the GOOGLE_PLACES_API_KEY environment
# variable so that no credential is stored in the notebook. Any key that was
# ever committed in source should be treated as compromised and rotated.
import os
import warnings

# Braces are doubled so the key can never be mistaken for a `str.format` field.
_PLACES_API_KEY = os.environ.get('GOOGLE_PLACES_API_KEY', '').replace('{', '{{').replace('}', '}}')
if not _PLACES_API_KEY:
    warnings.warn(
        'GOOGLE_PLACES_API_KEY is not set: Google Places requests will most likely be rejected.'
    )

# Text Search endpoint template; the single `{}` placeholder receives the query text.
PLACES_API = (
    'https://maps.googleapis.com/maps/api/place/textsearch/json'
    '?query={}&key=' + _PLACES_API_KEY + '&language=it'
)

In [13]:
@lru_cache()
def get_coordinates(location: str) -> dict:
    """
    Look up the coordinates of a free-text location with the Google Places Text Search API.

    Successful results are memoised by ``lru_cache``, so asking again for a cached
    location string does not trigger another HTTP request. Calls that raise are
    not cached.

    Args:
        location: Free-text place description, e.g. ``"Genova, Liguria"``.

    Returns:
        The ``geometry.location`` object of the first search result. The Places API
        documents it as a mapping with ``lat`` and ``lng`` entries (decimal degrees).

    Raises:
        ValueError: if ``location`` is not a non-empty string (e.g. a missing value).
        requests.RequestException: on timeout, connection failure or HTTP error status.
        IndexError: if the response contains no result for ``location``.
    """
    from urllib.parse import quote_plus

    # A missing value would otherwise be formatted into the URL as a literal
    # query ("None" / "nan") and could return an unrelated place.
    if not isinstance(location, str) or not location.strip():
        raise ValueError(f'Cannot geocode location {location!r}: expected a non-empty string')

    # Percent-encode the query so characters such as '&', '#' or '+' inside the
    # location text cannot alter the request's query string.
    url = PLACES_API.format(quote_plus(location))

    # Bounded wait: without a timeout a stalled connection would block the notebook.
    response: Response = requests.get(url, timeout=10)
    response.raise_for_status()

    results = response.json().get('results') or []
    if not results:
        raise IndexError(f'No Places result for location {location!r}')
    return results[0]['geometry']['location']

def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Great-circle distance between two points on a sphere (haversine formula).

    Note the argument order: longitude comes before latitude for each point.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in decimal degrees.
        lon2, lat2: Longitude and latitude of the second point, in decimal degrees.
        radius: Sphere radius; it fixes the unit of the result. The default, 6371,
            is the Earth's radius in kilometres (use 3956 for miles).

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula: `a` is the squared half-chord length between the points
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # For (nearly) antipodal points rounding can push `a` marginally above 1,
    # which would make asin() raise a math domain error. NaN input is left
    # untouched by this comparison and still propagates to the result.
    if a > 1.0:
        a = 1.0

    # Central angle between the two points, in radians
    c = 2 * asin(sqrt(a))
    return c * radius

def location_distance(off1_id, off2_id) -> float:
    """
    Distance between the locations of two offers.

    Args:
        off1_id: Index label of the first offer in ``offers_frame``.
        off2_id: Index label of the second offer in ``offers_frame``.

    Returns:
        The value returned by ``haversine`` for the two geocoded locations
        (kilometres with its default radius).
    """
    first = get_coordinates(offers_frame.loc[off1_id, 'Location'])
    second = get_coordinates(offers_frame.loc[off2_id, 'Location'])

    # Read the coordinates by key instead of unpacking `.values()`, so the result
    # does not depend on the order of the keys in the API response.
    return haversine(first['lng'], first['lat'], second['lng'], second['lat'])

In [14]:
# Sanity check of the distance helper on two offers picked by index label.
first_offer, second_offer = 1, 14
first_place = offers_frame.loc[first_offer, "Location"]
second_place = offers_frame.loc[second_offer, "Location"]
distance_km = location_distance(first_offer, second_offer)
print(f'Distance between {first_place} and {second_place}: {distance_km:.2f}km')

Distance between 17100 Savona and Catanzaro, Calabria: 907.33km


In [15]:
# Geocode every offer and store the result in the Latitude / Longitude columns.
# TODO: persist the coordinates so a fresh kernel does not have to query the API again.
offers_frame['Latitude'] = 0.0
offers_frame['Longitude'] = 0.0

# (index label, location, reason) for every offer that could not be geocoded.
# Those offers keep the (0.0, 0.0) placeholder written above.
failed_lookups = []

for index, location in offers_frame['Location'].items():
    # An offer without a location has nothing to geocode; do not call the API.
    if pd.isna(location):
        failed_lookups.append((index, location, 'missing location'))
        continue
    try:
        coordinates = get_coordinates(location)
        # Read by key so the result does not depend on the key order of the response.
        latitude, longitude = coordinates['lat'], coordinates['lng']
    except Exception as error:
        # Best effort: one failed lookup must not abort the whole run, but it is
        # recorded and reported below instead of being silently discarded.
        failed_lookups.append((index, location, repr(error)))
        continue
    offers_frame.loc[index, 'Latitude'] = latitude
    offers_frame.loc[index, 'Longitude'] = longitude

if failed_lookups:
    print(f'{len(failed_lookups)} offer(s) could not be geocoded and keep the (0.0, 0.0) placeholder:')
    for index, location, reason in failed_lookups:
        print(f'  {index}: {location!r} ({reason})')

offers_frame

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite,44.405650,8.946256
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite,44.342550,8.429389
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite,43.842920,10.502698
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite,40.666379,16.604320
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite,37.507877,15.083030
...,...,...,...,...,...,...
294,Machine Learning / Java Developer,Descripción¡En Qindel Group estamos creciendo!...,"A Coruña, A Coruña provincia",OnSite,43.362344,-8.411540
295,Investigador predoctoral en Machine Learning,Función: - Entrenamiento de modelos de Deep Le...,"Madrid, Madrid provincia",OnSite,40.416775,-3.703790
296,"Manager, Machine Learning Engineering",Affirm is reinventing credit to make it more h...,"Barcelona, Barcelona provincia",OnSite,41.387397,2.168568
297,Machine Learning Ops/Engineer,Attendance to the office in a hybrid work mode...,"28033 Madrid, Madrid provincia",OnSite,40.472915,-3.654719


In [16]:
import folium
from html import escape as html_escape

# Initial map centre. Despite the name, these are the coordinates of central London.
EUROPE = [51.5074, -0.1278]

offers_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in offers_frame.iterrows():
    # (0.0, 0.0) is the placeholder left on offers whose location could not be
    # geocoded (e.g. "F5 Network Engineer"); drawing it would put a marker at a
    # point unrelated to the offer, so those rows are skipped.
    if row['Latitude'] == 0 and row['Longitude'] == 0:
        continue

    # Offer text comes from scraped data and is inserted into HTML, so it is
    # escaped to keep stray markup from being interpreted.
    popup_html = f'<b>{html_escape(str(row["Name"]))}</b>'
    # TODO: a popup based on a "RequiredSkills" column was planned here.
    folium.Marker(
        [row['Latitude'], row['Longitude']],
        tooltip=html_escape(str(row['Location'])),
        popup=popup_html,
    ).add_to(offers_map)

offers_map

### Recommend by Location

In [17]:
def get_near_offers(location: str, max_distance: float = 500):
    """
    Return the index labels of the offers closer than ``max_distance`` to ``location``.

    Steps:
    1. geocode ``location`` with ``get_coordinates``;
    2. compute the ``haversine`` distance from that point to every row of
       ``offers_frame``, using its ``Longitude`` / ``Latitude`` columns;
    3. sort the distances in ascending order and keep those strictly below ``max_distance``.

    Args:
        location: Free-text place to search around, e.g. ``"Milano"``.
        max_distance: Exclusive upper bound on the distance, in the unit returned
            by ``haversine`` (kilometres with its default radius).

    Returns:
        A list of ``offers_frame`` index labels, nearest offer first, suitable
        for ``offers_frame.loc[...]``.
    """
    # Read the coordinates by key instead of unpacking `.values()`, so the result
    # does not depend on the order of the keys in the API response.
    user_coordinates = get_coordinates(location)
    user_lat, user_lon = user_coordinates['lat'], user_coordinates['lng']

    # Build the Series explicitly (rather than via a row-wise apply) so that the
    # result is a float Series even when offers_frame has no rows.
    distances = pd.Series(
        [
            haversine(user_lon, user_lat, offer_lon, offer_lat)
            for offer_lon, offer_lat in zip(offers_frame['Longitude'], offers_frame['Latitude'])
        ],
        index=offers_frame.index,
        name='Distance',
        dtype=float,
    ).sort_values()
    return list(distances[distances < max_distance].index)

**Esempio: Offerte vicino Milano**

In [18]:
# Example query: offers closer than 400 (haversine units, km by default) to Milano, nearest first.
example_location = 'Milano'
search_radius = 400
near_ids = get_near_offers(example_location, max_distance=search_radius)

# Select the matching rows in the order returned by the query.
near_offers = offers_frame.loc[near_ids]
near_offers

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
201,Python Developer/DevOps Engineer,rev.ng Labs è un'azienda che sta sviluppando u...,"Milano, Lombardia",OnSite,45.464204,9.189982
18,Java Developer,Siamo alla ricerca di un/a Java Developer ch...,"Milano, Lombardia",OnSite,45.464204,9.189982
176,Database Administrator | SQL,Who are we?Amaris Consulting is an independen...,"Milano, Lombardia",OnSite,45.464204,9.189982
25,Flutter Developer,We want to significantly strengthen the role o...,"Milano, Lombardia",OnSite,45.464204,9.189982
125,FrontEnd Javascript,Luogo di lavoroMilanoInizioImmediato – Durata:...,"Milano, Lombardia",OnSite,45.464204,9.189982
...,...,...,...,...,...,...
228,Software Engineer,Short DescriptionContexte : \n La Digital Tea...,13857 Aix-en-Provence,OnSite,43.529742,5.447427
223,BUSINESS DEVELOPER H/F BTOC - BTOB,Wall Street English est un groupe internation...,42000 Saint-Étienne,OnSite,45.450626,4.385943
5,SOFTWARE DEVELOPER,La passione ci guida in tutto ciò che facciamo...,60030 Monsano,OnSite,43.562682,13.248231
17,Programmatore senior Java - settore sanità,Per ampliare il team di sviluppo della nostra ...,60131 Ancona,OnSite,43.554373,13.462737


In [19]:
from html import escape as html_escape

# Map of the offers selected by the preceding query (`near_offers`).
query_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in near_offers.iterrows():
    # Offer text comes from scraped data and is inserted into HTML, so it is
    # escaped to keep stray markup from being interpreted.
    popup_html = f'<b>{html_escape(str(row["Name"]))}</b>'
    folium.Marker(
        [row['Latitude'], row['Longitude']],
        tooltip=html_escape(str(row['Location'])),
        popup=popup_html,
    ).add_to(query_map)

query_map

**Esempio: Offerte vicino Londra**

In [20]:
# Example query: offers closer than 300 (haversine units, km by default) to Londra, nearest first.
example_location = 'Londra'
search_radius = 300
near_ids = get_near_offers(example_location, max_distance=search_radius)

# Select the matching rows in the order returned by the query.
near_offers = offers_frame.loc[near_ids]
near_offers

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
34,UK Software Engineering Apprenticeship,Who We AreWe’re a global technology communicat...,London,OnSite,51.507218,-0.127586
159,Senior Backend Developer,About the roleWe’re hiring for two Senior Back...,London,OnSite,51.507218,-0.127586
158,Developer - GOV.UK Notify,"DetailsReference number335072Salary£53,400 - £...",London,OnSite,51.507218,-0.127586
154,Backend Product Engineer,Let’s start with the important stuff… Why woul...,London,OnSite,51.507218,-0.127586
148,Front End Developer Intern,We are seeking a Front End Developer Intern to...,London,OnSite,51.507218,-0.127586
95,Network Site Reliability Engineer,Location:LondonIndustry:Network EngineeringSal...,London,OnSite,51.507218,-0.127586
86,Network Production Engineer,This will be your network and it is constantly...,London,OnSite,51.507218,-0.127586
47,"System Software Engineer, Summer Intern - 2024...",ResponsibilitiesTikTok is the leading destinat...,London,OnSite,51.507218,-0.127586
55,Software Engineer (University Grad),Want to build new features and improve existin...,London,OnSite,51.507218,-0.127586
58,Technology Cyber & Security Analyst Foundation...,"Annual Salary: £22,250 + benefitsOur Foundatio...",London E14,OnSite,51.507218,-0.127586


In [21]:
from html import escape as html_escape

# Map of the offers selected by the preceding query (`near_offers`).
query_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in near_offers.iterrows():
    # Offer text comes from scraped data and is inserted into HTML, so it is
    # escaped to keep stray markup from being interpreted.
    popup_html = f'<b>{html_escape(str(row["Name"]))}</b>'
    folium.Marker(
        [row['Latitude'], row['Longitude']],
        tooltip=html_escape(str(row['Location'])),
        popup=popup_html,
    ).add_to(query_map)

query_map

In [25]:
# Example query: offers closer than 600 (haversine units, km by default) to Madrid, nearest first.
example_location = 'Madrid'
search_radius = 600
near_ids = get_near_offers(example_location, max_distance=search_radius)

# Select the matching rows in the order returned by the query.
near_offers = offers_frame.loc[near_ids]
near_offers

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
295,Investigador predoctoral en Machine Learning,Función: - Entrenamiento de modelos de Deep Le...,"Madrid, Madrid provincia",OnSite,40.416775,-3.70379
291,Data Scientist- 100% remoto M/F,"Job description¡Desde AKKODIS, multinacional d...","Madrid, Madrid provincia",OnSite,40.416775,-3.70379
289,Data Science (Remoto),"Descripción:Desde Grupo Digital, buscamos perf...","Madrid, Madrid provincia",OnSite,40.416775,-3.70379
288,Junior Data Scientist,Descripción de la empresa¿Quienes somos? ‍‍ \...,"Madrid, Madrid provincia",OnSite,40.416775,-3.70379
280,Java Backend Developer (Remote in Spain),Java Backend Developer \n Location: SpainTh...,"Madrid, Madrid provincia",OnSite,40.416775,-3.70379
298,FULLREMOTE-Data Scientist (Time Series),-Country:España-Province:Comunidad de Madrid-D...,Comunidad de Madrid,OnSite,40.416751,-3.703832
262,Frontend Developer España,CloudAPPi es una empresa de innovación espec...,España,OnSite,40.463667,-3.74922
265,Frontend Developer (React) Freelance,En HAYS estamos colaborando con una compañía...,España,OnSite,40.463667,-3.74922
279,Java Springboot Developer (remote),INFORMACIÓNPuesto : Java Springboot DeveloperS...,España,OnSite,40.463667,-3.74922
297,Machine Learning Ops/Engineer,Attendance to the office in a hybrid work mode...,"28033 Madrid, Madrid provincia",OnSite,40.472915,-3.654719
