In [150]:
import html
import os
import sqlite3
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

%matplotlib inline

In [151]:
# Load the whole `offers` table into a DataFrame. The connection is closed in
# a `finally` clause so it is released even when the query itself raises
# (for example because the table does not exist).
offers_connection = sqlite3.connect('../../datasets/offers_dataset.db')
try:
    offers_frame = pd.read_sql_query('''SELECT * FROM offers''', offers_connection)
finally:
    offers_connection.close()

In [152]:
# Discard rows that have no Name value, then remove rows that are exact
# duplicates of an earlier row. Both calls modify offers_frame in place.
offers_frame.dropna(subset=['Name'], inplace=True)
offers_frame.drop_duplicates(keep='first', inplace=True)

In [153]:
# Labels written to the LocationType column.
LOCATION_TYPES = {"Remote": "Remote", "On Site": "OnSite"}

# An offer with a Location value is labelled OnSite; one without is labelled Remote.
locations_present = offers_frame["Location"].notna()
types = [
    LOCATION_TYPES["On Site"] if is_present else LOCATION_TYPES["Remote"]
    for is_present in locations_present
]

offers_frame.loc[:, "LocationType"] = types
offers_frame

Unnamed: 0,Name,Description,Location,LocationType
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite
...,...,...,...,...
209,Senior Staff Product Engineer for Embedded Too...,Do you want to be part of a new project team w...,"Padova, Veneto",OnSite
210,Internship Engineer for Advanced Process Control,Internship Engineer for Advanced Process Contr...,"Genova, Liguria",OnSite
211,DevOps Engineer Senior,ARGO LOGICA società di consulenza informatica ...,"Roma, Lazio",OnSite
212,Software Quality Engineer,CentralReach is the #1 provider of SaaS softwa...,37121 Verona,OnSite


### Location Latitude and Longitude

In [154]:
from functools import lru_cache
from math import radians, cos, sin, asin, sqrt
import requests
from requests import Response

# The Google Places API key is read from the environment so that it never
# appears in the notebook source or its history. A key that was ever committed
# in plain text should be treated as leaked and revoked.
GOOGLE_PLACES_API_KEY = os.environ.get("GOOGLE_PLACES_API_KEY", "")
if not GOOGLE_PLACES_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_PLACES_API_KEY environment variable before running this notebook."
    )

# Text Search URL template. The single `{}` placeholder receives the query
# text through str.format, so braces in the key are doubled to survive it.
PLACES_API = (
    "https://maps.googleapis.com/maps/api/place/textsearch/json"
    "?query={}&key="
    + GOOGLE_PLACES_API_KEY.replace("{", "{{").replace("}", "}}")
    + "&language=it"
)

# Number of on-site offers kept while developing, to limit Google Places queries.
DEV_SAMPLE_SIZE = 50

# Work on an independent copy: later cells add Latitude/Longitude columns to
# `frame`, and assigning into a slice of offers_frame triggers pandas'
# SettingWithCopyWarning.
frame = offers_frame[offers_frame['LocationType'] == 'OnSite'].iloc[:DEV_SAMPLE_SIZE].copy()

In [155]:
@lru_cache()
def get_coordinates(location: str) -> dict:
    """
    Look up a free-text location with the Google Places Text Search endpoint.

    Calls are memoised by ``lru_cache``, so a repeated location string is
    served from the cache instead of triggering another HTTP request. The
    returned dict is the cached object itself: callers must not mutate it.

    Parameters
    ----------
    location : str
        Free-text place description (e.g. ``"17100 Savona"``). It is
        URL-encoded before being substituted into ``PLACES_API`` so that
        characters such as ``&``, ``#`` or ``+`` cannot alter the query string.

    Returns
    -------
    dict
        The ``geometry.location`` object of the first search result; the
        callers in this notebook expect it to hold ``lat`` and ``lng``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    IndexError
        If the response contains no result for ``location``.
    """
    url = PLACES_API.format(quote_plus(location))
    # Bound the wait so an unresponsive service cannot hang the notebook.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    payload = response.json()
    results = payload.get('results') or []
    if not results:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError(
            f"No Places result for {location!r} (status: {payload.get('status')!r})"
        )
    return results[0]['geometry']['location']

def haversine(lon1, lat1, lon2, lat2, radius=6371.0):
    """
    Great-circle distance between two points on a sphere (haversine formula).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; it fixes the unit of the result. The default,
        6371, is the Earth's radius in kilometres (use 3956 for miles).

    Returns
    -------
    float
        Distance along the surface, in the same unit as ``radius``.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))

    delta_lon = lon2 - lon1
    delta_lat = lat2 - lat1

    # `a` is the squared half-chord length between the two points.
    a = sin(delta_lat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(delta_lon / 2) ** 2

    # Rounding can push sqrt(a) marginally above 1 for (near-)antipodal points,
    # which would make asin raise a domain error; clamp to its valid range.
    central_angle = 2 * asin(min(1.0, sqrt(a)))
    return central_angle * radius

def location_distance(off1_id, off2_id) -> float:
    """
    Distance between the locations of two offers of ``frame``.

    Parameters
    ----------
    off1_id, off2_id
        Index labels of the two offers in ``frame``.

    Returns
    -------
    float
        The value returned by ``haversine`` for the two geocoded locations
        (kilometres with its default radius).
    """
    loc1 = frame.loc[off1_id, 'Location']
    loc2 = frame.loc[off2_id, 'Location']

    # Read the coordinates by key instead of unpacking .values(): positional
    # unpacking silently swaps latitude and longitude if the key order changes.
    coords1 = get_coordinates(loc1)
    coords2 = get_coordinates(loc2)

    return haversine(coords1['lng'], coords1['lat'], coords2['lng'], coords2['lat'])

In [156]:
# Sanity check: distance between the offers at index labels 1 and 14.
first_id, second_id = 1, 14
first_place = frame.loc[first_id, "Location"]
second_place = frame.loc[second_id, "Location"]
distance_km = location_distance(first_id, second_id)
print(f'Distance between {first_place} and {second_place}: {distance_km:.2f}km')

Distance between 17100 Savona and Catanzaro, Calabria: 907.33km


In [157]:
# Geocode every offer of the working subset. get_coordinates is wrapped in
# lru_cache, so repeated location strings are served from its cache.
# TODO: persist the coordinates so a fresh kernel does not query the API again.
#
# (0.0, 0.0) stays the placeholder for offers that could not be geocoded,
# because the cells below read both columns as plain floats.
frame['Latitude'] = 0.0
frame['Longitude'] = 0.0

failed_lookups = {}
for index, location in frame['Location'].items():
    try:
        coordinates = get_coordinates(location)
        latitude, longitude = coordinates['lat'], coordinates['lng']
    except Exception as error:
        # Best effort: one failed lookup must not abort the whole run, but it
        # is recorded and reported instead of being silently discarded.
        failed_lookups[index] = f'{location!r}: {error!r}'
        continue
    frame.loc[index, 'Latitude'] = latitude
    frame.loc[index, 'Longitude'] = longitude

if failed_lookups:
    print(f'Could not geocode {len(failed_lookups)} offer(s); they keep the (0.0, 0.0) placeholder:')
    for index, reason in failed_lookups.items():
        print(f'  offer {index} -> {reason}')

frame

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite,44.40565,8.946256
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite,44.34255,8.429389
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite,43.84292,10.502698
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite,40.666379,16.60432
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite,37.507877,15.08303
5,SOFTWARE DEVELOPER,La passione ci guida in tutto ciò che facciamo...,60030 Monsano,OnSite,43.562682,13.248231
6,IBP Junior Algorithms Software Development,Pirelli is looking for the following profile t...,"Bari, Puglia",OnSite,41.117143,16.871872
9,Software Engineer,"Who we are:At Mambu, we believe that banking a...","Provincia di Latina, Lazio",OnSite,41.408748,13.08179
10,JUNIOR DEVELOPER,Sede lavoro: Bergamo | Tempo pieno \n Livello:...,"Bergamo, Lombardia",OnSite,45.698264,9.67727
12,Stage Software Developer,CHI SIAMO \n Golilla è la start up delle azi...,20089 Rozzano,OnSite,45.376031,9.142766


In [160]:
import folium

# Initial map centre as [latitude, longitude].
# NOTE(review): despite the name, these values match the coordinates geocoded
# for the London offers rather than a centre of Europe - confirm the intent.
EUROPE = [51.5074, -0.1278]

offers_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in frame.iterrows():
    latitude, longitude = row['Latitude'], row['Longitude']

    # Offers whose geocoding failed carry the (0.0, 0.0) placeholder (or NaN);
    # plotting them would put a bogus marker at latitude 0, longitude 0.
    if pd.isna(latitude) or pd.isna(longitude) or (latitude == 0 and longitude == 0):
        continue

    # The popup is rendered as HTML, so the offer name is escaped: names can
    # contain characters such as "&" or "<".
    popup_html = f'<b>{html.escape(str(row["Name"]))}</b>'
    folium.Marker(
        [latitude, longitude],
        tooltip=row['Location'],
        popup=popup_html,
    ).add_to(offers_map)

offers_map

### Recommend by Location

In [161]:
def get_near_offers(location: str, max_distance: float = 500, offers=None):
    """
    Return the index labels of the offers located near ``location``.

    Steps:
    1. geocode ``location`` with ``get_coordinates``;
    2. compute the ``haversine`` distance between it and every offer;
    3. sort by distance and keep the offers strictly closer than ``max_distance``.

    Parameters
    ----------
    location : str
        Free-text description of the place to search around.
    max_distance : float, optional
        Exclusive upper bound on the distance, in the unit returned by
        ``haversine`` (kilometres with its default radius).
    offers : pandas.DataFrame, optional
        Offers to search, with ``Latitude`` and ``Longitude`` columns.
        Defaults to the notebook-level ``frame``.

    Returns
    -------
    list
        Index labels of the matching offers, nearest first; suitable for
        ``offers.loc[...]``. Empty when there is no offer to search.
    """
    if offers is None:
        offers = frame
    if offers.empty:
        # A row-wise apply on an empty frame does not yield a Series of
        # distances, so answer directly (and skip the API request).
        return []

    # Read the coordinates by key rather than relying on the dict's ordering.
    user_coordinates = get_coordinates(location)
    user_lat, user_lon = user_coordinates['lat'], user_coordinates['lng']

    distances: pd.Series = offers.apply(
        lambda offer: haversine(
            user_lon, user_lat,
            offer['Longitude'], offer['Latitude'],
        ),
        axis=1,
    ).rename('Distance').sort_values()

    return list(distances[distances < max_distance].index)

**Esempio: Offerte vicino Milano**

In [162]:
# Offers closer than 600 (haversine units) to Milan, nearest first.
example_location = 'Milano'
near_ids = get_near_offers(location=example_location, max_distance=600)
near_offers = frame.loc[near_ids]

near_offers

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
25,Flutter Developer,We want to significantly strengthen the role o...,"Milano, Lombardia",OnSite,45.464204,9.189982
21,Front-End - App Developer,Siamo alla ricerca di candidati brillanti per ...,"Milano, Lombardia",OnSite,45.464204,9.189982
18,Java Developer,Siamo alla ricerca di un/a Java Developer ch...,"Milano, Lombardia",OnSite,45.464204,9.189982
12,Stage Software Developer,CHI SIAMO \n Golilla è la start up delle azi...,20089 Rozzano,OnSite,45.376031,9.142766
33,Appartenente alle categorie protette Software ...,Annuncio dedicato alle persone appartenenti al...,20864 Agrate Brianza,OnSite,45.572976,9.353542
10,JUNIOR DEVELOPER,Sede lavoro: Bergamo | Tempo pieno \n Livello:...,"Bergamo, Lombardia",OnSite,45.698264,9.67727
26,Software Developer,Cerchiamo tre nuovi developers (middle e senio...,"Cremona, Lombardia",OnSite,45.133249,10.022651
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite,44.40565,8.946256
20,Manufacturing Software Engineer / Relocation USA,The Manufacturing Software Engineer will work ...,"Torino, Piemonte",OnSite,45.070339,7.686864
23,Sviluppatore software,Cosa stiamo cercando \n Stiamo selezionando u...,37138 Verona,OnSite,45.448014,10.961274


In [163]:
# Map of the offers selected in near_offers.
query_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in near_offers.iterrows():
    latitude, longitude = row['Latitude'], row['Longitude']

    # Skip offers that were never geocoded: they carry the (0.0, 0.0)
    # placeholder (or NaN) and would be drawn at a bogus position.
    if pd.isna(latitude) or pd.isna(longitude) or (latitude == 0 and longitude == 0):
        continue

    # The popup is rendered as HTML, so the offer name is escaped: names can
    # contain characters such as "&" or "<".
    popup_html = f'<b>{html.escape(str(row["Name"]))}</b>'
    folium.Marker(
        [latitude, longitude],
        tooltip=row['Location'],
        popup=popup_html,
    ).add_to(query_map)

query_map

**Esempio: Offerte vicino Londra**

In [168]:
# Offers closer than 400 (haversine units) to London, nearest first.
example_location = 'Londra'
near_ids = get_near_offers(location=example_location, max_distance=400)
near_offers = frame.loc[near_ids]

near_offers

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
47,"System Software Engineer, Summer Intern - 2024...",ResponsibilitiesTikTok is the leading destinat...,London,OnSite,51.507218,-0.127586
34,UK Software Engineering Apprenticeship,Who We AreWe’re a global technology communicat...,London,OnSite,51.507218,-0.127586
58,Technology Cyber & Security Analyst Foundation...,"Annual Salary: £22,250 + benefitsOur Foundatio...",London E14,OnSite,51.507218,-0.127586
55,Software Engineer (University Grad),Want to build new features and improve existin...,London,OnSite,51.507218,-0.127586
39,Graduate Software Engineer (London),Full-timeEmployee Status: RegularRole Type: Hy...,London SW15,OnSite,51.456989,-0.228805
56,Programming Assistant,You’ll assist in the delivery and development ...,Westerham TN16,OnSite,51.266969,0.071827
35,Campus - Graduate Programme - Technology Gradu...,You Lead the Way. We’ve Got Your Back. \n Wi...,Burgess Hill,OnSite,50.954469,-0.128701
48,Software Developer,ORA (Oxford University Research Archive) is lo...,Oxford OX2,OnSite,51.771097,-1.307656
64,Software Application Engineer Intern,Job DescriptionIntel's Extreme Computing Softw...,Swindon,OnSite,51.558378,-1.780976
65,"Data Analyst, University Placement (September ...",DescriptionSnap Analytics is a customer-focuss...,Bristol BS1,OnSite,51.452258,-2.592462


In [169]:
# Map of the offers selected in near_offers.
query_map = folium.Map(location=EUROPE, zoom_start=4)

for index, row in near_offers.iterrows():
    latitude, longitude = row['Latitude'], row['Longitude']

    # Skip offers that were never geocoded: they carry the (0.0, 0.0)
    # placeholder (or NaN) and would be drawn at a bogus position.
    if pd.isna(latitude) or pd.isna(longitude) or (latitude == 0 and longitude == 0):
        continue

    # The popup is rendered as HTML, so the offer name is escaped: names can
    # contain characters such as "&" or "<".
    popup_html = f'<b>{html.escape(str(row["Name"]))}</b>'
    folium.Marker(
        [latitude, longitude],
        tooltip=row['Location'],
        popup=popup_html,
    ).add_to(query_map)

query_map