In [93]:
import os
import sqlite3

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [94]:
# Load the whole `offers` table into a DataFrame.
# try/finally guarantees the SQLite connection is released even when the
# query fails (missing table, corrupt file, ...).
offers_connection = sqlite3.connect('../../datasets/offers_dataset.db')
try:
    offers_frame = pd.read_sql_query('''SELECT * FROM offers''', offers_connection)
finally:
    offers_connection.close()

In [95]:
# Discard offers that have no Name, then remove rows that are exact duplicates.
offers_frame = (
    offers_frame
    .dropna(subset=['Name'])
    .drop_duplicates()
)

In [96]:
# Tag every offer with a LocationType: rows that carry a Location value are
# labelled on-site, rows without one are labelled remote.
LOCATION_TYPES = {"Remote" : "Remote", "On Site" : "OnSite"}
locations_present = offers_frame["Location"].notna()
types = [
    LOCATION_TYPES["On Site"] if is_present else LOCATION_TYPES["Remote"]
    for is_present in locations_present
]

offers_frame.loc[:, "LocationType"] = types
offers_frame

Unnamed: 0,Name,Description,Location,LocationType
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite
...,...,...,...,...
209,Senior Staff Product Engineer for Embedded Too...,Do you want to be part of a new project team w...,"Padova, Veneto",OnSite
210,Internship Engineer for Advanced Process Control,Internship Engineer for Advanced Process Contr...,"Genova, Liguria",OnSite
211,DevOps Engineer Senior,ARGO LOGICA società di consulenza informatica ...,"Roma, Lazio",OnSite
212,Software Quality Engineer,CentralReach is the #1 provider of SaaS softwa...,37121 Verona,OnSite


### Location Latitude and Longitude

In [97]:
from urllib.parse import quote
from math import radians, cos, sin, asin, sqrt

import requests 
from requests import Response

# The Google Places API key is read from the environment so the secret never
# lives in the notebook or in its version-control history.
# NOTE(review): the key that was previously hardcoded here has been exposed
# and should be revoked/rotated in the Google Cloud console.
GOOGLE_PLACES_API_KEY = os.environ.get('GOOGLE_PLACES_API_KEY', '')
if not GOOGLE_PLACES_API_KEY:
    raise RuntimeError(
        'Set the GOOGLE_PLACES_API_KEY environment variable before running the geocoding cells.'
    )

# URL template: the single `{}` placeholder receives the (URL-encoded) text query.
PLACES_API = (
    'https://maps.googleapis.com/maps/api/place/textsearch/json'
    '?query={}&key=' + GOOGLE_PLACES_API_KEY + '&language=it'
)

# Development subset: only the first 50 on-site offers, to limit the number of
# Google Places queries. `.copy()` detaches it from `offers_frame` so that
# columns added later do not trigger SettingWithCopyWarning.
frame = offers_frame[offers_frame['LocationType'] == 'OnSite'].iloc[:50].copy()

In [98]:
def get_coordinates(location: str) -> dict:
    """Geocode a free-text location with the Google Places Text Search API.

    Args:
        location: Free-text place description (e.g. ``"Genova, Liguria"``).

    Returns:
        The ``geometry.location`` object of the first search result
        (presumably ``{'lat': ..., 'lng': ...}`` -- confirm against the
        Places API response format).

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
        IndexError: If the API returned no result for ``location``.
    """
    # Percent-encode the query so characters such as '&', '#' or '+' inside a
    # location cannot corrupt the query string.
    location_encoded = quote(location, safe='')
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response: Response = requests.get(PLACES_API.format(location_encoded), timeout=10)
    response.raise_for_status()

    payload = response.json()
    results = payload.get('results') or []
    if not results:
        # Same exception type as indexing an empty list, but with a message
        # that says which lookup failed and what the API answered.
        raise IndexError(
            f"No Places result for {location!r} (status: {payload.get('status')})"
        )
    return results[0]['geometry']['location']

def haversine(lon1, lat1, lon2, lat2, radius=6371.0):
    """Great-circle distance between two points given in decimal degrees.

    Uses the haversine formula, which treats the Earth as a sphere.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in degrees.
        lon2, lat2: Longitude and latitude of the second point, in degrees.
        radius: Sphere radius; it determines the unit of the result. The
            default 6371 is the Earth's mean radius in kilometers
            (use 3956 for miles).

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (nearly) antipodal points, which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius

def location_distance(off1_id, off2_id) -> float:
    """Distance between the locations of two offers.

    Both locations are read from the ``Location`` column of the global
    ``frame`` and geocoded with ``get_coordinates`` (one API call each).

    Args:
        off1_id: Index label of the first offer in ``frame``.
        off2_id: Index label of the second offer in ``frame``.

    Returns:
        The value returned by ``haversine`` for the two geocoded points
        (kilometers with its default radius).
    """
    loc1 = frame.loc[off1_id, 'Location']
    loc2 = frame.loc[off2_id, 'Location']

    # Read the coordinates by key rather than unpacking `.values()`, which
    # silently depends on the key order of the JSON response.
    coords1 = get_coordinates(loc1)
    coords2 = get_coordinates(loc2)

    return haversine(coords1['lng'], coords1['lat'], coords2['lng'], coords2['lat'])

In [99]:
# Sanity check: distance between the locations of two sample offers.
first_offer_id, second_offer_id = 1, 14
first_location = frame.loc[first_offer_id, "Location"]
second_location = frame.loc[second_offer_id, "Location"]
distance_km = location_distance(first_offer_id, second_offer_id)
print(f'Distance between {first_location} and {second_location}: {distance_km:.2f}km')

Distance between 17100 Savona and Catanzaro, Calabria: 907.33km


In [100]:
# Compute Latitude and Longitude for each offer (persisting this cache to disk is still a TODO).
# Work on an independent copy so that adding columns never writes into a slice of `offers_frame`.
frame = frame.copy()

# Geocode every distinct location only once: many offers share the same city,
# and each lookup is a billable API request.
latitude_by_location = {}
longitude_by_location = {}
failed_locations = {}
for location in frame['Location'].dropna().unique():
    try:
        coordinates = get_coordinates(location)
        latitude_by_location[location] = coordinates['lat']
        longitude_by_location[location] = coordinates['lng']
    except (requests.RequestException, LookupError, ValueError) as error:
        # Best effort: keep going, but remember what failed instead of hiding it.
        failed_locations[location] = error

# Locations that could not be geocoded become NaN rather than 0.0/0.0, which
# is a real point on the map and would be indistinguishable from valid data.
frame['Latitude'] = frame['Location'].map(latitude_by_location).astype(float)
frame['Longitude'] = frame['Location'].map(longitude_by_location).astype(float)

if failed_locations:
    print(f'Could not geocode {len(failed_locations)} location(s):')
    for location, error in failed_locations.items():
        print(f'  {location!r}: {error!r}')

frame

Unnamed: 0,Name,Description,Location,LocationType,Latitude,Longitude
0,Software Developer,Miniclip is a global leader in digital games w...,"Genova, Liguria",OnSite,44.405650,8.946256
1,Junior Software Developer,"NETtoWORK, azienda italiana nata nel 2016, ope...",17100 Savona,OnSite,44.342550,8.429389
2,Software Developer,We are looking for talented and passionate peo...,55100 Lucca,OnSite,43.842920,10.502698
3,Software Developer,ARESYS is a R&D oriented company with nearly ...,"Matera, Basilicata",OnSite,40.666379,16.604320
4,Senior Software Developer,Il/la Candidato/a dovrà padroneggiare: \n \n- ...,"Catania, Sicilia",OnSite,37.507877,15.083030
...,...,...,...,...,...,...
209,Senior Staff Product Engineer for Embedded Too...,Do you want to be part of a new project team w...,"Padova, Veneto",OnSite,45.406435,11.876761
210,Internship Engineer for Advanced Process Control,Internship Engineer for Advanced Process Contr...,"Genova, Liguria",OnSite,44.405650,8.946256
211,DevOps Engineer Senior,ARGO LOGICA società di consulenza informatica ...,"Roma, Lazio",OnSite,41.902701,12.496235
212,Software Quality Engineer,CentralReach is the #1 provider of SaaS softwa...,37121 Verona,OnSite,45.441049,10.996259


In [110]:
# Plot the geocoded offers on an interactive map.
import folium
# Initial map center as [latitude, longitude].
# NOTE(review): these coordinates look like London rather than a European centroid -- confirm.
EUROPE = [51.5074, -0.1278]

offers_map = folium.Map(location=EUROPE, zoom_start=4)

# The Latitude/Longitude columns are added to `frame` (the geocoded subset),
# not to `offers_frame`, so iterating `offers_frame` raises KeyError on a
# fresh kernel. Rows without coordinates are skipped because a marker needs
# a valid position.
located_offers = frame.dropna(subset=['Latitude', 'Longitude'])
for index, row in located_offers.iterrows():
    folium.Marker([row['Latitude'], row['Longitude']], popup=row['Location']).add_to(offers_map)

offers_map