In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [1]:
def fetch(url, timeout=30):
    """GET ``url`` and return its body decoded as JSON.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; bounds how long the call may
        wait on the server so a stalled connection cannot hang the kernel.

    Returns
    -------
    The decoded JSON payload (whatever ``Response.json()`` produces).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors (e.g. a gateway error page served as HTML) as a clear
    # HTTPError instead of an opaque JSON decoding failure further down.
    response.raise_for_status()
    return response.json()

In [233]:
def get_data(url):
    """Download the urban-area index at ``url`` plus each area's score document.

    The index is read from ``['_links']['ua:item']``; every entry supplies an
    ``href`` and a ``name``. For each ``href`` the document at ``<href>scores/``
    is fetched (the suffix is appended verbatim, so the href is presumably
    expected to end with ``/`` -- verify against the API).

    Returns
    -------
    tuple of four lists, all in index order:
        (city names,
         raw ``categories`` payload per city,
         ``teleport_city_score`` rounded to 2 decimals per city,
         ``summary`` with HTML markup stripped per city)
    """
    index = fetch(url)
    # Read href before name for each entry, in a single pass over the index.
    entries = [(item['href'], item['name']) for item in index['_links']['ua:item']]
    area_links = [href for href, _ in entries]
    cities = [name for _, name in entries]

    city_categories, city_scores, city_summaries = [], [], []

    for href in area_links:
        scores = fetch(f"{href}scores/")
        city_categories.append(scores['categories'])
        city_scores.append(round(scores['teleport_city_score'], 2))
        # The summary arrives as an HTML fragment; keep only its text content.
        summary_text = BeautifulSoup(scores['summary'], 'html.parser').text
        city_summaries.append(summary_text)

    return (cities, city_categories, city_scores, city_summaries)
        
    

In [234]:
# Download the urban-area index and every area's scores; get_data returns four lists in index order.
cities, city_categories, city_scores, city_summaries = get_data("https://api.teleport.org/api/urban_areas")

In [277]:
def transform(cities, city_categories, city_scores, city_summaries, include_summary=False):
    """Assemble the per-city score table as a DataFrame.

    Parameters
    ----------
    cities : list of str
        City names, one per row.
    city_categories : list of list of dict
        For each city, a list of dicts carrying ``'name'`` and
        ``'score_out_of_10'``.
    city_scores : list of float
        Overall score per city; emitted as ``City_Score``.
    city_summaries : list of str
        Free-text summary per city. Only emitted when ``include_summary``
        is true.
    include_summary : bool, optional
        When true, append a whitespace-normalised ``Summary`` column as the
        last column. Defaults to False, which yields the 18-column table
        (``City``, 16 category scores, ``City_Score``).

    Returns
    -------
    pandas.DataFrame
        One row per city. A category that is absent for a city is stored as
        NaN so the rows stay aligned with ``cities``.

    Raises
    ------
    ValueError
        Raised by ``pandas.DataFrame`` when the four input lists do not all
        have the same length.
    """
    # (category name in the payload, output column), listed in output order.
    # 'Leisure and Culture' is deliberately not listed: it was never part of
    # the emitted table.
    category_columns = (
        ("Housing", "Housing_Score"),
        ("Economy", "Economy_Score"),
        ("Education", "Education_Score"),
        ("Cost of Living", "Cost_of_Living_Score"),
        ("Taxation", "Taxation_Score"),
        ("Healthcare", "Healthcare_Score"),
        ("Environmental Quality", "Environmental_Quality_Score"),
        ("Business Freedom", "Business_Freedom_Score"),
        ("Internet Access", "Internet_Access_Score"),
        ("Outdoors", "Outdoors_Score"),
        ("Startups", "Startups_Score"),
        ("Commute", "Commute_Score"),
        ("Tolerance", "Tolerance_Score"),
        ("Travel Connectivity", "Travel_Connectivity_Score"),
        ("Venture Capital", "Venture_Capital_Score"),
        ("Safety", "Safety_Score"),
    )
    wanted = {name for name, _ in category_columns}

    # One name -> rounded-score lookup per city. Looking scores up per city
    # (instead of appending to seventeen separate lists) keeps every column
    # exactly as long as `cities` even when a category is missing.
    per_city = [
        {
            c['name']: round(c['score_out_of_10'], 2)
            for c in categories
            if c['name'] in wanted
        }
        for categories in city_categories
    ]

    data = {"City": cities}
    for name, column in category_columns:
        data[column] = [scores.get(name, float("nan")) for scores in per_city]
    data["City_Score"] = city_scores
    # Summary always goes through the constructor so a length mismatch is
    # still reported, even when the column is dropped afterwards.
    data["Summary"] = city_summaries

    df = pd.DataFrame(data)
    if not include_summary:
        return df.drop('Summary', axis=1)

    # Trim leading/trailing whitespace, then fold each line break plus its
    # indentation into a single space.
    df["Summary"] = df['Summary'].replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace({r'\n\s+': ' '}, regex=True)
    return df

In [278]:
# Build the per-city score table from the lists downloaded above.
df = transform(cities, city_categories, city_scores, city_summaries)

In [279]:
# Preview the first rows of the score table.
df.head()

Unnamed: 0,City,Housing_Score,Economy_Score,Education_Score,Cost_of_Living_Score,Taxation_Score,Healthcare_Score,Environmental_Quality_Score,Business_Freedom_Score,Internet_Access_Score,Outdoors_Score,Startups_Score,Commute_Score,Tolerance_Score,Travel_Connectivity_Score,Venture_Capital_Score,Safety_Score,City_Score
0,Aarhus,6.13,4.89,5.37,4.02,5.07,9.13,7.63,9.94,8.37,4.13,2.83,6.31,9.74,3.54,2.51,9.62,65.09
1,Adelaide,6.31,6.07,5.14,4.69,4.59,9.15,8.33,9.4,4.34,5.53,3.14,5.34,7.82,1.78,2.64,7.93,61.2
2,Albuquerque,7.26,6.51,4.15,6.06,4.35,8.51,7.32,8.67,5.4,3.52,3.77,5.06,7.03,1.46,1.49,1.34,51.51
3,Almaty,9.28,5.27,2.28,9.33,8.52,3.69,3.86,5.57,2.89,5.5,2.46,5.87,6.54,4.59,0.0,7.31,52.76
4,Amsterdam,3.05,5.05,6.18,3.82,4.95,8.9,7.6,8.84,4.52,5.31,7.97,6.12,8.37,8.32,6.11,8.5,69.6


In [280]:
# Preview the last rows of the score table.
df.tail()

Unnamed: 0,City,Housing_Score,Economy_Score,Education_Score,Cost_of_Living_Score,Taxation_Score,Healthcare_Score,Environmental_Quality_Score,Business_Freedom_Score,Internet_Access_Score,Outdoors_Score,Startups_Score,Commute_Score,Tolerance_Score,Travel_Connectivity_Score,Venture_Capital_Score,Safety_Score,City_Score
261,Winnipeg,7.55,5.84,5.34,5.88,7.27,8.54,5.67,8.97,4.9,4.24,3.8,5.1,6.66,1.48,0.0,6.02,57.54
262,Wroclaw,8.45,4.76,4.16,8.43,5.1,6.18,4.96,5.7,5.76,4.08,3.59,4.95,3.9,2.24,2.39,8.17,53.17
263,Yerevan,9.69,3.46,0.0,9.43,8.06,4.09,4.44,6.5,3.9,7.78,3.93,5.65,7.35,1.79,0.0,8.87,55.57
264,Zagreb,8.91,2.24,3.21,7.19,7.99,6.3,7.33,5.72,5.03,4.81,4.45,4.83,7.95,2.03,1.77,8.66,57.26
265,Zurich,1.47,6.55,7.29,1.0,6.43,9.74,8.64,8.89,7.27,5.4,6.2,5.84,8.78,7.73,3.87,9.07,68.81


In [282]:
# Sanity check: number of columns in the score table.
len(df.columns)

18

In [283]:
def save(df, path="./Datasets/quality_of_life_v1.csv"):
    """Write ``df`` to a CSV file without the index column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to persist.
    path : str or os.PathLike, optional
        Destination file. Defaults to ``./Datasets/quality_of_life_v1.csv``.

    Missing parent directories are created first, then ``Saved...`` is
    printed once the file has been written.
    """
    target = Path(path)
    # to_csv does not create directories, so a fresh checkout without the
    # output folder would otherwise fail here.
    target.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(target, index=False)
    print("Saved...")

In [284]:
# Persist the score table to CSV.
save(df)

Saved...


In [6]:
# Probe the continents endpoint; the timeout keeps a stalled connection from hanging the kernel.
response = requests.get("https://api.teleport.org/api/continents", timeout=30)

In [5]:
# Inspect the raw response body (bytes).
# NOTE(review): the output recorded for this cell is a CloudFront "504 ERROR" HTML page, not JSON,
# and this cell's execution count (5) precedes the request cell's (6) -- re-run the cells in order to confirm.
response.content

b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n<HTML><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">\n<TITLE>ERROR: The request could not be satisfied</TITLE>\n</HEAD><BODY>\n<H1>504 ERROR</H1>\n<H2>The request could not be satisfied.</H2>\n<HR noshade size="1px">\nCloudFront attempted to establish a connection with the origin, but either the attempt failed or the origin closed the connection.\nWe can\'t connect to the server for this app or website at this time. There might be too much traffic or a configuration error. Try again later, or contact the app or website owner.\n<BR clear="all">\nIf you provide content to customers through CloudFront, you can find steps to troubleshoot and help prevent this error by reviewing the CloudFront documentation.\n<BR clear="all">\n<HR noshade size="1px">\n<PRE>\nGenerated by cloudfront (CloudFront)\nRequest ID: eoYPPPIjqErEfnxCf4Zh4CGU9n0z4Io4jhATO9S3dj8wXP9q