# Data Visualization with Python : Map

In [98]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import mplleaflet

## Load and Process Data

### Load City Attributes Data

In [99]:
# City metadata table (city name, country and coordinates per row).
cities_csv = "../../../data/city_attributes.csv"
df_cities = pd.read_csv(cities_csv)

# Show the dimensions, then preview the first rows.
print(df_cities.shape)
df_cities.head()

(36, 4)


Unnamed: 0,City,Country,Latitude,Longitude
0,Vancouver,Canada,49.24966,-123.119339
1,Portland,United States,45.523449,-122.676208
2,San Francisco,United States,37.774929,-122.419418
3,Seattle,United States,47.606209,-122.332069
4,Los Angeles,United States,34.052231,-118.243683


__Remove cities in Israel__

In [100]:
# Boolean mask of the rows to drop; everything else is kept.
in_israel = df_cities['Country'].eq('Israel')
df_cities = df_cities.loc[~in_israel]

# List the remaining city names.
df_cities['City'].unique()

array(['Vancouver', 'Portland', 'San Francisco', 'Seattle', 'Los Angeles',
       'San Diego', 'Las Vegas', 'Phoenix', 'Albuquerque', 'Denver',
       'San Antonio', 'Dallas', 'Houston', 'Kansas City', 'Minneapolis',
       'Saint Louis', 'Chicago', 'Nashville', 'Indianapolis', 'Atlanta',
       'Detroit', 'Jacksonville', 'Charlotte', 'Miami', 'Pittsburgh',
       'Toronto', 'Philadelphia', 'New York', 'Montreal', 'Boston'],
      dtype=object)

### Load Temperature Data

In [101]:
# Hourly temperature readings in wide format: a 'datetime' column plus one column per city.
temperature_csv = "../../../data/temperature.csv"
df_temperature = pd.read_csv(temperature_csv)

# Show the dimensions, then preview the first rows.
print(df_temperature.shape)
df_temperature.head()

(45253, 37)


Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Philadelphia,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem
0,2012-10-01 12:00:00,,,,,,,,,,...,,,,,,,309.1,,,
1,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,...,285.63,288.22,285.83,287.17,307.59,305.47,310.58,304.4,304.4,303.5
2,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,...,285.663208,288.247676,285.83465,287.186092,307.59,304.31,310.495769,304.4,304.4,303.5
3,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,...,285.756824,288.32694,285.84779,287.231672,307.391513,304.281841,310.411538,304.4,304.4,303.5
4,2012-10-01 16:00:00,284.624955,282.100481,289.446243,281.782449,291.857503,291.553209,293.381213,296.654466,285.313345,...,285.85044,288.406203,285.860929,287.277251,307.1452,304.238015,310.327308,304.4,304.4,303.5


**Parse the string datetime into a new year column**

In [102]:
# The first four characters of the 'datetime' string are the year; it is kept as a string.
df_temperature['year'] = df_temperature['datetime'].str.slice(0, 4)

In [103]:
# Check the frame after adding the 'year' column: dimensions, then the first rows.
print(df_temperature.shape)
df_temperature.head()

(45253, 38)


Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem,year
0,2012-10-01 12:00:00,,,,,,,,,,...,,,,,,309.1,,,,2012
1,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,...,288.22,285.83,287.17,307.59,305.47,310.58,304.4,304.4,303.5,2012
2,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,...,288.247676,285.83465,287.186092,307.59,304.31,310.495769,304.4,304.4,303.5,2012
3,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,...,288.32694,285.84779,287.231672,307.391513,304.281841,310.411538,304.4,304.4,303.5,2012
4,2012-10-01 16:00:00,284.624955,282.100481,289.446243,281.782449,291.857503,291.553209,293.381213,296.654466,285.313345,...,288.406203,285.860929,287.277251,307.1452,304.238015,310.327308,304.4,304.4,303.5,2012


**Melt temperature dataframe into long frame, rename columns**

In [104]:
# Reshape wide -> long: every non-id column becomes a (City, temperature) row,
# with 'datetime' and 'year' repeated as identifier columns.
df_temperature_long = df_temperature.melt(
    id_vars=['datetime', 'year'],
    var_name="City",
    value_name="temperature",
)

**Convert temperature values from Kelvin to Fahrenheit**

In [105]:
# Coerce to a numeric dtype first, then apply F = 1.8 * (K - 273.15) + 32.
# NOTE: the result overwrites the 'temperature' column, so re-running this cell
# would apply the conversion a second time.
kelvin = pd.to_numeric(df_temperature_long['temperature'])
df_temperature_long['temperature'] = 1.8 * (kelvin - 273.15) + 32

In [106]:
# Check the long-format frame after the unit conversion: dimensions, then the first rows.
print(df_temperature_long.shape)
df_temperature_long.head()

(1629108, 4)


Unnamed: 0,datetime,year,City,temperature
0,2012-10-01 12:00:00,2012,Vancouver,
1,2012-10-01 13:00:00,2012,Vancouver,52.664
2,2012-10-01 14:00:00,2012,Vancouver,52.662274
3,2012-10-01 15:00:00,2012,Vancouver,52.658596
4,2012-10-01 16:00:00,2012,Vancouver,52.654918


### Merge City and Temperature Data

In [107]:
# Attach each city's attributes to its temperature readings.
# An inner join on 'City' keeps only cities present in both frames.
df_city_temp = pd.merge(df_cities, df_temperature_long, how='inner', on='City')

In [112]:
# Check the merged frame: dimensions, then the first 15 rows.
print(df_city_temp.shape)
df_city_temp.head(15)

(1357590, 7)


Unnamed: 0,City,Country,Latitude,Longitude,datetime,year,temperature
0,Vancouver,Canada,49.24966,-123.119339,2012-10-01 12:00:00,2012,
1,Vancouver,Canada,49.24966,-123.119339,2012-10-01 13:00:00,2012,52.664
2,Vancouver,Canada,49.24966,-123.119339,2012-10-01 14:00:00,2012,52.662274
3,Vancouver,Canada,49.24966,-123.119339,2012-10-01 15:00:00,2012,52.658596
4,Vancouver,Canada,49.24966,-123.119339,2012-10-01 16:00:00,2012,52.654918
5,Vancouver,Canada,49.24966,-123.119339,2012-10-01 17:00:00,2012,52.65124
6,Vancouver,Canada,49.24966,-123.119339,2012-10-01 18:00:00,2012,52.647562
7,Vancouver,Canada,49.24966,-123.119339,2012-10-01 19:00:00,2012,52.643884
8,Vancouver,Canada,49.24966,-123.119339,2012-10-01 20:00:00,2012,52.640206
9,Vancouver,Canada,49.24966,-123.119339,2012-10-01 21:00:00,2012,52.636528


### Aggregate

**Aggregate merged dataframes by year, City, Latitude and Longitude**

In [113]:
# Mean temperature per (year, city); Latitude and Longitude are included in the
# grouping keys so they are carried through to the aggregated frame.
group_keys = ['year', 'City', 'Latitude', 'Longitude']
df_city_temp_agg = df_city_temp.groupby(group_keys)['temperature'].agg(['mean'])

# Show the dimensions, then preview the first rows.
print(df_city_temp_agg.shape)
df_city_temp_agg.head()

(180, 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean
year,City,Latitude,Longitude,Unnamed: 4_level_1
2012,Albuquerque,35.084492,-106.651138,45.623425
2012,Atlanta,33.749001,-84.387978,53.561912
2012,Boston,42.358429,-71.059769,44.192129
2012,Charlotte,35.227089,-80.843132,52.220145
2012,Chicago,41.850029,-87.650047,43.90315


In [114]:
# Give the aggregate a descriptive name and turn the group keys back into ordinary columns.
# NOTE: run once per aggregation; a second reset_index would add an extra 'index' column.
df_city_temp_agg = (
    df_city_temp_agg
    .rename(columns={"mean": "mean_temperature"})
    .reset_index()
)

In [115]:
# Check the flattened aggregate: dimensions, then the first rows.
print(df_city_temp_agg.shape)
df_city_temp_agg.head()

(180, 5)


Unnamed: 0,year,City,Latitude,Longitude,mean_temperature
0,2012,Albuquerque,35.084492,-106.651138,45.623425
1,2012,Atlanta,33.749001,-84.387978,53.561912
2,2012,Boston,42.358429,-71.059769,44.192129
3,2012,Charlotte,35.227089,-80.843132,52.220145
4,2012,Chicago,41.850029,-87.650047,43.90315


**Map temperature to a size**

In [116]:
def make_interp_generator(temp_domain, size_range):
    """Build a function that linearly maps temperatures onto marker sizes.

    Parameters
    ----------
    temp_domain : sequence of float
        Input coordinates, forwarded to ``np.interp`` as ``xp``.
    size_range : sequence of float
        Output values matching ``temp_domain``, forwarded as ``fp``.

    Returns
    -------
    callable
        ``f(v)`` that returns ``np.interp(v, temp_domain, size_range)``.
        ``v`` may be a scalar or an array-like.
    """
    def interpolate(v):
        return np.interp(v, temp_domain, size_range)

    return interpolate

In [117]:
# Linear scale from the observed range of yearly mean temperatures to marker sizes.
temp_domain = [df_city_temp_agg.mean_temperature.min(), df_city_temp_agg.mean_temperature.max()]
size_range = [1, 500]
interpreter = make_interp_generator(temp_domain, size_range)

# np.interp accepts array-like input, so rescale the whole column in a single
# call instead of invoking the interpolator once per row through .apply.
df_city_temp_agg['temp_size'] = interpreter(df_city_temp_agg['mean_temperature'].values)

In [118]:
# Preview the aggregate with the new 'temp_size' column.
df_city_temp_agg.head()

Unnamed: 0,year,City,Latitude,Longitude,mean_temperature,temp_size
0,2012,Albuquerque,35.084492,-106.651138,45.623425,117.495461
1,2012,Atlanta,33.749001,-84.387978,53.561912,209.130578
2,2012,Boston,42.358429,-71.059769,44.192129,100.973794
3,2012,Charlotte,35.227089,-80.843132,52.220145,193.642363
4,2012,Chicago,41.850029,-87.650047,43.90315,97.63807


## Visualize City Temperature Map

In [120]:
# Distinct city names in the aggregated frame.
cities = df_city_temp_agg['City'].unique()
cities

array(['Albuquerque', 'Atlanta', 'Boston', 'Charlotte', 'Chicago',
       'Dallas', 'Denver', 'Detroit', 'Houston', 'Indianapolis',
       'Jacksonville', 'Kansas City', 'Las Vegas', 'Los Angeles', 'Miami',
       'Minneapolis', 'Montreal', 'Nashville', 'New York', 'Philadelphia',
       'Phoenix', 'Pittsburgh', 'Portland', 'Saint Louis', 'San Antonio',
       'San Diego', 'San Francisco', 'Seattle', 'Toronto', 'Vancouver'],
      dtype=object)

__Map each city's mean temperature for 2012 as a dot on the map. The size of the dot is a function of the temperature.__

In [121]:
# Keep only the 2012 aggregates; 'year' holds strings, hence the quoted literal.
b_is_year = df_city_temp_agg['year'].eq('2012')
df = df_city_temp_agg.loc[b_is_year]

fig, ax = plt.subplots(figsize=(10, 5))

color = 'blue'
# One scatter call per city; the marker size comes from the 'temp_size' column.
for city in cities:
    df_city = df.loc[df['City'].eq(city)]
    ax.scatter(
        df_city['Longitude'],
        df_city['Latitude'],
        c=color,
        s=df_city['temp_size'],
        alpha=0.6,
    )

# Render the matplotlib figure as a map in the cell output.
mplleaflet.display(fig=fig)

__Map each city's mean temperature for 2017 as a dot on the map. The size of the dot is a function of the temperature.__

In [122]:
# Keep only the 2017 aggregates; 'year' holds strings, hence the quoted literal.
b_is_year = df_city_temp_agg['year'].eq('2017')
df = df_city_temp_agg.loc[b_is_year]

fig, ax = plt.subplots(figsize=(10, 5))

color = 'blue'
# One scatter call per city; the marker size comes from the 'temp_size' column.
for city in cities:
    df_city = df.loc[df['City'].eq(city)]
    ax.scatter(
        df_city['Longitude'],
        df_city['Latitude'],
        c=color,
        s=df_city['temp_size'],
        alpha=0.6,
    )

# Render the matplotlib figure as a map in the cell output.
mplleaflet.display(fig=fig)