# Data Viz with Python : Map

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import mplleaflet

## Load and Process Data

### Load City Attributes Data

In [30]:
# Load the per-city attribute table (country plus latitude/longitude for each city).
df_cities = pd.read_csv(filepath_or_buffer="../../../data/city_attributes.csv")
print(df_cities.shape)
df_cities.head(n=5)

(36, 4)


Unnamed: 0,City,Country,Latitude,Longitude
0,Vancouver,Canada,49.24966,-123.119339
1,Portland,United States,45.523449,-122.676208
2,San Francisco,United States,37.774929,-122.419418
3,Seattle,United States,47.606209,-122.332069
4,Los Angeles,United States,34.052231,-118.243683


### Load Temperature Data

In [31]:
# Load the wide temperature table: one row per timestamp, one column per city.
df_temperature = pd.read_csv(filepath_or_buffer="../../../data/temperature.csv")
print(df_temperature.shape)
df_temperature.head(n=5)

(45253, 37)


Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Philadelphia,New York,Montreal,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem
0,2012-10-01 12:00:00,,,,,,,,,,...,,,,,,,309.1,,,
1,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,...,285.63,288.22,285.83,287.17,307.59,305.47,310.58,304.4,304.4,303.5
2,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,...,285.663208,288.247676,285.83465,287.186092,307.59,304.31,310.495769,304.4,304.4,303.5
3,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,...,285.756824,288.32694,285.84779,287.231672,307.391513,304.281841,310.411538,304.4,304.4,303.5
4,2012-10-01 16:00:00,284.624955,282.100481,289.446243,281.782449,291.857503,291.553209,293.381213,296.654466,285.313345,...,285.85044,288.406203,285.860929,287.277251,307.1452,304.238015,310.327308,304.4,304.4,303.5


**Parse the string `datetime` column into new `year`, `year_month` and `date` columns**

In [32]:
# The timestamps are strings shaped like "YYYY-MM-DD HH:MM:SS", so fixed-width
# prefixes yield the year, the year-month and the calendar date (all as strings).
timestamp_text = df_temperature['datetime']
df_temperature['year'] = timestamp_text.str.slice(stop=4)
df_temperature['year_month'] = timestamp_text.str.slice(stop=7)
df_temperature['date'] = timestamp_text.str.slice(stop=10)

In [33]:
# Check that the three derived columns were appended to the temperature table.
print(df_temperature.shape)
df_temperature.head(n=5)

(45253, 40)


Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,...,Boston,Beersheba,Tel Aviv District,Eilat,Haifa,Nahariyya,Jerusalem,year,year_month,date
0,2012-10-01 12:00:00,,,,,,,,,,...,,,,309.1,,,,2012,2012-10,2012-10-01
1,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,...,287.17,307.59,305.47,310.58,304.4,304.4,303.5,2012,2012-10,2012-10-01
2,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,...,287.186092,307.59,304.31,310.495769,304.4,304.4,303.5,2012,2012-10,2012-10-01
3,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,...,287.231672,307.391513,304.281841,310.411538,304.4,304.4,303.5,2012,2012-10,2012-10-01
4,2012-10-01 16:00:00,284.624955,282.100481,289.446243,281.782449,291.857503,291.553209,293.381213,296.654466,285.313345,...,287.277251,307.1452,304.238015,310.327308,304.4,304.4,303.5,2012,2012-10,2012-10-01


**Melt the temperature dataframe into long format, rename the columns and convert the temperature values to numeric**

In [34]:
# Reshape wide -> long: one row per (timestamp, city) with the reading in a
# single column, then give the generated columns meaningful names.
df_temperature_long = (
    df_temperature
    .melt(id_vars=['datetime', 'year', 'year_month', 'date'])
    .rename(columns={"variable": "City", "value": "temperature"})
)
# Coerce the readings to a numeric dtype.
df_temperature_long['temperature'] = pd.to_numeric(df_temperature_long['temperature'])

In [35]:
# Inspect the long-format table: size first, then the leading rows.
print(df_temperature_long.shape)
df_temperature_long.head(n=5)

(1629108, 6)


Unnamed: 0,datetime,year,year_month,date,City,temperature
0,2012-10-01 12:00:00,2012,2012-10,2012-10-01,Vancouver,
1,2012-10-01 13:00:00,2012,2012-10,2012-10-01,Vancouver,284.63
2,2012-10-01 14:00:00,2012,2012-10,2012-10-01,Vancouver,284.629041
3,2012-10-01 15:00:00,2012,2012-10,2012-10-01,Vancouver,284.626998
4,2012-10-01 16:00:00,2012,2012-10,2012-10-01,Vancouver,284.624955


### Merge City and Temperature Data

In [36]:
# Attach country and coordinates to every temperature reading by joining on the city name.
df_city_temp = pd.merge(df_cities, df_temperature_long, how='inner', on='City')

In [37]:
# Inspect the merged table: size first, then the leading rows.
print(df_city_temp.shape)
df_city_temp.head(n=5)

(1629108, 9)


Unnamed: 0,City,Country,Latitude,Longitude,datetime,year,year_month,date,temperature
0,Vancouver,Canada,49.24966,-123.119339,2012-10-01 12:00:00,2012,2012-10,2012-10-01,
1,Vancouver,Canada,49.24966,-123.119339,2012-10-01 13:00:00,2012,2012-10,2012-10-01,284.63
2,Vancouver,Canada,49.24966,-123.119339,2012-10-01 14:00:00,2012,2012-10,2012-10-01,284.629041
3,Vancouver,Canada,49.24966,-123.119339,2012-10-01 15:00:00,2012,2012-10,2012-10-01,284.626998
4,Vancouver,Canada,49.24966,-123.119339,2012-10-01 16:00:00,2012,2012-10,2012-10-01,284.624955


### Aggregate

**Aggregate merged dataframes by year, City, Latitude and Longitude**

In [38]:
# Summary statistics of temperature per year and city. Latitude/Longitude are
# part of the grouping keys so that they are carried through into the result.
group_keys = ['year', 'City', 'Latitude', 'Longitude']
summary_stats = ['min', 'max', 'mean', 'median']
df_city_temp_agg = df_city_temp.groupby(by=group_keys)['temperature'].agg(summary_stats)
print(df_city_temp_agg.shape)
df_city_temp_agg.head(n=5)

(216, 4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,min,max,mean,median
year,City,Latitude,Longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012,Albuquerque,35.084492,-106.651138,260.63,301.41,280.71857,280.43
2012,Atlanta,33.749001,-84.387978,270.8,300.32,285.12884,285.01
2012,Beersheba,31.25181,34.791302,280.7985,314.82,294.620317,293.87
2012,Boston,42.358429,-71.059769,265.96,297.94,279.923405,279.35
2012,Charlotte,35.227089,-80.843132,269.33,300.71,284.383414,283.85


In [39]:
# Give the statistic columns explicit names, then turn the group keys
# (year, City, Latitude, Longitude) back into ordinary columns.
df_city_temp_agg = df_city_temp_agg.rename(
    columns={
        "min": "min_temperature",
        "max": "max_temperature",
        "median": "median_temperature",
        "mean": "mean_temperature",
    }
).reset_index()

In [40]:
# Inspect the flattened aggregate table: size first, then the leading rows.
print(df_city_temp_agg.shape)
df_city_temp_agg.head(n=5)

(216, 8)


Unnamed: 0,year,City,Latitude,Longitude,min_temperature,max_temperature,mean_temperature,median_temperature
0,2012,Albuquerque,35.084492,-106.651138,260.63,301.41,280.71857,280.43
1,2012,Atlanta,33.749001,-84.387978,270.8,300.32,285.12884,285.01
2,2012,Beersheba,31.25181,34.791302,280.7985,314.82,294.620317,293.87
3,2012,Boston,42.358429,-71.059769,265.96,297.94,279.923405,279.35
4,2012,Charlotte,35.227089,-80.843132,269.33,300.71,284.383414,283.85


**Map mean temperature to a marker size**

In [41]:
def make_interp_generator(domain, range):
    """Build a function that linearly interpolates values from `domain` onto `range`.

    Parameters
    ----------
    domain : sequence of float
        x-coordinates of the interpolation points, passed to ``np.interp`` as ``xp``.
    range : sequence of float
        y-coordinates matching `domain`, passed to ``np.interp`` as ``fp``.
        (The name shadows the built-in only inside this function.)

    Returns
    -------
    callable
        A one-argument function ``v -> np.interp(v, domain, range)``.
    """
    def interpolate(v):
        return np.interp(v, domain, range)

    return interpolate

In [42]:
# Linearly map mean temperature onto scatter marker sizes: the lowest mean
# temperature in the table gets size 1 and the highest gets size 500.
domain = [df_city_temp_agg.mean_temperature.min(), df_city_temp_agg.mean_temperature.max()]
# Named `size_range` rather than `range` so the built-in `range()` is not
# shadowed for every cell that runs after this one.
size_range = [1, 500]
interpreter = make_interp_generator(domain, size_range)
# np.interp accepts array-like input, so the whole column is mapped in one call
# instead of one Python-level call per row.
df_city_temp_agg['temp_size'] = interpreter(df_city_temp_agg.mean_temperature)

## Visualize City Temperature Map

In [43]:
# Distinct city names present in the aggregated table.
cities = df_city_temp_agg['City'].unique()
cities

array(['Albuquerque', 'Atlanta', 'Beersheba', 'Boston', 'Charlotte',
       'Chicago', 'Dallas', 'Denver', 'Detroit', 'Eilat', 'Haifa',
       'Houston', 'Indianapolis', 'Jacksonville', 'Jerusalem',
       'Kansas City', 'Las Vegas', 'Los Angeles', 'Miami', 'Minneapolis',
       'Montreal', 'Nahariyya', 'Nashville', 'New York', 'Philadelphia',
       'Phoenix', 'Pittsburgh', 'Portland', 'Saint Louis', 'San Antonio',
       'San Diego', 'San Francisco', 'Seattle', 'Tel Aviv District',
       'Toronto', 'Vancouver'], dtype=object)

In [46]:
def plot_map(year):
    """Plot each city's mean temperature for one year on an interactive Leaflet map.

    Every city is drawn as a blue, semi-transparent circle at its
    longitude/latitude, sized by the ``temp_size`` column of the
    notebook-level ``df_city_temp_agg`` table.

    Parameters
    ----------
    year : str or int
        Year to display. It is converted to ``str`` before filtering because
        the ``year`` column holds strings (e.g. ``'2012'``).

    Returns
    -------
    The value returned by ``mplleaflet.display()``, so that calling this
    function on the last line of a cell renders the map.
    """
    year_rows = df_city_temp_agg[df_city_temp_agg.year == str(year)]

    plt.figure(figsize=(10, 10))
    # The aggregate table has one row per (year, city), so a single scatter
    # call draws every city for the selected year.
    plt.scatter(year_rows.Longitude, year_rows.Latitude, c='blue', s=year_rows.temp_size, alpha=0.6)
    return mplleaflet.display()


In [52]:
# Keep only the 2012 rows; `year` holds strings, hence the quoted literal.
b_is_year = df_city_temp_agg.year == '2012'
df = df_city_temp_agg[b_is_year]

plt.figure(figsize=(10,10))

color = 'blue'
# `df` has one row per city for this year, so one scatter call draws every
# city; marker size comes from the per-row `temp_size` column.
plt.scatter(df.Longitude, df.Latitude, c=color, s=df.temp_size, alpha=0.6)
# Render the current matplotlib figure on an interactive Leaflet map.
mplleaflet.display()

In [53]:
# Keep only the 2017 rows; `year` holds strings, hence the quoted literal.
b_is_year = df_city_temp_agg.year == '2017'
df = df_city_temp_agg[b_is_year]

plt.figure(figsize=(10,10))

color = 'blue'
# `df` has one row per city for this year, so one scatter call draws every
# city; marker size comes from the per-row `temp_size` column.
plt.scatter(df.Longitude, df.Latitude, c=color, s=df.temp_size, alpha=0.6)
# Render the current matplotlib figure on an interactive Leaflet map.
mplleaflet.display()