In [0]:
# Install the third-party packages imported in the next cell
# (geopandas, descartes, mplleaflet, seaborn) into the notebook runtime.
# NOTE(review): no versions are pinned, so a later re-run may install
# different releases than the ones this notebook was developed against.
!pip install geopandas
!pip install descartes
!pip install mplleaflet
!pip install seaborn



In [0]:
import numpy as np
import pandas as pd
import geopandas as gpd
from functools import reduce 

import time

import matplotlib as mp
import matplotlib.pyplot as plt
import mplleaflet
import descartes
import seaborn as sns
# Disable pandas' chained-assignment (SettingWithCopy) warning for the notebook.
pd.options.mode.chained_assignment = None

import warnings
# NOTE(review): this silences *every* warning, including deprecation notices
# from geopandas/matplotlib; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Configure seaborn in a single call. Each sns.set() call re-applies the
# defaults for every argument that is not passed, so calling
# sns.set(font_scale=2) after sns.set(style="whitegrid") silently switches
# the style back to the default "darkgrid".
sns.set(style="whitegrid", font_scale=2)

In [0]:
# Authenticate the current Colab user.
# NOTE(review): presumably these are the credentials the BigQuery client
# created further down relies on - confirm.
from google.colab import auth
auth.authenticate_user()
# Reached only if authenticate_user() returned without raising.
print('Authenticated')

Authenticated


In [0]:
from google.cloud import bigquery

# BigQuery client bound to the thesis project; used for all queries below.
client = bigquery.Client(project='thesisprojects2019')

In [0]:
# Load the London home-location statistics table from BigQuery,
# ordered by eventid, into a pandas DataFrame.
query = """ SELECT * FROM `thesisprojects2019.audeMS.step3_statistic_london_homelocation_spectatorswithhome` ORDER BY eventid """
query_job = client.query(query)
data = query_job.to_dataframe()

In [0]:
# Preview the first two rows of the statistics table loaded from BigQuery.
data.head(2)

Unnamed: 0,venueid,eventid,event,ward_id,ward_name,count_spect_london,perc_spect_london
0,1,1_2017-11-04,Barbarians New Zealand,E05000614,Fairfield,10,1.339
1,1,1_2017-11-04,Barbarians New Zealand,E05000527,St. Margarets and North Twickenham,9,1.205


In [0]:
# Mount Google Drive at /content/drive; the shapefile read below and the
# CSV/PNG outputs written later all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Read the London ward boundaries (shapefile stored on Google Drive)
# into a GeoDataFrame.
pathdrive = (
    'drive/My Drive/Colab Notebooks/'
    'Shapefiles/London/London_Ward_CityMerged.shp'
)
geom_data = gpd.read_file(pathdrive)

In [0]:
# Preview the first two ward records as read from the shapefile.
geom_data.head(2)

Unnamed: 0,NAME,GSS_CODE,DISTRICT,LAGSSCODE,HECTARES,NONLD_AREA,geometry
0,Chessington South,E05000405,Kingston upon Thames,E09000021,755.173,0.0,"POLYGON ((516401.5959642464 160201.8020401799,..."
1,Tolworth and Hook Rise,E05000414,Kingston upon Thames,E09000021,259.464,0.0,"POLYGON ((519552.9984840818 164295.60020387, 5..."


In [0]:
# Number of rows (ward polygons) in the shapefile.
len(geom_data)

633

In [0]:
# Target coordinate reference system for the ward layer (EPSG:4326).
crs = {'init': 'epsg:4326'}

# Reproject the wards, rename the name/code columns to ward_name/ward_id
# (ward_id is the join key used against `data`), and keep only the
# columns needed further down.
lnd_ward = (
    geom_data
    .to_crs(crs)
    .rename(columns={'NAME': 'ward_name', 'GSS_CODE': 'ward_id'})
)[['ward_name', 'ward_id', 'geometry']]

In [0]:
# Preview the reprojected, renamed ward table.
lnd_ward.head(2)

Unnamed: 0,ward_name,ward_id,geometry
0,Chessington South,E05000405,POLYGON ((-0.3306790785982648 51.3290110240824...
1,Tolworth and Hook Rise,E05000414,POLYGON ((-0.2840948624066646 51.3651516247340...


In [0]:
# Save the ward table (ward_name, ward_id, geometry) as CSV on Google Drive.
# to_csv is called with its defaults, so the row index is written as well.
lnd_ward.to_csv('drive/My Drive/Colab Notebooks/data/london_wards.csv')

In [0]:
# Attach the ward geometries to the spectator statistics. The right join
# keeps every ward, so wards without a matching statistics row appear with
# NaN in the statistics columns.
jointable = data.merge(lnd_ward, how='right', on=['ward_id'])

# Keep the columns of interest, order by ward, and add the percentage
# rounded to three decimals as perc_spect_ward.
jointable = (
    jointable[['venueid', 'eventid', 'event', 'ward_id', 'ward_name_x', 'perc_spect_london', 'geometry']]
    .sort_values('ward_id')
    .reset_index(drop=True)
    .assign(perc_spect_ward=lambda df: df['perc_spect_london'].round(3))
)

In [0]:
# Preview the first two rows of the joined statistics/geometry table.
jointable.head(2)

Unnamed: 0,venueid,eventid,event,ward_id,ward_name_x,perc_spect_london,geometry,perc_spect_ward
0,3.0,3_2017-12-13,West Ham Arsenal,E05000026,Abbey,0.248,POLYGON ((0.08547999441580269 51.5370421145770...,0.248
1,3.0,3_2017-11-04,West Ham Liverpool,E05000026,Abbey,0.137,POLYGON ((0.08547999441580269 51.5370421145770...,0.137


In [0]:
# Distinct event ids present in the statistics table (one map is drawn per id).
event_list = data['eventid'].unique()

In [0]:
# `files` provides files.download(), used further below to download the saved map images from Google Drive
from google.colab import files

In [0]:
# For every event, draw a ward-level choropleth of perc_spect_london
# (share of the event's spectators with a known home living in each ward)
# and save it as a PNG on Google Drive.
for e in event_list:
    # Statistics rows belonging to this event only
    dataevent = data[data['eventid'] == e]
    if dataevent.empty:
        # e.g. a NaN event id never compares equal to itself; nothing to plot
        continue

    # Right join keeps every ward; wards with no spectators get NaN values
    datajoin = pd.merge(dataevent, lnd_ward, how='right', on=['ward_id'])
    datajoin = datajoin[['venueid','eventid','event','ward_id','ward_name_x','perc_spect_london','geometry']]

    # Read the event name from the event's own rows. Row 0 of the right join
    # belongs to the first ward of lnd_ward and holds NaN whenever that ward
    # has no spectators for this event, which produced "nan" titles/file names.
    eventname = dataevent['event'].iloc[0]

    # Wrap as a GeoDataFrame. The geometries come from lnd_ward, which was
    # reprojected to EPSG:4326, so reuse its CRS instead of labelling
    # longitude/latitude coordinates as EPSG:27700.
    gpd_df = gpd.GeoDataFrame(datajoin, geometry='geometry', crs=lnd_ward.crs)

    # Wards without spectators: show them as 0 rather than NaN
    values = {'venueid': 0, 'eventid': 0, 'perc_spect_london': 0}
    gpd_df = gpd_df.fillna(value=values)

    # Variable to plot and the colour range used for map and colorbar
    var_map = gpd_df['perc_spect_london']
    vmin = var_map.min()
    vmax = var_map.max()

    # Create figure and axes for Matplotlib
    fig, ax = plt.subplots(1, figsize=(8, 7.5), dpi=160)

    # Draw the map; vmin/vmax are passed explicitly so the polygons and the
    # manually built colorbar below are guaranteed to share the same scale
    gpd_df.plot(column=var_map, cmap='YlGn', linewidth=0.6, ax=ax,
                edgecolor='0.8', vmin=vmin, vmax=vmax)

    ax.set_title(str(eventname), fontsize=12)
    ax.set_xlabel('Longitude', fontsize=12, horizontalalignment='center')
    ax.set_ylabel('Latitude', fontsize=12, horizontalalignment='center')
    ax.tick_params(axis='both', which='major', pad=6, labelsize=9)

    # Stand-alone mappable that only carries the colour scale for the colorbar
    sm = plt.cm.ScalarMappable(cmap='YlGn', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])

    # ax must be given explicitly: the mappable is not attached to any Axes,
    # so Matplotlib cannot infer where to place the colorbar
    cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.04, pad=0.14)
    cbar.set_label('% Event Spectators (of total spectators with home)', size=11)
    cbar.ax.xaxis.set_label_position('top')
    cbar.ax.tick_params(labelsize=10)

    fig.tight_layout()

    # A '/' inside the event name would be read as a directory separator.
    # NOTE(review): the file name is built from the event name only, so two
    # event ids sharing a name would overwrite each other's image - confirm
    # names are unique or add the event id to the file name.
    safe_eventname = str(eventname).replace('/', '-')
    fig.savefig('drive/My Drive/Dan - Meetings/Thesis Maps & Graphs/London_Home/Home_Spectators/lndward_map_'+safe_eventname+'.png')

    # Close this figure so figures do not pile up in memory across iterations
    plt.close(fig)

In [0]:
import os

# Folder on the mounted Google Drive that holds the saved map images
folderpath = '/content/drive/My Drive/Dan - Meetings/Thesis Maps & Graphs/London_Home/Home_Spectators'

# Download every entry found in that folder, one file at a time
for filename in os.listdir(folderpath):
    files.download(os.path.join(folderpath, filename))