# Map Exploration 

In [65]:
# import folium
import pandas as pd
import json
import os
import random
import numpy as np
import plotly
import plotly.express as px
from geojson_rewind import rewind

#### Install specific plotly version when using Google Collab

In [44]:
# !pip install plotly==4.14.3

In [45]:
# from google.colab import drive
# drive.mount('/content/drive')

## Loading Geojson

In [46]:
"""
Geojson information sources
"""

### UK Geoportal
# https://geoportal.statistics.gov.uk   # UK gov stats body
# https://geoportal.statistics.gov.uk/datasets/01fd6b2d7600446d8af768005992f76a_0/explore?location=54.650000%2C-3.250000%2C5.53   # UK 12 region map

### Explanation of different boundary breakdowns
# https://geoportal.statistics.gov.uk/documents/9c04ff58854040d09a5a7ce146ab59b4/explore
# file:///C:/Users/Seb/Desktop/Perso/CS%20-%20General%20Info/Le%20Wagon/Final%20Project/hierarchy_poster_v2.5_JUN_2019_A3.pdf



'\nGeojson information sources\n'

In [47]:
### Source of topojson ###
# https://martinjc.github.io/UK-GeoJSON/json/eng/topo_eer.json
### converted from topojson to geojson ###
# https://mapshaper.org

In [50]:
## filepath from Drive
# geojson_path = '/content/drive/MyDrive/data/le_kingmakers/geojson_full_extent.geojson'
## filepath from hard drive
# geojson_path = '\Users\Seb\Desktop\Perso\CS - General Info\Le Wagon\Final Project\Data Files\geojson_full_extent.geojson'
# geojson_path = 'C:/Users/Seb/Desktop/Perso/CS - General Info/Le Wagon/Final Project/Data Files/geojson_full_extent.geojson'
# relative path from hard drive
geojson_path = '../raw_data/geojson_full_extent_super_gen.geojson'

In [51]:
with open(geojson_path) as json_file:
    uk_regions_json = json.load(json_file)

In [52]:
uk_regions_json.keys()

dict_keys(['type', 'name', 'crs', 'features'])

## locations dictionary

### Example breakdown of Geojson

In [74]:
uk_regions_json['features'][0]['properties']

{'objectid': 1,
 'nuts118cd': 'UKC',
 'nuts118nm': 'North East (England)',
 'bng_e': 417313,
 'bng_n': 600358,
 'long': -1.72889996,
 'lat': 55.2970314,
 'st_areashape': 8609938892.516281,
 'st_lengthshape': 657578.2253037834}

### Compiling Locations Dictionary

In [76]:
location_dict = {}

for feature in uk_regions_json['features']:
  location_dict[feature['properties']['nuts118nm']] = [feature['properties']['lat'],feature['properties']['long']]

location_dict

{'North East (England)': [55.2970314, -1.72889996],
 'North West (England)': [54.44945145, -2.7723701],
 'Yorkshire and The Humber': [53.93264008, -1.28711998],
 'East Midlands (England)': [52.79571915, -0.84966999],
 'West Midlands (England)': [52.55696869, -2.2035799],
 'East of England': [52.24066925, 0.50414598],
 'London': [51.49227142, -0.30864],
 'South East (England)': [51.4509697, -0.99311],
 'South West (England)': [50.81119156, -3.63343],
 'Wales': [52.06740952, -3.99415994],
 'Scotland': [56.1774292, -3.97091007],
 'Northern Ireland': [54.61494064, -6.85481024]}

### Inputting Search Radius (manually)

In [None]:
{'North East (England)': [55.127663, -1.473992, 70],
 'North West (England)': [53.786177, -2.94986, 80],
 'Yorkshire and The Humber': [54.054663, -0.916311, 56],
 'East Midlands (England)': [53.240963, -0.799282, 60],
 'West Midlands (England)': [52.557403, -2.081606 , 60],
 'East of England': [52.318458, 0.802540, 82],
 'London': [51.485280, -0.165762, 30],
 'South East (England)': [50.696906, -0.397168, 58],
 'South West (England)': [50.207629, -3.951667, 122],
 'Wales': [52.325072, -3.932561, 115],
 'Scotland': [56.553386, -3.471093, 157],
 'Northern Ireland': [54.658571, -6.415764, 71]}

### Obtain List of IDs (from Geojson)

In [53]:
### Region ID dictionary
region_id = {}

for feature in uk_regions_json['features']:
  region_id[feature['properties']['nuts118nm']] = feature['properties']['nuts118cd']

region_id

{'North East (England)': 'UKC',
 'North West (England)': 'UKD',
 'Yorkshire and The Humber': 'UKE',
 'East Midlands (England)': 'UKF',
 'West Midlands (England)': 'UKG',
 'East of England': 'UKH',
 'London': 'UKI',
 'South East (England)': 'UKJ',
 'South West (England)': 'UKK',
 'Wales': 'UKL',
 'Scotland': 'UKM',
 'Northern Ireland': 'UKN'}

## Exploring DF

### Using real df

In [54]:
### importing PROCESSED data
## path from Drive
# csv_path = '/content/drive/MyDrive/data/le_kingmakers/3_cities_data_7_days.csv'

## path from hard disk
csv_path = '3_cities_data_7_days.csv'

df = pd.read_csv(csv_path, encoding='latin')
df = df.copy()


Columns (1,2,6,7,16,17,18,22,24,26,28,29,30,31,33,34,35,36) have mixed types.Specify dtype option on import or set low_memory=False.



In [55]:
# explore df

display(df.shape)
display(df.head(2))

(31608, 38)

Unnamed: 0.1,Unnamed: 0,id,conversation_id,created_at,date,time,timezone,user_id,username,name,...,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest,handle
0,0,1402247138382422021,1401937589507067908,2021-06-08 13:52:24 BST,2021-06-08,13:52:24,100,247411887,minjas_zugik,MinjaÅ¡ Å½ugiÄ #FBPE #RejoinEU #ProEU,...,,,,,"[{'screen_name': 'mikegalsworthy', 'name': 'Dr...",,,,,JonAshworth
1,1,1402246907557302273,1402235685319368705,2021-06-08 13:51:29 BST,2021-06-08,13:51:29,100,351557555,bowen_james,ðªðº James Bowen ðªðº,...,,,,,"[{'screen_name': 'JonAshworth', 'name': 'Jonat...",,,,,JonAshworth


### Creating a sample df

In [56]:
### create sample_df

# use only 4 rows
sample_df = df.head(12)
# select columns
sample_df = sample_df[['id', 'tweet', ]]
# set values for location
rows_0_4 = sample_df.iloc[0:12]  # rows 12
sample_df['location'] = ['UKC', 'UKD', 'UKE', 'UKF', 'UKG', 'UKH', 'UKI', 'UKJ', 'UKK', 'UKL', 'UKM', 'UKN']  # set location as North East
# set values for sentiment score
dummy_sentiment = pd.DataFrame(np.random.uniform(low=0.00, high=1.00, size=(12,)), columns=['Score'])
# combine into single df
sample_df['score'] = dummy_sentiment
# reset index
sample_df.reset_index(drop=True, inplace=True)
sample_df

Unnamed: 0,id,tweet,location,score
0,1402247138382422021,@mikegalsworthy @TBlower2020 @JonAshworth Done,UKC,0.918553
1,1402246907557302273,"@JonAshworth ""Oh no. Somebody stole all the da...",UKD,0.468676
2,1402246786157268994,@JonAshworth Paused? Or hidden.,UKE,0.127473
3,1402242909794091016,#RT @labourpress: âTen years of Tory underfu...,UKF,0.449149
4,1402235045893582849,Happy Carers Week #CarersWeek2021 @MattHancoc...,UKG,0.283496
5,1402230694928125958,@JonAshworth If you do want to opt out - here'...,UKH,0.744277
6,1402166860074962944,@TimGalsworthy @NHSEngland @JonAshworth @Keir_...,UKI,0.320339
7,1402164350958424064,@0bj3ctivity @JonAshworth It doesn't matter if...,UKJ,0.321271
8,1402138600020582402,We are operating blindly unless EVERY home is ...,UKK,0.748511
9,1402016188473950209,@JonAshworth @EmmaHardyMP Yep- a decision made...,UKL,0.437418


In [57]:
### saving to csv
# sample_df.to_csv('/content/drive/MyDrive/data/le_kingmakers/sample_df_w1d1.csv', index=False)

## Plotting

In [58]:
# # Create the log count column
# sample_df['count_color'] = sample_df['score'].apply(np.log10)

# # Get the maximum value to cap displayed values
# max_log = sample_df['count_color'].max()
# max_val = int(max_log) + 1

# # Prepare the range of the colorbar
# values = [i for i in range(max_val)]
# ticks = [10**i for i in values]

In [70]:
"""
Attempting plot with px.choropleth - from medium (foca alvarez)
https://focaalvarez.medium.com/mapping-the-uk-and-navigating-the-post-code-maze-4898e758b82f
"""
counties_corrected = rewind(uk_regions_json,rfc7946=False)

fig = px.choropleth(sample_df, geojson=counties_corrected, locations='location', featureidkey="properties.nuts118cd", color='score',
                            color_continuous_scale="PurPor", labels={'label name':'label name'}, title='MAP TITLE',
                            scope="europe")

fig.update_geos(fitbounds="locations", visible=False)



In [61]:
"""
Attempting plot with px.choropleth - from youtube

"""

# Create figure
fig = px.choropleth(
    sample_df,
    locations='location',
    geojson=uk_regions_json,
    color='score',
    scope='europe'
)

# Add other infromation

# fig.update_geos(fitbounds='locations', visible=False)

# Display figure
fig.show()