In [2]:
#https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/analysisofpopulationestimatestool

#age 
#import everything
import pandas as pd
import numpy as np
import geopandas as gpd
import altair as alt
from IPython.display import Markdown, display
from itertools import combinations as combo
from matplotlib import pyplot as plt
import folium as fm
from shapely.geometry import Polygon
from math import radians, asin, sqrt, sin, cos, log, log10

#Linear regression
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor

#Geographically weighted statistics
from pysal.model.mgwr.gwr import GWR,GWRResults
from pysal.model.mgwr.diagnostics import corr

#If you're using Jupyter Notebook, you need to run the command below
alt.renderers.enable('notebook')

You can install them with  `pip install urbanaccess pandana` or `conda install -c udst pandana urbanaccess`
  "You need pandana and urbanaccess to work with segregation's network module\n"
  from .sqlite import head_to_sql, start_sql


RendererRegistry.enable('notebook')

In [3]:
age = pd.read_csv('data/age_uk.csv')

In [7]:
ladcode = age[age['level'] == 'ladcode']
ladcode.head(4)

Unnamed: 0,code,level,age,sex,population_2001,population_2002,population_2003,population_2004,population_2005,population_2006,...,population_2009,population_2010,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018
182,E06000001,ladcode,0,1,525,502,516,553,531,583,...,572,584,601,595,576,551,497,576,507,535
183,E06000001,ladcode,1,1,544,526,498,526,550,539,...,597,577,573,615,601,567,553,512,570,515
184,E06000001,ladcode,2,1,548,548,551,507,521,539,...,584,591,580,586,621,600,576,562,534,576
185,E06000001,ladcode,3,1,556,546,548,553,508,514,...,560,593,600,578,584,625,598,579,565,530


In [10]:
ladcode2 = ladcode[['code','level','age','sex','population_2015','population_2016','population_2017','population_2018']]

In [11]:
ladcode2.head(4)

Unnamed: 0,code,level,age,sex,population_2015,population_2016,population_2017,population_2018
182,E06000001,ladcode,0,1,497,576,507,535
183,E06000001,ladcode,1,1,553,512,570,515
184,E06000001,ladcode,2,1,576,562,534,576
185,E06000001,ladcode,3,1,598,579,565,530


In [14]:
ladcode3 = ladcode2[ladcode2['age'] >= 16]
ladcode3.head(4)

#will also model what would happen if 16 year olds would be able to vote

Unnamed: 0,code,level,age,sex,population_2015,population_2016,population_2017,population_2018
198,E06000001,ladcode,16,1,546,560,605,521
199,E06000001,ladcode,17,1,547,556,564,604
200,E06000001,ladcode,18,1,622,546,548,554
201,E06000001,ladcode,19,1,575,577,497,461


In [17]:
max(ladcode2['age'])

90

In [21]:
bins = [15, 18, 25, 35, 45, 55, 65, 90]
ladcode3['binned'] = pd.cut(ladcode3['age'], bins)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [22]:
ladcode3.head(4)

Unnamed: 0,code,level,age,sex,population_2015,population_2016,population_2017,population_2018,binned
198,E06000001,ladcode,16,1,546,560,605,521,"(15, 18]"
199,E06000001,ladcode,17,1,547,556,564,604,"(15, 18]"
200,E06000001,ladcode,18,1,622,546,548,554,"(15, 18]"
201,E06000001,ladcode,19,1,575,577,497,461,"(18, 25]"


In [26]:
ladcode4 = ladcode3.groupby(by = ['code','binned']).sum()

In [28]:
ladcode4.head(12)

Unnamed: 0_level_0,Unnamed: 1_level_0,age,sex,population_2015,population_2016,population_2017,population_2018
code,binned,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
E06000001,"(15, 18]",102,9,3430,3391,3375,3217
E06000001,"(18, 25]",308,21,8014,8041,7764,7468
E06000001,"(25, 35]",610,30,11413,11541,11786,11998
E06000001,"(35, 45]",810,30,10749,10610,10355,10398
E06000001,"(45, 55]",1010,30,13730,13664,13570,13304
E06000001,"(55, 65]",1210,30,11353,11458,11751,12069
E06000001,"(65, 90]",3900,75,16229,16444,16632,16895
E06000002,"(15, 18]",102,9,5246,5114,5027,4857
E06000002,"(18, 25]",308,21,16390,16626,16437,16011
E06000002,"(25, 35]",610,30,18629,19100,19348,19738


In [34]:
#drop age and sex
ladcode4.drop(['age', 'sex'], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2015,population_2016,population_2017,population_2018
code,binned,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
E06000001,"(15, 18]",3430,3391,3375,3217
E06000001,"(18, 25]",8014,8041,7764,7468
E06000001,"(25, 35]",11413,11541,11786,11998
E06000001,"(35, 45]",10749,10610,10355,10398
E06000001,"(45, 55]",13730,13664,13570,13304
...,...,...,...,...,...
W06000024,"(25, 35]",8082,8298,8306,8316
W06000024,"(35, 45]",6875,6887,6887,6926
W06000024,"(45, 55]",8560,8502,8490,8394
W06000024,"(55, 65]",7183,7312,7401,7556


In [35]:
ladcode4.to_csv('ladcode4.csv')

In [None]:
#map ages across the UK


In [None]:
#percentage ages of constituencies


In [None]:
#overall percentages that vote one way or the other


In [None]:
#turnout