In [1]:
#Note that shapely isn't part of the default Anaconda distribution. See library website for download instructions.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import LineString
from datetime import datetime

%matplotlib inline

In [2]:
#Location of the input data: the raw births_and_deaths.csv file in the tableau-intercept GitHub repo.
#The URL is handed straight to pd.read_csv in the next cell.
csv_path = 'https://raw.githubusercontent.com/gherka/tableau-intercept/master/births_and_deaths.csv'

In [3]:
#load the births/deaths table from the location held in csv_path
df = pd.read_csv(filepath_or_buffer=csv_path)

In [4]:
#parse reg_year into real datetimes; the polygon cell below calls .timestamp() on these values
reg_year_parsed = pd.to_datetime(df['reg_year'])
df['reg_year'] = reg_year_parsed

In [5]:
#Build the polygon vertex table used to shade the area between the births and deaths lines.
#
#For every council area the two lines are walked one segment (two consecutive rows) at a time.
#Whenever the two segments cross, the polygon collected so far is closed at the crossing point
#and a new one is started from that same point, so every polygon lies on one side of the
#crossing and gets a single colour flag.
#
#If there are other levels besides council area, group on them as well and keep the segment
#walk as the innermost loop.

cols = ['reg_year', 'council_area', 'poly_y', 'path_id', 'poly_id', 'poly_colour', 'births_y', 'deaths_y',
        'last_intersection_x', 'last_intersection_y']


def _births_on_top(births, deaths):
    """Return 1 if births exceed deaths at any vertex of the polygon, otherwise 0."""
    return int(any(b > d for b, d in zip(births, deaths)))


def _close_polygon(out, xs, births, deaths, polygon_id, ends_on_crossing):
    """Append one closed polygon to the output lists in ``out``.

    The outline runs left-to-right along the births values and back right-to-left along the
    deaths values. When the polygon ends on a crossing, the last entry of each list is the
    crossing itself; it is shared by both edges, so the forward walk leaves it out and it
    appears only once in the outline.
    """
    def forward(seq):
        return seq[:-1] if ends_on_crossing else seq

    ring_x = forward(xs) + xs[::-1]
    n_vertices = len(ring_x)

    out['x'].extend(ring_x)
    out['poly_y'].extend(forward(births) + deaths[::-1])
    #the original line values, in the same vertex order as the outline
    out['births_y'].extend(forward(births) + births[::-1])
    out['deaths_y'].extend(forward(deaths) + deaths[::-1])
    out['path_id'].extend(range(1, n_vertices + 1))
    out['poly_id'].extend([polygon_id] * n_vertices)
    out['poly_colour'].extend([_births_on_top(births, deaths)] * n_vertices)


def _council_polygons(council, council_df, polygon_id):
    """Build the polygon rows for a single council area.

    Parameters
    ----------
    council : label of the council area, written to the 'council_area' column.
    council_df : rows of ``df`` for that council; reads 'reg_year', 'pct_births', 'pct_deaths'.
    polygon_id : id to give to the first polygon created here.

    Returns
    -------
    (frame, next_polygon_id) : ``frame`` has the columns listed in ``cols``, or is None when
    the council has fewer than two rows; ``next_polygon_id`` is the first id not used here.
    """
    #the segment walk assumes chronological order; mergesort keeps the sort stable
    council_df = council_df.sort_values('reg_year', kind='mergesort')

    #dates become epoch seconds so shapely can treat them as plain x coordinates
    xs = [ts.timestamp() for ts in council_df['reg_year']]
    births = council_df['pct_births'].tolist()
    deaths = council_df['pct_deaths'].tolist()

    out = {'x': [], 'poly_y': [], 'path_id': [], 'poly_id': [], 'poly_colour': [],
           'births_y': [], 'deaths_y': []}

    #vertices of the polygon currently being collected
    x_temp = []
    births_y_temp = []
    deaths_y_temp = []

    #reset per council so an area without any crossing does not inherit the previous area's values
    last_intersection_x = pd.NaT
    last_intersection_y = np.nan

    #main loop: process 2 rows (one segment of each line) at a time, including the last segment
    for i in range(len(xs) - 1):

        x1, x2 = xs[i], xs[i+1]
        births_y1, births_y2 = births[i], births[i+1]
        deaths_y1, deaths_y2 = deaths[i], deaths[i+1]

        line1 = LineString([(x1, births_y1), (x2, births_y2)])
        line2 = LineString([(x1, deaths_y1), (x2, deaths_y2)])

        xcom = line1.intersection(line2)

        #an empty result means no crossing; a non-Point result (the two segments overlap)
        #has no single crossing point, so it is handled like a plain segment as well
        if xcom.is_empty or xcom.geom_type != 'Point':

            x_temp.extend([x1, x2])
            births_y_temp.extend([births_y1, births_y2])
            deaths_y_temp.extend([deaths_y1, deaths_y2])
            continue

        #if the intersection appears in the very first segment nothing has been collected yet,
        #so include x1, y1 before closing the polygon
        if not x_temp:

            x_temp.append(x1)
            births_y_temp.append(births_y1)
            deaths_y_temp.append(deaths_y1)

        #close the current polygon on the crossing point
        x_temp.append(xcom.x)
        births_y_temp.append(xcom.y)
        deaths_y_temp.append(xcom.y)

        _close_polygon(out, x_temp, births_y_temp, deaths_y_temp, polygon_id, ends_on_crossing=True)
        polygon_id += 1

        #start the new polygon from the crossing point
        x_temp = [xcom.x, x2]
        births_y_temp = [xcom.y, births_y2]
        deaths_y_temp = [xcom.y, deaths_y2]

        last_intersection_x = pd.to_datetime(xcom.x, unit='s')
        last_intersection_y = xcom.y

    #close the trailing open-ended polygon; it runs up to the council's last row
    if x_temp:

        _close_polygon(out, x_temp, births_y_temp, deaths_y_temp, polygon_id, ends_on_crossing=False)
        #advance the id so the next council does not reuse it
        polygon_id += 1

    if not out['x']:
        return None, polygon_id

    #Timestamp.timestamp() measures naive dates from the UTC epoch, so convert back the same way;
    #a local-time conversion would shift the dates on machines that are not on UTC
    temp_df = pd.DataFrame({'reg_year': pd.to_datetime(out['x'], unit='s'),
                            'council_area': council,
                            'poly_y': out['poly_y'],
                            'path_id': out['path_id'],
                            'poly_id': out['poly_id'],
                            'poly_colour': out['poly_colour'],
                            'births_y': out['births_y'],
                            'deaths_y': out['deaths_y'],
                            'last_intersection_x': last_intersection_x,
                            'last_intersection_y': last_intersection_y},
                           columns=cols)

    return temp_df, polygon_id


polygon_id = 1
council_frames = []

#group on the plain column name (not a one-element list) so `council` is a scalar label
for council, council_df in df.groupby('council_area'):

    temp_df, polygon_id = _council_polygons(council, council_df, polygon_id)

    if temp_df is not None:
        council_frames.append(temp_df)

#concatenate once at the end instead of growing the frame inside the loop
if council_frames:
    df_final = pd.concat(council_frames, ignore_index=True)
else:
    df_final = pd.DataFrame(columns=cols)

print('done')

done


In [6]:
#Optional: left-join extra columns from the source data onto the polygon rows (to use in tooltips, for example).
#Rows whose (council_area, reg_year) pair has no match in df - such as vertices created at a crossing
#point between two years - keep NaN in the extra columns.

extra_cols = ['num_births', 'num_deaths']

#drop copies left by an earlier run of this cell, otherwise re-running it would merge again and
#produce suffixed duplicates (num_births_x, num_births_y, ...)
df_final = (
    df_final
    .drop(columns=extra_cols, errors='ignore')
    .merge(df[['council_area', 'reg_year'] + extra_cols], how='left', on=['council_area', 'reg_year'])
)

In [7]:
#Write the polygon table for Tableau. The path is relative to the notebook's working directory
#so the cell runs on any machine; change output_csv to save the file somewhere else.
output_csv = 'city_polygons.csv'

df_final.to_csv(output_csv, index=False)