Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 81 lines (71 sloc) 2.34 KB
#!/usr/bin/perl -w
# canonicalise-eha:
# Convert the provided TSV list of councils (name, email) into a CSV of
# MaPit area ID and email, by matching council names against MaPit's
#
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
#
# $Id: canonicalise-eha,v 1.1 2009-05-27 11:11:18 matthew Exp $
use strict;
require 5.8.0;
# Horrible boilerplate to set up appropriate library paths.
use FindBin;
use lib "$FindBin::Bin/../perllib";
use lib "$FindBin::Bin/../commonlib/perllib";
use mySociety::Config;
use mySociety::MaPit;
use mySociety::VotingArea;
# Runs at compile time: point the config loader at this checkout's
# conf/general and then configure the MaPit client (presumably from that
# config -- confirm against mySociety::MaPit).
BEGIN {
    my $config_file = "$FindBin::Bin/../conf/general";
    mySociety::Config::set_file($config_file);
    mySociety::MaPit::configure();
}
# Load the supplied council list into %councils (normalised name => email).
# Each line is expected to hold a council name and, optionally, an email
# address, separated by one or more tabs; a missing email is stored as ''.
my %councils;
my $list_file = "$FindBin::Bin/../data/eha_listN";
# Three-argument open on a lexical handle, and stop straight away if the list
# cannot be read: carrying on with an empty list would be pointless.
open(my $list_fh, '<', $list_file)
    or die "Cannot open $list_file for reading: $!\n";
while (<$list_fh>) {
    # Strip the line ending, whether LF or CRLF.
    s/\r?\n//g;
    my ($name, $email) = split /\s*\t+\s*/;
    # Blank lines, or lines with nothing before the first tab, carry no
    # council name; skip them rather than storing an empty key.
    next unless defined $name && $name =~ /\S/;
    $email ||= '';
    #$name = 'Blackburn' if $name eq 'Blackburn with Darwen';
    # Normalise the name: "St " becomes "St. ", hyphens around upon/on/le
    # become spaces, and a leading "LB " or a council-type suffix is dropped.
    $name =~ s/^St /St. /;
    $name =~ s/-(upon|on|le)-/ $1 /;
    $name =~ s/^LB //;
    $name =~ s/ BC| DC| MDC| MBC| (Metropolitan|Borough|District|County|City) Council| Council//;
    # Test for existence, not truth: an earlier entry stored with an empty
    # email is still a duplicate name. The later entry wins either way.
    print "Already have $name\n" if exists $councils{$name};
    $councils{$name} = $email;
}
close($list_fh);
# Area types to query: every council parent type except those whose code
# matches CTY or LGD.
my @types = grep { !/CTY|LGD/ } @$mySociety::VotingArea::council_parent_types;
# %out maps area ID => email for every area matched to a list entry.
my (%out);
foreach my $type (@types) {
    my $area_ids = mySociety::MaPit::get_areas_by_type($type);
    my $info_for = mySociety::MaPit::get_voting_areas_info($area_ids);
    AREA:
    foreach my $id (keys %$info_for) {
        my $info = $info_for->{$id};
        # Areas with country 'S' are left out (presumably Scotland -- confirm).
        next AREA if $info->{country} eq 'S';
        my $name = $info->{name};

        # Special case: this area is looked up under the list key
        # 'Durham City', and is recorded without the '@' check below.
        if ($name eq 'Durham City Council') {
            $out{$id} = delete $councils{'Durham City'};
            next AREA;
        }
        # Disabled counterpart of the special case above:
        #} elsif ($name eq 'Durham County Council') {
        # $out{$id} = $councils{'Durham County'};
        # next;

        # Drop the council-type suffix so the name lines up with the keys
        # of %councils.
        $name =~ s/( (Borough|City|District|County))* Council//;

        # Only accept an entry whose value looks like an email address.
        my $email = $councils{$name};
        if (!($email && $email =~ /@/)) {
            print "Problem: $name $info->{country}\n";
            next AREA;
        }

        # Matched: record it and take it off the list, so that whatever is
        # left in %councils afterwards is exactly the unmatched entries.
        $out{$id} = delete $councils{$name};
    }
}
# Whatever is still in %councils was never paired with a MaPit area; list
# those names, in sorted order, together with their stored email.
foreach my $leftover (sort keys %councils) {
    my $email = $councils{$leftover};
    print "Not matched: $leftover $email\n";
}
# Output emails to canonical CSV: one "area_id,email" line per matched area,
# in the default (string) sort order of the area IDs.
my $csv_file = "$FindBin::Bin/../data/eha.csv";
# Three-argument open on a lexical handle; fail loudly rather than printing
# to an unopened handle if the file cannot be created.
open(my $csv_fh, '>', $csv_file)
    or die "Cannot open $csv_file for writing: $!\n";
foreach my $area_id (sort keys %out) {
    print {$csv_fh} "$area_id," . $out{$area_id} . "\n";
}
# Buffered write errors only surface at close, so check it.
close($csv_fh)
    or die "Cannot close $csv_file: $!\n";