In [10]:
import csv
import re

from geojson import Point, Feature, FeatureCollection, dump, MultiPoint

## Process the records into GeoJSON

In [11]:
# Load every row of the master spreadsheet into memory as a list of dicts
# keyed by the CSV header names.
# 'utf-8-sig' strips a leading byte-order mark, if present, so the first
# column name is read cleanly.
with open('DATA MASTER-dh-copy-v4-2024-07-04.csv', encoding='utf-8-sig') as file:
    records = list(csv.DictReader(file))
# No explicit close(): the with-block closes the file on exit.

In [12]:
# Column names of the spreadsheet, taken from the first record.
list(records[0])

['Theme (KEF Themes)',
 'Title',
 'Project Location - In London',
 'Date',
 'Identification- DOI/URL/ISBN/Reference no.',
 'Author + Location (Lat,Long)',
 'Department',
 'Abstract',
 'Partners',
 'Partner Type',
 'Partnership Location - In London',
 'Partnership type',
 'Policy Sources////Citations ( from Overton)',
 'Policy source subcategory',
 'Filter (Impact Themes from Impact casa)',
 'Filter 2 (Themes)',
 'Funder',
 'Source']

In [13]:
# Matches a "(lat,long)" pair such as "(51.5074,-0.1278)" and captures both
# numbers. The decimal points are escaped so only a literal "." is accepted;
# an unescaped "." would also match a digit or any other character and let
# malformed text through as coordinates.
COORD_PATTERN = re.compile(r'\((-?\d+\.\d+),(-?\d+\.\d+)\)')

# One GeoJSON Feature per spreadsheet row; its geometry is a MultiPoint of
# every coordinate pair found in the row's project-location cell.
features = []

for row in records:
    points = []
    # Reset for every row so that a row without any parsable coordinates gets
    # an empty name rather than inheriting the previous row's name (or raising
    # NameError when it happens to be the first row).
    name = ''
    # The cell holds one location per line, each written as
    # "<place name> (<number>,<number>)".
    for line in row['Project Location - In London'].split('\n'):
        match = COORD_PATTERN.search(line)
        if match is None:
            continue  # no coordinate pair on this line
        first, second = (float(value) for value in match.groups())
        # The pair is swapped on the way in: GeoJSON positions are ordered
        # (longitude, latitude), so the sheet presumably stores (lat, long)
        # -- confirm against the data.
        points.append(Point((second, first)))
        # The place name is whatever remains once the coordinates are removed.
        # When a cell lists several places, the last one names the feature.
        name = COORD_PATTERN.sub('', line).strip()

    feature = Feature(
        geometry=MultiPoint(points),
        properties={
            'name': name,
            # Keep only the part of the title before the first colon.
            'title': row['Title'].split(':')[0],
            'kef_theme': row['Theme (KEF Themes)'],
            'subject_area': row['Filter 2 (Themes)'],
            'output_type': 'research',
            'department': row['Department'],
            'abstract': row['Abstract'],
            'partners': row['Partners'],
        },
    )
    features.append(feature)

In [14]:
# Wrap all features in a single GeoJSON FeatureCollection ready for export.
collection = FeatureCollection(features=features)

In [15]:
# Serialise the feature collection to disk as GeoJSON.
# The encoding is set explicitly so the output does not depend on the
# platform's default; the with-block closes the file, so no close() call is needed.
with open('./data-master-research-locations-v5-2024-07-04.geojson', 'w', encoding='utf-8') as file:
    dump(collection, file)

## Create a graph of departments and partnership types

In [1]:
import networkx as nx

In [7]:
# Undirected graph with one edge per record, joining the record's department
# to its partnership type. nx.Graph keeps a single edge per node pair, so
# repeated department/partnership combinations collapse onto one edge.
g = nx.Graph()
g.add_edges_from(
    (record['Department'], record['Partnership type']) for record in records
)

In [9]:
# Export the department/partnership graph in GEXF format.
gexf_path = 'departments-and-partnerships-network-v1-2024-07-03.gexf'
nx.write_gexf(g, gexf_path)